Mercurial > repos > iuc > data_manager_star_index_builder
comparison data_manager/macros.xml @ 10:a225487bf618 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_star_index_builder commit ae6b59a8e52fd34e2347d1fd8d34129c36779266
| field | value |
|---|---|
| author | iuc |
| date | Fri, 17 Feb 2023 20:00:58 +0000 |
| parents | c520a52b5174 |
| children | d63c1442407f |
comparison
legend (row status): equal · deleted · inserted · replaced
| 9:c520a52b5174 | 10:a225487bf618 |
|---|---|
| 1 <macros> | 1 <macros> |
| 2 <!-- REMEMBER to bump the version of rna_star_index_builder_data_manager | 2 <!-- REMEMBER to bump the version of @IDX_VERSION_SUFFIX@ |
| 3 whenever you make changes to the following two version tokens! | 3 whenever you make changes to the @TOOL_VERSION@ token! |
| 4 The data manager uses a symlink to this macro file to keep the STAR and | 4 The data manager uses a symlink to this macro file to keep the STAR and |
| 5 the index versions in sync, but you should manually adjust the +galaxy | 5 the index versions in sync, but you should manually update @IDX_VERSION_SUFFIX@ --> |
| 6 version number. --> | |
| 7 <!-- STAR version to be used --> | 6 <!-- STAR version to be used --> |
| 8 <token name="@VERSION@">2.7.8a</token> | 7 <token name="@TOOL_VERSION@">2.7.10b</token> |
| 8 <token name="@VERSION_SUFFIX@">0</token> | |
| 9 <token name="@PROFILE@">21.01</token> | |
| 9 <!-- STAR index version compatible with this version of STAR | 10 <!-- STAR index version compatible with this version of STAR |
| 10 This is the STAR version that introduced the index structure expected | 11 This is the STAR version that introduced the index structure expected |
| 11 by the current version. | 12 by the current version. |
| 12 It can be found for any specific version of STAR with: | 13 It can be found for any specific version of STAR with: |
| 13 STAR -h | grep versionGenome | 14 STAR -h | grep versionGenome |
| 14 or by looking for the versionGenome parameter in source/parametersDefault | 15 or by looking for the versionGenome parameter in source/parametersDefault |
| 15 of STAR's source code --> | 16 of STAR's source code --> |
| 16 <token name="@IDX_VERSION@">2.7.4a</token> | 17 <token name="@IDX_VERSION@">2.7.4a</token> |
| 18 <token name="@IDX_VERSION_SUFFIX@">1</token> | |
| 17 <token name="@IDX_DATA_TABLE@">rnastar_index2x_versioned</token> | 19 <token name="@IDX_DATA_TABLE@">rnastar_index2x_versioned</token> |
| 18 | 20 |
| 19 <xml name="requirements"> | 21 <xml name="requirements"> |
| 20 <requirements> | 22 <requirements> |
| 21 <requirement type="package" version="@VERSION@">star</requirement> | 23 <requirement type="package" version="@TOOL_VERSION@">star</requirement> |
| 22 <requirement type="package" version="1.9">samtools</requirement> | 24 <requirement type="package" version="1.16.1">samtools</requirement> |
| 25 <requirement type="package" version="1.12">gzip</requirement> | |
| 23 <yield /> | 26 <yield /> |
| 24 </requirements> | 27 </requirements> |
| 25 </xml> | 28 </xml> |
| 26 | 29 |
| 27 <xml name="edam"> | 30 <xml name="edam"> |
| 33 <edam_operation>operation_0292</edam_operation> | 36 <edam_operation>operation_0292</edam_operation> |
| 34 </edam_operations> | 37 </edam_operations> |
| 35 </xml> | 38 </xml> |
| 36 | 39 |
| 37 <xml name="index_selection" token_with_gene_model="0"> | 40 <xml name="index_selection" token_with_gene_model="0"> |
| 38 <param argument="--genomeDir" name="genomeDir" type="select" | 41 <param argument="--genomeDir" type="select" |
| 39 label="Select reference genome" | 42 label="Select reference genome" |
| 40 help="If your genome of interest is not listed, contact the Galaxy team"> | 43 help="If your genome of interest is not listed, contact the Galaxy team"> |
| 41 <options from_data_table="@IDX_DATA_TABLE@"> | 44 <options from_data_table="@IDX_DATA_TABLE@"> |
| 42 <filter type="static_value" column="4" value="@WITH_GENE_MODEL@" /> | 45 <filter type="static_value" column="4" value="@WITH_GENE_MODEL@" /> |
| 43 <filter type="static_value" column="5" value="@IDX_VERSION@" /> | 46 <filter type="static_value" column="5" value="@IDX_VERSION@" /> |
| 53 <xml name="citations"> | 56 <xml name="citations"> |
| 54 <citations> | 57 <citations> |
| 55 <citation type="doi">10.1093/bioinformatics/bts635</citation> | 58 <citation type="doi">10.1093/bioinformatics/bts635</citation> |
| 56 </citations> | 59 </citations> |
| 57 </xml> | 60 </xml> |
| 58 <xml name="@SJDBOPTIONS@" token_optional="true"> | 61 <xml name="SJDBOPTIONS"> |
| 59 <param argument="--sjdbGTFfile" type="data" format="gff3,gtf" label="Gene model (gff3,gtf) file for splice junctions" optional="@OPTIONAL@" help="Exon junction information for mapping splices"/> | 62 <param argument="--sjdbGTFfile" type="data" format="gff3,gtf" label="Gene model (gff3,gtf) file for splice junctions" optional="false" help="Exon junction information for mapping splices"/> |
| 60 <param argument="--sjdbOverhang" type="integer" min="1" value="100" label="Length of the genomic sequence around annotated junctions" help="Used in constructing the splice junctions database. Ideal value is ReadLength-1"/> | 63 <param argument="--sjdbOverhang" type="integer" min="1" value="100" label="Length of the genomic sequence around annotated junctions" help="Used in constructing the splice junctions database. Ideal value is ReadLength-1"/> |
| 61 </xml> | 64 </xml> |
| 62 <xml name="dbKeyActions"> | 65 <xml name="dbKeyActions"> |
| 63 <actions> | 66 <actions> |
| 64 <conditional name="refGenomeSource.geneSource"> | 67 <conditional name="refGenomeSource.geneSource"> |
| 79 </actions> | 82 </actions> |
| 80 </xml> | 83 </xml> |
| 81 <token name="@TEMPINDEX@"><![CDATA[ | 84 <token name="@TEMPINDEX@"><![CDATA[ |
| 82 ## Create temporary index for custom reference | 85 ## Create temporary index for custom reference |
| 83 #if str($refGenomeSource.geneSource) == 'history': | 86 #if str($refGenomeSource.geneSource) == 'history': |
| 87 #if $refGenomeSource.genomeFastaFiles.ext == "fasta" | |
| 88 ln -s '$refGenomeSource.genomeFastaFiles' refgenome.fa && | |
| 89 #else | |
| 90 gunzip -c '$refGenomeSource.genomeFastaFiles' > refgenome.fa && | |
| 91 #end if | |
| 84 mkdir -p tempstargenomedir && | 92 mkdir -p tempstargenomedir && |
| 85 STAR | 93 STAR |
| 86 --runMode genomeGenerate | 94 --runMode genomeGenerate |
| 87 --genomeDir 'tempstargenomedir' | 95 --genomeDir 'tempstargenomedir' |
| 88 --genomeFastaFiles '${refGenomeSource.genomeFastaFiles}' | 96 --genomeFastaFiles refgenome.fa |
| 89 ## Handle difference between indices with/without annotations | 97 ## Handle difference between indices with/without annotations |
| 90 #if 'GTFconditional' in $refGenomeSource: | 98 #if 'GTFconditional' in $refGenomeSource: |
| 91 ## GTFconditional exists only in STAR, but not STARsolo | 99 ## GTFconditional exists only in STAR, but not STARsolo |
| 92 #if str($refGenomeSource.GTFconditional.GTFselect) == 'with-gtf': | 100 #if str($refGenomeSource.GTFconditional.GTFselect) == 'with-gtf': |
| 93 --sjdbOverhang '${refGenomeSource.GTFconditional.sjdbOverhang}' | 101 --sjdbOverhang '${refGenomeSource.GTFconditional.sjdbOverhang}' |
| 107 #end if | 115 #end if |
| 108 #if str($refGenomeSource.genomeSAindexNbases): | 116 #if str($refGenomeSource.genomeSAindexNbases): |
| 109 --genomeSAindexNbases ${refGenomeSource.genomeSAindexNbases} | 117 --genomeSAindexNbases ${refGenomeSource.genomeSAindexNbases} |
| 110 #end if | 118 #end if |
| 111 --runThreadN \${GALAXY_SLOTS:-4} | 119 --runThreadN \${GALAXY_SLOTS:-4} |
| 120 ## in bytes | |
| 121 --limitGenomeGenerateRAM \$((\${GALAXY_MEMORY_MB:-31000} * 1000000)) | |
| 112 && | 122 && |
| 113 #end if | 123 #end if |
| 114 ]]></token> | 124 ]]></token> |
| 115 <token name="@REFGENOMEHANDLING@" ><![CDATA[ | 125 <token name="@REFGENOMEHANDLING@" ><![CDATA[ |
| 116 --runThreadN \${GALAXY_SLOTS:-4} | 126 --runThreadN \${GALAXY_SLOTS:-4} |
| 119 #if str($refGenomeSource.geneSource) == 'history': | 129 #if str($refGenomeSource.geneSource) == 'history': |
| 120 tempstargenomedir | 130 tempstargenomedir |
| 121 #else: | 131 #else: |
| 122 '${refGenomeSource.GTFconditional.genomeDir.fields.path}' | 132 '${refGenomeSource.GTFconditional.genomeDir.fields.path}' |
| 123 ## Handle difference between indices with/without annotations | 133 ## Handle difference between indices with/without annotations |
| 124 #if str($refGenomeSource.GTFconditional.GTFselect) == 'without-gtf': | 134 #if str($refGenomeSource.GTFconditional.GTFselect) == 'without-gtf-with-gtf': |
| 125 #if $refGenomeSource.GTFconditional.sjdbGTFfile: | 135 --sjdbOverhang $refGenomeSource.GTFconditional.sjdbOverhang |
| 126 --sjdbOverhang $refGenomeSource.GTFconditional.sjdbOverhang | 136 --sjdbGTFfile '${refGenomeSource.GTFconditional.sjdbGTFfile}' |
| 127 --sjdbGTFfile '${refGenomeSource.GTFconditional.sjdbGTFfile}' | 137 #if str($refGenomeSource.GTFconditional.sjdbGTFfile.ext) == 'gff3': |
| 128 #if str($refGenomeSource.GTFconditional.sjdbGTFfile.ext) == 'gff3': | 138 --sjdbGTFtagExonParentTranscript Parent |
| 129 --sjdbGTFtagExonParentTranscript Parent | |
| 130 #end if | |
| 131 #end if | 139 #end if |
| 132 #end if | 140 #end if |
| 133 #end if | 141 #end if |
| 134 ]]></token> | 142 ]]></token> |
| 135 <token name="@READSHANDLING@" ><![CDATA[ | 143 <token name="@READSHANDLING@" ><![CDATA[ |
| 136 ## Check that the input pairs are of the same type | 144 ## Check that the input pairs are of the same type |
| 137 ## otherwise STARsolo will run for a long time and then error out. | 145 ## otherwise STARsolo will run for a long time and then error out. |
| 138 ## We consume either repeats of two inputs R1 + R2 | 146 ## We consume either repeats of two inputs R1 + R2 |
| 139 ## or a collection of paired reads. | 147 ## or a collection of paired reads. |
| 159 --soloCBmatchWLtype $sc.soloCBmatchWLtype | 167 --soloCBmatchWLtype $sc.soloCBmatchWLtype |
| 160 #if $r1.is_of_type('fastq.gz', 'fastqsanger.gz'): | 168 #if $r1.is_of_type('fastq.gz', 'fastqsanger.gz'): |
| 161 @FASTQ_GZ_OPTION@ | 169 @FASTQ_GZ_OPTION@ |
| 162 #end if | 170 #end if |
| 163 ]]></token> | 171 ]]></token> |
| 172 <token name="@LIMITS@" ><![CDATA[ | |
| 173 --limitOutSJoneRead $getVar('algo.params.junction_limits.limitOutSJoneRead', $getVar('solo.junction_limits.limitOutSJoneRead', 1000)) | |
| 174 --limitOutSJcollapsed $getVar('algo.params.junction_limits.limitOutSJcollapsed', $getVar('solo.junction_limits.limitOutSJcollapsed', 1000000)) | |
| 175 --limitSjdbInsertNsj $getVar('algo.params.junction_limits.limitSjdbInsertNsj', $getVar('solo.junction_limits.limitSjdbInsertNsj', 1000000)) | |
| 176 ]]></token> | |
| 164 <xml name="ref_selection"> | 177 <xml name="ref_selection"> |
| 165 <param argument="--genomeFastaFiles" type="data" format="fasta" label="Select a reference genome" /> | 178 <param argument="--genomeFastaFiles" type="data" format="fasta,fasta.gz" label="Select a reference genome" /> |
| 166 <param argument="--genomeSAindexNbases" type="integer" min="2" max="16" value="14" label="Length of the SA pre-indexing string" help="Typically between 10 and 15. Longer strings will use much more memory, but allow faster searches. For small genomes, the parameter --genomeSAindexNbases must be scaled down to min(14, log2(GenomeLength)/2 - 1)"/> | 179 <param argument="--genomeSAindexNbases" type="integer" min="2" max="16" value="14" label="Length of the SA pre-indexing string" help="Typically between 10 and 15. Longer strings will use much more memory, but allow faster searches. For small genomes, the parameter --genomeSAindexNbases must be scaled down to min(14, log2(GenomeLength)/2 - 1)"/> |
| 167 </xml> | 180 </xml> |
| 168 <xml name="stdio" > | 181 <xml name="stdio" > |
| 169 <stdio> | 182 <stdio> |
| 170 <regex match="FATAL error" source="both" level="fatal"/> | 183 <regex match="FATAL error" source="both" level="fatal"/> |
| 243 <option value="Hamming" selected="true" >Adapter clipping based on Hamming distance</option> | 256 <option value="Hamming" selected="true" >Adapter clipping based on Hamming distance</option> |
| 244 <option value="CellRanger4" >5p and 3p adapter clipping similar to CellRanger4</option> | 257 <option value="CellRanger4" >5p and 3p adapter clipping similar to CellRanger4</option> |
| 245 <option value="None" >No adapter clipping</option> | 258 <option value="None" >No adapter clipping</option> |
| 246 </param> | 259 </param> |
| 247 </xml> | 260 </xml> |
| 261 <xml name="common_SAM_attributes"> | |
| 262 <option value="NH" selected="true">NH (number of reported alignments/hits for the read)</option> | |
| 263 <option value="HI" selected="true">HI (query hit index)</option> | |
| 264 <option value="AS" selected="true">AS (local alignment score)</option> | |
| 265 <option value="nM" selected="true">nM (number of mismatches per (paired) alignment)</option> | |
| 266 <option value="NM">NM (edit distance of the aligned read to the reference)</option> | |
| 267 <option value="MD">MD (string for mismatching positions)</option> | |
| 268 <option value="jM">jM (intron motifs for all junctions)</option> | |
| 269 <option value="jI">jI (1-based start and end of introns for all junctions)</option> | |
| 270 </xml> | |
| 271 <xml name="limits"> | |
| 272 <section name="junction_limits" title="Junction Limits" expanded="false"> | |
| 273 <param argument="--limitOutSJoneRead" type="integer" min="1" value="1000" label="Maximum number of junctions for one read (including all multimappers)" /> | |
| 274 <param argument="--limitOutSJcollapsed" type="integer" min="1" value="1000000" label="Maximum number of collapsed junctions" /> | |
| 275 <param argument="--limitSjdbInsertNsj" type="integer" min="0" value="1000000" label="Maximum number of inserts to be inserted into the genome on the fly." /> | |
| 276 </section> | |
| 277 </xml> | |
| 278 <xml name="outCountActions"> | |
| 279 <actions> | |
| 280 <action name="column_names" type="metadata" default="GeneID,Counts_unstrand,Counts_firstStrand,Counts_secondStrand" /> | |
| 281 </actions> | |
| 282 </xml> | |
| 283 <xml name="outWig"> | |
| 284 <conditional name="outWig"> | |
| 285 <param name="outWigType" type="select" label="Compute coverage"> | |
| 286 <option value="None">No coverage</option> | |
| 287 <option value="bedGraph">Yes in bedgraph format</option> | |
| 288 <option value="wiggle">Yes in wiggle format</option> | |
| 289 </param> | |
| 290 <when value="None"> | |
| 291 <!-- This is necessary for the filtering of output --> | |
| 292 <param name="outWigStrand" type="hidden" value="false" /> | |
| 293 </when> | |
| 294 <when value="bedGraph"> | |
| 295 <expand macro="outWigParams"/> | |
| 296 </when> | |
| 297 <when value="wiggle"> | |
| 298 <expand macro="outWigParams"/> | |
| 299 </when> | |
| 300 </conditional> | |
| 301 </xml> | |
| 302 <xml name="outWigParams"> | |
| 303 <param name="outWigTypeSecondWord" type="select" label="Input for coverage"> | |
| 304 <option value="">Default (everything that mapped)</option> | |
| 305 <option value="read_5p">signal from only 5’ of the 1st read</option> | |
| 306 <option value="read2">signal from only 2nd read</option> | |
| 307 </param> | |
| 308 <param argument="--outWigStrand" type="boolean" truevalue="Stranded" falsevalue="Unstranded" checked="true" label="collapse strands (unstranded coverage)" help="By default, the strands are separated."/> | |
| 309 <param argument="--outWigReferencesPrefix" type="text" value="-" label="prefix matching reference name" help="For example, set 'chr' if you mapped on an ensembl genome but you want to display on UCSC"/> | |
| 310 <param argument="--outWigNorm" type="boolean" truevalue="RPM" falsevalue="None" checked="true" label="Normalize coverage to million of mapped reads (RPM)"/> | |
| 311 </xml> | |
| 312 <token name="@OUTWIG@"><![CDATA[ | |
| 313 #if str($outWig.outWigType) != 'None': | |
| 314 --outWigType '$outWig.outWigType' '$outWig.outWigTypeSecondWord' | |
| 315 --outWigStrand '$outWig.outWigStrand' | |
| 316 --outWigReferencesPrefix '$outWig.outWigReferencesPrefix' | |
| 317 --outWigNorm '$outWig.outWigNorm' | |
| 318 #end if | |
| 319 ]]></token> | |
| 320 <token name="@OUTWIGOUTPUTS@"><![CDATA[ | |
| 321 #if str($outWig.outWigType) == "bedGraph": | |
| 322 && mv Signal.Unique.str1.out.bg Signal.Unique.str1.out | |
| 323 && mv Signal.UniqueMultiple.str1.out.bg Signal.UniqueMultiple.str1.out | |
| 324 #if str($outWig.outWigStrand) == "Stranded": | |
| 325 && mv Signal.Unique.str2.out.bg Signal.Unique.str2.out | |
| 326 && mv Signal.UniqueMultiple.str2.out.bg Signal.UniqueMultiple.str2.out | |
| 327 #end if | |
| 328 #elif str($outWig.outWigType) == "wiggle": | |
| 329 && mv Signal.Unique.str1.out.wig Signal.Unique.str1.out | |
| 330 && mv Signal.UniqueMultiple.str1.out.wig Signal.UniqueMultiple.str1.out | |
| 331 #if str($outWig.outWigStrand) == "Stranded": | |
| 332 && mv Signal.Unique.str2.out.wig Signal.Unique.str2.out | |
| 333 && mv Signal.UniqueMultiple.str2.out.wig Signal.UniqueMultiple.str2.out | |
| 334 #end if | |
| 335 #end if | |
| 336 ]]></token> | |
| 337 <xml name="outWigOutputs"> | |
| 338 <data format="bedgraph" name="signal_unique_str1" label="${tool.name} on ${on_string}: Coverage Uniquely mapped strand 1" from_work_dir="Signal.Unique.str1.out"> | |
| 339 <filter>outWig['outWigType'] != "None"</filter> | |
| 340 <expand macro="dbKeyActions" /> | |
| 341 <change_format> | |
| 342 <when input="outWig.outWigType" value="wiggle" format="wig" /> | |
| 343 </change_format> | |
| 344 </data> | |
| 345 <data format="bedgraph" name="signal_uniquemultiple_str1" label="${tool.name} on ${on_string}: Coverage Uniquely + Multiple mapped strand 1" from_work_dir="Signal.UniqueMultiple.str1.out"> | |
| 346 <filter>outWig['outWigType'] != "None"</filter> | |
| 347 <expand macro="dbKeyActions" /> | |
| 348 <change_format> | |
| 349 <when input="outWig.outWigType" value="wiggle" format="wig" /> | |
| 350 </change_format> | |
| 351 </data> | |
| 352 <data format="bedgraph" name="signal_unique_str2" label="${tool.name} on ${on_string}: Coverage Uniquely mapped strand 2" from_work_dir="Signal.Unique.str2.out"> | |
| 353 <filter>outWig['outWigType'] != "None" and outWig['outWigStrand']</filter> | |
| 354 <expand macro="dbKeyActions" /> | |
| 355 <change_format> | |
| 356 <when input="outWig.outWigType" value="wiggle" format="wig" /> | |
| 357 </change_format> | |
| 358 </data> | |
| 359 <data format="bedgraph" name="signal_uniquemultiple_str2" label="${tool.name} on ${on_string}: Coverage Uniquely + Multiple mapped strand 2" from_work_dir="Signal.UniqueMultiple.str2.out"> | |
| 360 <filter>outWig['outWigType'] != "None" and outWig['outWigStrand']</filter> | |
| 361 <expand macro="dbKeyActions" /> | |
| 362 <change_format> | |
| 363 <when input="outWig.outWigType" value="wiggle" format="wig" /> | |
| 364 </change_format> | |
| 365 </data> | |
| 366 </xml> | |
| 367 <xml name="quantMode"> | |
| 368 <conditional name="quantmode_output"> | |
| 369 <param argument="--quantMode" type="select" | |
| 370 label="Per gene/transcript output" | |
| 371 help="STAR can provide analysis results not only with respect to the reference genome, but also with respect to genes and transcripts described by a gene model. Note: This functionality requires either the selection above of a cached index with a gene model, or a gene model provided alongside the index/reference genome in GTF or GFF3 format!"> | |
| 372 <option value="-">No per gene or transcript output</option> | |
| 373 <option value="GeneCounts">Per gene read counts (GeneCounts)</option> | |
| 374 <option value="TranscriptomeSAM">Transcript-based BAM output (TranscriptomeSAM)</option> | |
| 375 <option value="TranscriptomeSAM GeneCounts">Both per gene read counts and transcript-based BAM output (TranscriptomeSAM GeneCounts)</option> | |
| 376 </param> | |
| 377 <when value="-" /> | |
| 378 <when value="GeneCounts" /> | |
| 379 <when value="TranscriptomeSAM"> | |
| 380 <param argument="--quantTranscriptomeBan" type="boolean" truevalue="IndelSoftclipSingleend" falsevalue="Singleend" | |
| 381 label="Exclude alignments with indels or soft clipping from the transcriptome BAM output?" | |
| 382 help="You will need to exclude alignments with indels and soft-clipped bases from the transcriptome BAM output for compatibility with certain transcript quantification tools, most notably RSEM. If you are using a tool, like eXpress, that can deal with indels and soft-clipped bases, you can achieve better results by leaving this option disabled." /> | |
| 383 </when> | |
| 384 <when value="TranscriptomeSAM GeneCounts"> | |
| 385 <param argument="--quantTranscriptomeBan" type="boolean" truevalue="IndelSoftclipSingleend" falsevalue="Singleend" | |
| 386 label="Exclude alignments with indels or soft clipping from the transcriptome BAM output?" | |
| 387 help="You will need to exclude alignments with indels and soft-clipped bases from the transcriptome BAM output for compatibility with certain transcript quantification tools, most notably RSEM. If you are using a tool, like eXpress, that can deal with indels and soft-clipped bases, you can achieve better results by leaving this option disabled." /> | |
| 388 </when> | |
| 389 </conditional> | |
| 390 </xml> | |
| 391 <xml name="quantModeNoGTF"> | |
| 392 <conditional name="quantmode_output"> | |
| 393 <param argument="--quantMode" type="select" | |
| 394 label="Per gene/transcript output"> | |
| 395 <option value="-">No per gene or transcript output as no GTF was provided</option> | |
| 396 </param> | |
| 397 <when value="-" /> | |
| 398 </conditional> | |
| 399 </xml> | |
| 248 </macros> | 400 </macros> |
