comparison sailfish.xml @ 5:1b4ed566a41c draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sailfish commit 03edb751808fef8bce744ebcbad5661a32373211
author bgruening
date Wed, 02 Nov 2016 10:30:36 -0400
parents 03c74355227f
children 5bc9cd008ceb
comparison
equal deleted inserted replaced
4:03c74355227f 5:1b4ed566a41c
1 <tool id="sailfish" name="Sailfish" version="0.7.6.1"> 1 <tool id="sailfish" name="Sailfish" version="0.10.1">
2 <description>transcript quantification from RNA-seq data</description> 2 <description>transcript quantification from RNA-seq data</description>
3 <requirements>
4 <requirement type="package" version="0.7.6">sailfish</requirement>
5 <requirement type="package" version="1.57.0">boost</requirement>
6 </requirements>
7 <macros> 3 <macros>
8 <xml name="strandedness"> 4 <xml name="strandedness">
9 <param name="strandedness" type="select" label="Specify the strandedness of the reads"> 5 <param name="strandedness" type="select" label="Specify the strandedness of the reads">
10 <option value="U" selected="True">Not stranded (U)</option> 6 <option value="U" selected="True">Not stranded (U)</option>
11 <option value="SF">read 1 (or single-end read) comes from the forward strand (SF)</option> 7 <option value="SF">read 1 (or single-end read) comes from the forward strand (SF)</option>
12 <option value="SR">read 1 (or single-end read) comes from the reverse strand (SR)</option> 8 <option value="SR">read 1 (or single-end read) comes from the reverse strand (SR)</option>
13 </param> 9 </param>
14 </xml> 10 </xml>
15 </macros> 11 </macros>
12 <requirements>
13 <requirement type="package" version="0.10.1">sailfish</requirement>
14 </requirements>
16 <stdio> 15 <stdio>
17 <exit_code range="1:" /> 16 <exit_code range="1:" />
18 <exit_code range=":-1" /> 17 <exit_code range=":-1" />
19 <regex match="Error:" /> 18 <regex match="Error:" />
20 <regex match="Exception:" /> 19 <regex match="Exception:" />
21 <regex match="Exception :" /> 20 <regex match="Exception :" />
22 </stdio> 21 </stdio>
23 <version_command>sailfish -version</version_command> 22 <version_command>sailfish -version</version_command>
24 <command> 23 <command>
25 <![CDATA[ 24 <![CDATA[
26
27 #if $refTranscriptSource.TranscriptSource == "history": 25 #if $refTranscriptSource.TranscriptSource == "history":
28 sailfish index 26 sailfish index
29 --transcripts $refTranscriptSource.ownFile 27 --transcripts $refTranscriptSource.ownFile
30 --kmerSize $refTranscriptSource.kmerSize 28 --kmerSize $refTranscriptSource.kmerSize
31 --out ./index_dir 29 --out ./index_dir
32 --threads "\${GALAXY_SLOTS:-4}" 30 --threads "\${GALAXY_SLOTS:-4}"
33 #set $index_path = './index_dir' 31 #set $index_path = './index_dir'
34 #else: 32 #else:
35 #set $index_path = $refTranscriptSource.index.fields.path 33 #set $index_path = $refTranscriptSource.index.fields.path
36 #end if 34 #end if
37
38 && 35 &&
39
40 #if $single_or_paired.single_or_paired_opts == 'single': 36 #if $single_or_paired.single_or_paired_opts == 'single':
41
42 #if $single_or_paired.input_singles.ext == 'fasta': 37 #if $single_or_paired.input_singles.ext == 'fasta':
43 #set $ext = 'fasta' 38 #set $ext = 'fasta'
44 #else: 39 #else:
45 #set $ext = 'fastq' 40 #set $ext = 'fastq'
46 #end if 41 #end if
47
48 ln -s $single_or_paired.input_singles ./single.$ext && 42 ln -s $single_or_paired.input_singles ./single.$ext &&
49 #else: 43 #else:
50
51 #if $single_or_paired.input_mate1.ext == 'fasta': 44 #if $single_or_paired.input_mate1.ext == 'fasta':
52 #set $ext = 'fasta' 45 #set $ext = 'fasta'
53 #else: 46 #else:
54 #set $ext = 'fastq' 47 #set $ext = 'fastq'
55 #end if 48 #end if
56
57 ln -s $single_or_paired.input_mate1 ./mate1.$ext && 49 ln -s $single_or_paired.input_mate1 ./mate1.$ext &&
58 ln -s $single_or_paired.input_mate2 ./mate2.$ext && 50 ln -s $single_or_paired.input_mate2 ./mate2.$ext &&
59 #end if 51 #end if
60
61
62 #if $geneMap: 52 #if $geneMap:
63 ln -s "$geneMap" ./geneMap.$geneMap.ext && 53 ln -s "$geneMap" ./geneMap.$geneMap.ext &&
64 #end if 54 #end if
65
66 sailfish quant 55 sailfish quant
67 --index $index_path 56 --index $index_path
68 #if $single_or_paired.single_or_paired_opts == 'single': 57 #if $single_or_paired.single_or_paired_opts == 'single':
69 --libType ${single_or_paired.strandedness} 58 --libType ${single_or_paired.strandedness}
70 --unmatedReads ./single.$ext 59 --unmatedReads ./single.$ext
71 #else: 60 #else:
72 --mates1 ./mate1.$ext 61 --mates1 ./mate1.$ext
73 --mates2 ./mate2.$ext 62 --mates2 ./mate2.$ext
74 --libType "${single_or_paired.orientation}${single_or_paired.strandedness}" 63 --libType "${single_or_paired.orientation}${single_or_paired.strandedness}"
75 #end if 64 #end if
76 --output ./ 65 --output ./results
77 $biasCorrect 66 $biasCorrect
67 $gcBiasCorrect
78 --threads "\${GALAXY_SLOTS:-4}" 68 --threads "\${GALAXY_SLOTS:-4}"
79 69 $dumpEq
80 #if $fldMean: 70 #if str($gcSizeSamp):
71 --gcSizeSamp $gcSizeSamp
72 #end if
73 #if str($gcSpeedSamp):
74 --gcSpeedSamp $gcSpeedSamp
75 #end if
76 #if str($fldMean):
81 --fldMean $fldMean 77 --fldMean $fldMean
82 #end if 78 #end if
83 79 #if str($fldSD):
84 #if $fldSD:
85 --fldSD $fldSD 80 --fldSD $fldSD
86 #end if 81 #end if
87
88 #if $maxReadOcc: 82 #if $maxReadOcc:
89 --maxReadOcc $maxReadOcc 83 --maxReadOcc $maxReadOcc
90 #end if 84 #end if
91
92 #if $geneMap: 85 #if $geneMap:
93 --geneMap ./geneMap.${geneMap.ext} 86 --geneMap ./geneMap.${geneMap.ext}
94 #end if 87 #end if
95 88 $strictIntersect
96 $noEffectiveLengthCorrection 89 $noEffectiveLengthCorrection
97 $useVBOpt 90 $useVBOpt
98 $allowOrphans 91 $discardOrphans
99
100 $unsmoothedFLD 92 $unsmoothedFLD
101 --maxFragLen ${maxFragLen} 93 --maxFragLen ${maxFragLen}
102 --txpAggregationKey "${txpAggregationKey}" 94 --txpAggregationKey '${txpAggregationKey}'
103 95 $ignoreLibCompat
96 $enforceLibCompat
97 $allowDovetail
98 #if str($numBiasSamples):
99 --numBiasSamples $numBiasSamples
100 #end if
101 #if str($numFragSamples):
102 --numFragSamples $numFragSamples
103 #end if
104 #if str($numGibbsSamples):
105 --numGibbsSamples $numGibbsSamples
106 #end if
107 #if str($numBootstraps):
108 --numBootstraps $numBootstraps
109 #end if
104 ]]> 110 ]]>
105 </command> 111 </command>
106 <inputs> 112 <inputs>
107 <conditional name="refTranscriptSource"> 113 <conditional name="refTranscriptSource">
108 <param name="TranscriptSource" type="select" label="Select a reference transcriptome from your history or use a built-in index?" help="Built-ins were indexed using default options"> 114 <param name="TranscriptSource" type="select" label="Select a reference transcriptome from your history or use a built-in index?" help="Built-ins were indexed using default options">
116 <validator type="no_options" message="No indexes are available for the selected input dataset"/> 122 <validator type="no_options" message="No indexes are available for the selected input dataset"/>
117 </options> 123 </options>
118 </param> 124 </param>
119 </when> <!-- built-in --> 125 </when> <!-- built-in -->
120 <when value="history"> 126 <when value="history">
121 <param name="ownFile" type="data" format="fasta" metadata_name="dbkey" label="Select the reference transcriptome" help="in FASTA format" /> 127 <param name="ownFile" type="data" format="fasta" label="Select the reference transcriptome" help="in FASTA format" />
122 <param argument="kmerSize" type="integer" value="21" max="32" label="The size of the k-mer on which the index is built" 128 <param argument="kmerSize" type="integer" value="21" max="32" label="The size of the k-mer on which the index is built"
123 help="There is a tradeoff here between the distinctiveness of the k-mers and their robustness to errors. 129 help="There is a tradeoff here between the distinctiveness of the k-mers and their robustness to errors.
124 The shorter the k-mers, the more robust they will be to errors in the reads, but the longer the k-mers, 130 The shorter the k-mers, the more robust they will be to errors in the reads, but the longer the k-mers,
125 the more distinct they will be. We generally recommend using a k-mer size of at least 20."/> 131 the more distinct they will be. We generally recommend using a k-mer size of at least 20."/>
126 </when> <!-- history --> 132 </when> <!-- history -->
150 <param argument="--geneMap" type="data" format="tabular,gff,gtf" optional="True" label="File containing a mapping of transcripts to genes" 156 <param argument="--geneMap" type="data" format="tabular,gff,gtf" optional="True" label="File containing a mapping of transcripts to genes"
151 help="Calculates the aggregated gene-level abundance estimations. This file should be either a GTF file or tab-delimited format 157 help="Calculates the aggregated gene-level abundance estimations. This file should be either a GTF file or tab-delimited format
152 where each line contains the name of a transcript and the gene to which it belongs separated by a tab." /> 158 where each line contains the name of a transcript and the gene to which it belongs separated by a tab." />
153 159
154 <param argument="--biasCorrect" type="boolean" truevalue="--biasCorrect" falsevalue="" checked="False" 160 <param argument="--biasCorrect" type="boolean" truevalue="--biasCorrect" falsevalue="" checked="False"
155 label="Perform bias correction" help=""/> 161 label="Perform sequence-specific bias correction" help=""/>
162
163 <param argument="--gcBiasCorrect" type="boolean" truevalue="--gcBiasCorrect" falsevalue="" checked="False"
164 label="Perform fragment GC bias correction" help=""/>
165
166 <param argument="--dumpEq" type="boolean" truevalue="--dumpEq" falsevalue="" checked="False"
167 label="Dump the equivalence class counts that were computed during quasi-mapping." help=""/>
168
169 <param argument="--gcSizeSamp" type="integer" value="1" optional="True"
170 label="The value by which to down-sample transcripts when representing the GC content"
171 help="Larger values will reduce memory usage, but may decrease the fidelity of bias modeling results."/>
172
173 <param argument="--gcSpeedSamp" type="integer" value="1" optional="True"
174 label="The value at which the fragment length PMF is down-sampled when evaluating GC fragment bias."
175 help="Larger values speed up effective length correction, but may decrease the fidelity of bias modeling results."/>
176
177 <param argument="--strictIntersect" type="boolean" truevalue="--strictIntersect" falsevalue="" checked="False"
178 label="Strict Intersect." help="When this flag is set, if the intersection of the
179 quasi-mappings for the left and right is empty, then all mappings for the left and all mappings
180 for the right read are reported as orphaned quasi-mappings."/>
156 181
157 <param argument="--fldMean" type="integer" value="200" optional="True" label="Calculate effective lengths" 182 <param argument="--fldMean" type="integer" value="200" optional="True" label="Calculate effective lengths"
158 help="If single end reads are being used for quantification, or there are an insufficient number of uniquely mapping reads when performing paired-end quantification 183 help="If single end reads are being used for quantification, or there are an insufficient number of uniquely
159 to estimate the empirical fragment length distribution, then use this value to calculate effective lengths."/> 184 mapping reads when performing paired-end quantification
185 to estimate the empirical fragment length distribution, then use this value to calculate effective lengths."/>
160 186
161 <param argument="--fldSD" type="integer" value="80" optional="True" label="Standard deviation" 187 <param argument="--fldSD" type="integer" value="80" optional="True" label="Standard deviation"
162 help="The standard deviation used in the fragment length distribution for single-end quantification or when an empirical distribution cannot be learned."/> 188 help="The standard deviation used in the fragment length distribution for single-end quantification or
189 when an empirical distribution cannot be learned."/>
163 190
164 <param argument="--maxReadOcc" type="integer" value="200" optional="True" label="Maximal read mapping occurrence" 191 <param argument="--maxReadOcc" type="integer" value="200" optional="True" label="Maximal read mapping occurrence"
165 help="Reads mapping to more than this many places won't be considered."/> 192 help="Reads mapping to more than this many places won't be considered."/>
166 193
167 <param argument="--noEffectiveLengthCorrection" type="boolean" truevalue="--noEffectiveLengthCorrection" falsevalue="" checked="False" 194 <param argument="--noEffectiveLengthCorrection" type="boolean" truevalue="--noEffectiveLengthCorrection" falsevalue="" checked="False"
168 label="Disable effective length correction" help="Disables effective length correction when computing the probability that a fragment was generated from a transcript. 195 label="Disable effective length correction" help="Disables effective length correction when computing the probability
196 that a fragment was generated from a transcript.
169 If this flag is passed in, the fragment length distribution is not taken into account when computing this probability."/> 197 If this flag is passed in, the fragment length distribution is not taken into account when computing this probability."/>
170 198
171 <param argument="--useVBOpt" type="boolean" truevalue="--useVBOpt" falsevalue="" checked="False" 199 <param argument="--useVBOpt" type="boolean" truevalue="--useVBOpt" falsevalue="" checked="False"
172 label="Use Variational Bayesian EM algorithm for optimization" help=""/> 200 label="Use Variational Bayesian EM algorithm for optimization" help="Use Variational Bayesian EM algorithm rather
173 201 than the traditional EM algorithm for optimization"/>
174 <param argument="--allowOrphans" type="boolean" truevalue="--allowOrphans" falsevalue="" checked="False" 202
175 label="Consider orphaned reads as valid hits when performing lightweight-alignment" 203 <param argument="--discardOrphans" type="boolean" truevalue="--discardOrphans" falsevalue="" checked="False"
176 help="This option will increase sensitivity (allow more reads to map and more transcripts to be detected), but may decrease specificity as orphaned alignments are more likely to be spurious."/> 204 label="Discard orphaned reads instead of counting them as valid hits when performing lightweight-alignment"
205 help="This option will discard orphaned fragments. This only has an effect on paired-end input, but enabling this option will discard, rather than count, any reads where only one of the paired fragments maps to a transcript."/>
177 206
178 <param argument="--unsmoothedFLD" type="boolean" truevalue="--unsmoothedFLD" falsevalue="" checked="False" 207 <param argument="--unsmoothedFLD" type="boolean" truevalue="--unsmoothedFLD" falsevalue="" checked="False"
179 label="Use the un-smoothed approach to effective length correction" help="This traditional approach works by convolving the FLD with the characteristic function over each transcript."/> 208 label="Use the un-smoothed approach to effective length correction" help="This traditional approach works by convolving the FLD with the
209 characteristic function over each transcript."/>
180 210
181 <param argument="--maxFragLen" type="integer" value="1000" optional="True" 211 <param argument="--maxFragLen" type="integer" value="1000" optional="True"
182 label="The maximum length of a fragment to consider when building the empirical fragment length distribution" 212 label="The maximum length of a fragment to consider when building the empirical fragment length distribution"
183 help=""/> 213 help=""/>
184 214
185 <param argument="--txpAggregationKey" value="gene_id" type="text" label="The key for aggregating transcripts during gene-level estimates" 215 <param argument="--txpAggregationKey" value="gene_id" type="text" label="The key for aggregating transcripts during gene-level estimates">
186 help="The default is the gene_id field, but other fields (e.g. gene_name) might be useful depending on the specifics of the annotation being used." /> 216 <help>
187 217 <![CDATA[
218 When generating the gene-level estimates, use the provided key for aggregating transcripts. The default is the "gene_id" field,
219 but other fields (e.g. "gene_name") might be useful depending on the specifics of the annotation being used. Note: this option only
220 affects aggregation when using a GTF annotation; not an annotation in "simple" format.]]>
221 </help>
222 </param>
223 <param argument="--ignoreLibCompat" type="boolean" truevalue="--ignoreLibCompat" falsevalue="" checked="False"
224 label="Disables strand-aware processing completely.">
225 <help>
226 <![CDATA[
227 All hits are considered "Valid".]]>
228 </help>
229 </param>
230 <param argument="--enforceLibCompat" type="boolean" truevalue="--enforceLibCompat" falsevalue="" checked="False"
231 label="Enforces strict library compatibility.">
232 <help>
233 <![CDATA[
234 Fragments that map in a manner other than what is specified by the expected library type will be discarded,
235 even if there are no mappings that agree with the expected library type.]]>
236 </help>
237 </param>
238 <param argument="--allowDovetail" type="boolean" truevalue="--allowDovetail" falsevalue="" checked="False"
239 label="Allow paired-end reads from the same fragment to dovetail.">
240 <help>
241 <![CDATA[
242 Allow paired-end reads from the same fragment to "dovetail", such that the ends of the mapped reads can extend past each other.]]>
243 </help>
244 </param>
245 <param argument="--numBiasSamples" type="integer" value="1000000" optional="True"
246 label="Number of fragment mappings to use when learning the sequence-specific bias model"
247 help=""/>
248 <param argument="--numFragSamples" type="integer" value="10000" optional="True"
249 label="Number of fragments from unique alignments to sample when building the fragment length distribution"
250 help=""/>
251 <param argument="--numGibbsSamples" type="integer" value="0" optional="True"
252 label="Number of Gibbs sampling rounds to perform."
253 help=""/>
254 <param argument="--numBootstraps" type="integer" value="0" optional="True"
255 label="Number of bootstrap samples to generate."
254 help="This is mutually exclusive with Gibbs sampling"/>
188 </inputs> 257 </inputs>
258
259
189 <outputs> 260 <outputs>
190 <data name="output_quant" format="tabular" from_work_dir="quant.sf" label="${tool.name} on ${on_string} (Quantification)" /> 261 <data name="output_quant" format="tabular" from_work_dir="results/quant.sf" label="${tool.name} on ${on_string} (Quantification)" />
191 <data name="output_bias_corrected_quant" format="tabular" from_work_dir="quant_bias_corrected.sf" label="${tool.name} on ${on_string} (Bias corrected Quantification)"> 262 <data name="output_gene_quant" format="tabular" from_work_dir="results/quant.genes.sf" label="${tool.name} on ${on_string} (Gene Quantification)">
192 <filter>biasCorrect is True</filter> 263 <filter>geneMap</filter>
193 </data>
194 <data name="output_gene_quant" format="tabular" from_work_dir="quant.genes.sf" label="${tool.name} on ${on_string} (Gene Quantification)">
195 <filter>geneMap is True</filter>
196 </data> 264 </data>
197 </outputs> 265 </outputs>
198 <tests> 266 <tests>
199 <test> 267 <test>
200 <param name="single_or_paired_opts" value="paired" /> 268 <param name="single_or_paired_opts" value="paired" />
201 <param name="input_mate1" value="reads_1.fastq" /> 269 <param name="input_mate1" value="reads_1.fastq" />
202 <param name="input_mate2" value="reads_2.fastq" /> 270 <param name="input_mate2" value="reads_2.fastq" />
271 <param name="biasCorrect" value="False" />
272 <param name="TranscriptSource" value="history" />
273 <param name="ownFile" value="transcripts.fasta" ftype="fasta" />
274 <output file="sailfish_quant_result1.tab" ftype="tabular" name="output_quant" />
275 </test>
276 <test>
277 <param name="single_or_paired_opts" value="paired" />
278 <param name="input_mate1" value="reads_1.fastq" />
279 <param name="input_mate2" value="reads_2.fastq" />
203 <param name="biasCorrect" value="True" /> 280 <param name="biasCorrect" value="True" />
204 <param name="TranscriptSource" value="history" /> 281 <param name="TranscriptSource" value="history" />
205 <param name="ownFile" value="transcripts.fasta" ftype="fasta" /> 282 <param name="ownFile" value="transcripts.fasta" ftype="fasta" />
206 <output file="sailfish_quant_result1.tab" ftype="tabular" name="output_quant" /> 283 <output file="sailfish_bias_result1.tab" ftype="tabular" name="output_quant" />
207 <output file="sailfish_bias_result1.tab" ftype="tabular" name="output_bias_corrected_quant" /> 284 </test>
285 <test>
286 <param name="single_or_paired_opts" value="paired" />
287 <param name="input_mate1" value="reads_1.fastq" />
288 <param name="input_mate2" value="reads_2.fastq" />
289 <param name="biasCorrect" value="True" />
290 <param name="TranscriptSource" value="history" />
291 <param name="ownFile" value="transcripts.fasta" ftype="fasta" />
292 <param name="geneMap" value="gene_map.tab" ftype="tabular" />
293 <output file="sailfish_bias_result1.tab" ftype="tabular" name="output_quant" />
294 <output file="sailfish_genMap_result1.tab" ftype="tabular" name="output_gene_quant" />
208 </test> 295 </test>
209 </tests> 296 </tests>
210 <help> 297 <help><![CDATA[
211 <![CDATA[
212 298
213 **What it does** 299 **What it does**
214 300
215 Sailfish is a tool for transcript quantification from RNA-seq data. It 301 Sailfish is a tool for transcript quantification from RNA-seq data. It
216 requires a set of target transcripts (either from a reference or de-novo 302 requires a set of target transcripts (either from a reference or de-novo
334 420
335 The remaining salmon library format strings are not directly expressible in terms 421 The remaining salmon library format strings are not directly expressible in terms
336 of the TopHat library types, and so there is no direct mapping for them. 422 of the TopHat library types, and so there is no direct mapping for them.
337 423
338 424
339 ]]> 425 ]]></help>
340 </help> 426 <citations>
427 <citation type="doi">10.1038/nbt.2862</citation>
428 </citations>
341 </tool> 429 </tool>