comparison gsnap.xml @ 7:561503a442f0

refactor
author Jim Johnson <jj@umn.edu>
date Tue, 08 Nov 2011 13:26:41 -0600
parents
children a89fec682254
comparison
equal deleted inserted replaced
6:3be0e0a858fe 7:561503a442f0
1 <tool id="gsnap" name="GSNAP" version="2.0.0">
2 <description>Genomic Short-read Nucleotide Alignment Program</description>
3 <requirements>
4 <requirement type="binary">gsnap</requirement>
5 <!-- proposed tag for added datatype dependencies -->
6 <requirement type="datatype">gmapdb</requirement>
7 <requirement type="datatype">gmapsnpindex</requirement>
8 <requirement type="datatype">splicesites.iit</requirement>
9 <requirement type="datatype">introns.iit</requirement>
10 </requirements>
11 <version_string>gsnap --version</version_string>
12 <command>
13 #import os.path, re
14 gsnap
15 --nthreads="4" --ordered
16 #if $refGenomeSource.genomeSource == "gmapdb":
17 ## History gmapdb: the index directory is the dataset's extra_files_path and the database name is its db_name metadata
18 --dir=$refGenomeSource.gmapdb.extra_files_path --db=$refGenomeSource.gmapdb.metadata.db_name
19 #else:
20 --dir=$os.path.dirname($refGenomeSource.gmapindex.value) --db=$os.path.basename($refGenomeSource.gmapindex.value)
21 #end if
22 #if $refGenomeSource.kmer != None and len($refGenomeSource.kmer.__str__) == 2:
23 --kmer=$refGenomeSource.kmer
24 #end if
25 #if $refGenomeSource.use_splicing.src == 'gmapdb':
26 #if $refGenomeSource.use_splicing.splicemap != None and len($refGenomeSource.use_splicing.splicemap.__str__) > 0:
27 -s $refGenomeSource.use_splicing.splicemap.value
28 #end if
29 #elif $refGenomeSource.use_splicing.src == 'history':
30 #if $refGenomeSource.use_splicing.splicemap != None and len($refGenomeSource.use_splicing.splicemap.__str__) > 0:
31 -S $os.path.dirname($refGenomeSource.use_splicing.splicemap) -s $os.path.basename($refGenomeSource.use_splicing.splicemap)
32 #end if
33 #end if
34 #if $refGenomeSource.use_snps.src == 'gmapdb':
35 #if $refGenomeSource.use_snps.snpindex != None and len($refGenomeSource.use_snps.snpindex.__str__) > 0:
36 -v $refGenomeSource.use_snps.snpindex.value
37 #end if
38 #elif $refGenomeSource.use_snps.src == 'history':
39 #if $refGenomeSource.use_snps.snpindex != None and len($refGenomeSource.use_snps.snpindex.__str__) > 0:
40 -V $refGenomeSource.use_snps.snpindex.extra_files_path -v $refGenomeSource.use_snps.snpindex.metadata.snps_name
41 #end if
42 #end if
43 #if $refGenomeSource.mode.__str__ != '':
44 --mode=$refGenomeSource.mode
45 #end if
46 #if $mapq_unique_score.__str__ != '':
47 --mapq-unique-score=$mapq_unique_score
48 #end if
49 #if $computation.options == "advanced":
50 #if $computation.max_mismatches.__str__ != '':
51 --max-mismatches=$computation.max_mismatches
52 #end if
53 $computation.query_unk_mismatch
54 $computation.genome_unk_mismatch
55 #if $computation.terminal_threshold.__str__ != '':
56 --terminal-threshold=$computation.terminal_threshold
57 #end if
58 #if $computation.indel_penalty.__str__ != '':
59 --indel-penalty=$computation.indel_penalty
60 #end if
61 #if $computation.indel_endlength.__str__ != '':
62 --indel-endlength=$computation.indel_endlength
63 #end if
64 #if $computation.max_middle_insertions.__str__ != '':
65 --max-middle-insertions=$computation.max_middle_insertions
66 #end if
67 #if $computation.max_middle_deletions.__str__ != '':
68 --max-middle-deletions=$computation.max_middle_deletions
69 #end if
70 #if $computation.max_end_insertions.__str__ != '':
71 --max-end-insertions=$computation.max_end_insertions
72 #end if
73 #if $computation.max_end_deletions.__str__ != '':
74 --max-end-deletions=$computation.max_end_deletions
75 #end if
76 #if $computation.suboptimal_levels.__str__ != '':
77 --suboptimal-levels=$computation.suboptimal_levels
78 #end if
79 #if $computation.adapter_strip.__str__ != '':
80 --adapter-strip=$computation.adapter_strip
81 #end if
82 #if $computation.trim_mismatch_score.__str__ != '':
83 --trim-mismatch-score=$computation.trim_mismatch_score
84 #end if
85 ## TODO - do we need these options (Is it tally XOR runlength?):
86 ## --tallydir= --use-tally=tally
87 ## --runlengthdir --use-runlength=runlength
88 #if $computation.use_tally != None and len($computation.use_tally.__str__) > 0:
89 ##--tallydir $os.path.dirname($computation.use_tally) --use-tally $os.path.basename($computation.use_tally)
90 --use-tally=$computation.use_tally
91 #end if
92 ## gmap options
93 #if $computation.gmap_mode.__str__ != '' and $computation.gmap_mode.__str__ != 'None':
94 --gmap-mode='$computation.gmap_mode'
95 #end if
96 #if $computation.trigger_score_for_gmap.__str__ != '':
97 --trigger-score-for-gmap=$computation.trigger_score_for_gmap
98 #end if
99 #if $computation.max_gmap_pairsearch.__str__ != '' and $re.search("pairsearch",$computation.gmap_mode.__str__):
100 --max-gmap-pairsearch=$computation.max_gmap_pairsearch
101 #end if
102 #if $computation.max_gmap_terminal.__str__ != '' and $re.search("terminal",$computation.gmap_mode.__str__):
103 --max-gmap-terminal=$computation.max_gmap_terminal
104 #end if
105 #if $computation.max_gmap_improvement.__str__ != '' and $re.search("improv",$computation.gmap_mode.__str__):
106 --max-gmap-improvement=$computation.max_gmap_improvement
107 #end if
108 #if $computation.microexon_spliceprob.__str__ != '':
109 --microexon-spliceprob=$computation.microexon_spliceprob
110 #end if
111 #end if
112 #if $splicing.options == "advanced":
113 $splicing.novelsplicing
114 #if $splicing.localsplicedist.__str__ != '':
115 --localsplicedist=$splicing.localsplicedist
116 #end if
117 #if $splicing.local_splice_penalty.__str__ != '':
118 --local-splice-penalty=$splicing.local_splice_penalty
119 #end if
120 #if $splicing.distant_splice_penalty.__str__ != '':
121 --distant-splice-penalty=$splicing.distant_splice_penalty
122 #end if
123 #if $splicing.shortend_splice_endlength.__str__ != '':
124 --shortend-splice-endlength=$splicing.shortend_splice_endlength
125 #end if
126 #if $splicing.distant_splice_endlength.__str__ != '':
127 --distant-splice-endlength=$splicing.distant_splice_endlength
128 #end if
129 #if $splicing.distant_splice_identity.__str__ != '':
130 --distant-splice-identity=$splicing.distant_splice_identity
131 #end if
132 #end if
133 #if $output.options == "advanced":
134 #if $output.npath.__str__ != '':
135 --npath=$output.npath
136 #end if
137 $output.quiet_if_excessive
138 $output.show_refdiff
139 $output.clip_overlap
140 #end if
141 #if $result.format == "sam":
142 --format=sam
143 $result.no_sam_headers
144 #if $result.read_group_id.__str__.strip != '':
145 --read-group-id='$result.read_group_id'
146 #end if
147 #if $result.read_group_name.__str__ != '':
148 --read-group-name='$result.read_group_name'
149 #end if
150 #if $result.read_group_library.__str__ != '':
151 --read-group-library='$result.read_group_library'
152 #end if
153 #if $result.read_group_platform.__str__ != '':
154 --read-group-platform='$result.read_group_platform'
155 #end if
156 #if $result.quality_shift.__str__ != '':
157 --quality-shift=$result.quality_shift
158 #end if
159 #elif $result.format == "goby":
160 #if $result.goby_output.__str__ != '':
161 --goby-output='$result.goby_output'
162 #end if
163 #if $result.creads_window_start.__str__ != '':
164 --creads-window-start=$result.creads_window_start
165 #end if
166 #if $result.creads_window_end.__str__ != '':
167 --creads-window-end=$result.creads_window_end
168 #end if
169 $result.creads_complement
170 #end if
171 #if $results.split_output == 'yes':
172 --split-output=gsnap_out
173 #if $results.fails.choice == 'nofails':
174 --nofails
175 #elif $results.fails.choice == 'failsonly':
176 --failsonly
177 #end if
178 $results.fails_as_input
179 #else
180 #if $results.fails.choice == 'nofails':
181 --nofails
182 #elif $results.fails.choice == 'failsonly':
183 --failsonly
184 $results.fails.fails_as_input
185 #end if
186 #end if
187 #if $seq.format == "gsnap_fasta":
188 $seq.circularinput $seq.gsnap
189 #else if $seq.format == "fastq":
190 #if $seq.barcode_length.__str__ != '':
191 --barcode-length=$seq.barcode_length
192 #end if
193 #if $seq.fastq_id_start.__str__ != '':
194 --fastq-id-start=$seq.fastq_id_start
195 #end if
196 #if $seq.fastq_id_end.__str__ != '':
197 --fastq-id-end=$seq.fastq_id_end
198 #end if
199 #if $seq.filter_chastity.__str__ != 'off':
200 --filter-chastity=$seq.filter_chastity
201 #end if
202 #if $seq.paired.ispaired.__str__ == 'yes':
203 #if $seq.paired.pairmax_dna.__str__ != '':
204 --pairmax-dna=$seq.paired.pairmax_dna
205 #end if
206 #if $seq.paired.pairmax_rna.__str__ != '':
207 --pairmax-rna=$seq.paired.pairmax_rna
208 #end if
209 --orientation=$seq.paired.orientation $seq.fastq $seq.paired.fastq
210 #else
211 $seq.fastq
212 #end if
213 #end if
214 #if $results.split_output == 'yes':
215 2> $gsnap_stderr
216 #else:
217 #if $results.fails.choice.__str__ == 'failsonly' and $results.fails.fails_as_input.__str__ != '':
218 2> $gsnap_stderr > $gsnap_fq
219 #else
220 2> $gsnap_stderr > $gsnap_out
221 #end if
222 #end if
223
224 </command>
225 <inputs>
226 <!-- Input data -->
227 <conditional name="seq">
228 <param name="format" type="select" label="&lt;H2&gt;Input Sequences&lt;/H2&gt;Select the input format" help="">
229 <option value="fastq">Fastq</option>
230 <!--
231 <option value="goby">Goby compact-reads</option>
232 -->
233 <option value="gsnap_fasta">GSNAP fasta</option>
234 </param>
235 <when value="fastq">
236 <param name="fastq" type="data" format="fastq" label="Select a fastq dataset" />
237 <conditional name="paired">
238 <param name="ispaired" type="boolean" truevalue="yes" falsevalue="no" checked="false" label="Use Paired Reads?"/>
239 <when value="no"/>
240 <when value="yes">
241 <param name="fastq" type="data" format="fastq" label="Select the paired reads reverse dataset" />
242 <param name="orientation" type="select" label="Orientation of paired-end reads" help="">
243 <option value="FR">fwd-rev, typical Illumina default</option>
244 <option value="RF">rev-fwd, for circularized inserts</option>
245 <option value="FF">fwd-fwd, same strand</option>
246 </param>
247 <param name="pairmax_dna" type="integer" value="" optional="true" label="Max total genomic length for DNA-Seq paired reads, or other reads without splicing (default 1000)." help="Used if no splice file is provided and novelsplicing is off."/>
248 <param name="pairmax_rna" type="integer" value="" optional="true" label="Max total genomic length for RNA-Seq paired reads, or other reads that could have a splice (default 200000)." help="Used if novelsplicing is specified or a splice file is provided. Should probably match the value for localsplicedist."/>
249 </when>
250 </conditional>
251 <param name="barcode_length" type="integer" value="" optional="true" label="Amount of barcode to remove from start of read (default 0)" />
252 <param name="fastq_id_start" type="integer" value="" optional="true" label="Starting field of identifier in FASTQ header, whitespace-delimited, starting from 1" />
253 <param name="fastq_id_end" type="integer" value="" optional="true" label="Ending field of identifier in FASTQ header, whitespace-delimited, starting from 1"
254 help="Examples:
255 &lt;br&gt;@HWUSI-EAS100R:6:73:941:1973#0/1
256 &lt;br&gt; . start=1, end=1 (default) => identifier is HWUSI-EAS100R:6:73:941:1973#0/1
257 &lt;br&gt;@SRR001666.1 071112_SLXA-EAS1_s_7:5:1:817:345 length=36
258 &lt;br&gt; . start=1, end=1 => identifier is SRR001666.1
259 &lt;br&gt; . start=2, end=2 => identifier is 071112_SLXA-EAS1_s_7:5:1:817:345
260 &lt;br&gt; . start=1, end=2 => identifier is SRR001666.1 071112_SLXA-EAS1_s_7:5:1:817:345"
261 />
262 <param name="filter_chastity" type="select" label="Skip reads marked by the Illumina chastity program"
263 help="String after the accession having a 'Y' after the first colon, like this:
264 &lt;br&gt;@accession 1:Y:0:CTTGTA
265 &lt;br&gt;where the 'Y' signifies filtering by chastity.
266 &lt;br&gt; For 'either', a 'Y' on either end of a paired-end read will be filtered.
267 &lt;br&gt; For 'both', a 'Y' is required on both ends of a paired-end read (or on the only end of a single-end read)"
268 >
269 <option value="off">off - no filtering</option>
270 <option value="either">either - a 'Y' on either end of a paired-end read</option>
271 <option value="both">both - a 'Y' is required on both ends of a paired-end read or the only end of a single-end read</option>
272 </param>
273 </when>
274 <!--
275 <when value="goby">
276 </when>
277 -->
278 <when value="gsnap_fasta">
279 <param name="gsnap" type="data" format="fasta" label="Select a single-end dataset" help="GSNAP fasta must have the sequence entirely on one line, a second line is interpreted as the paired-end sequence"/>
280 <param name="circularinput" type="boolean" checked="false" truevalue="--circular-input=true" falsevalue="" label="Circular-end data (paired reads are on same strand)"/>
281 </when>
282
283 </conditional>
284 <param name="mapq_unique_score" type="integer" value="" optional="true" label="MAPQ score threshold"
285 help="For multiple results, consider as a unique result if only one of the results has a MAPQ score equal or greater than this
286 (if not selected, then reports all multiple results, up to npaths)" />
287
288 <!-- GMAPDB for alignment -->
289 <conditional name="refGenomeSource">
290 <param name="genomeSource" type="select" label="&lt;HR&gt;&lt;H2&gt;Align To&lt;/H2&gt;Will you select a reference genome from your history or use a built-in index?" help="Built-ins were indexed using default options">
291 <option value="indexed">Use a built-in index</option>
292 <option value="gmapdb">Use a gmapdb from your history</option>
293 </param>
294 <when value="indexed">
295 <param name="gmapindex" type="select" label="Select a reference genome" help="if your genome of interest is not listed - contact Galaxy team">
296 <options from_file="gmap_indices.loc">
297 <column name="uid" index="0" />
298 <column name="dbkey" index="1" />
299 <column name="name" index="2" />
300 <column name="kmers" index="3" />
301 <column name="maps" index="4" />
302 <column name="snps" index="5" />
303 <column name="value" index="6" />
304 </options>
305 </param>
306
307 <param name="kmer" type="select" data_ref="gmapindex" label="kmer size" help="Defaults to highest available kmer size">
308 <options from_file="gmap_indices.loc">
309 <column name="name" index="3"/>
310 <column name="value" index="3"/>
311 <filter type="param_value" ref="gmapindex" column="6"/>
312 <filter type="multiple_splitter" column="3" separator=","/>
313 <filter type="add_value" name="" value=""/>
314 <filter type="sort_by" column="3"/>
315 </options>
316 </param>
317
318 <param name="mode" type="select" label="Alignment mode" help="Assumes cmetindex and atoiindex were run on the gmap database.">
319 <option value="">standard</option>
320 <option value="cmet-stranded">cmet-stranded for bisulfite-treated DNA reads (tolerance to C-to-T changes)</option>
321 <option value="cmet-nonstranded">cmet-nonstranded for bisulfite-treated DNA reads (tolerance to C-to-T changes)</option>
322 <option value="atoi-stranded">atoi-stranded for RNA-editing tolerance (A-to-G changes)</option>
323 <option value="atoi-nonstranded">atoi-nonstranded for RNA-editing tolerance (A-to-G changes)</option>
324 </param>
325
326 <conditional name="use_splicing">
327 <param name="src" type="select" label="&lt;HR&gt;Known Splicesite and Introns"
328 help="Look for splicing involving known sites or known introns at short or long distances
329 See README instructions for the distinction between known sites and known introns">
330 <option value="none" selected="true">None</option>
331 <option value="gmapdb">From the GMAP Database</option>
332 <option value="history">A Map in your history</option>
333 </param>
334 <when value="none"/>
335 <when value="history">
336 <param name="splicemap" type="data" format="splicesites.iit,introns.iit" metadata_name="dbkey" label="Select a splicesite map"
337 help="built with GMAP IIT"/>
338 </when>
339 <when value="gmapdb">
340 <param name="splicemap" type="select" data_ref="gmapindex" label="Use map for splicing involving known sites or known introns" help="">
341 <options from_file="gmap_indices.loc">
342 <column name="name" index="4"/>
343 <column name="value" index="4"/>
344 <filter type="param_value" ref="gmapindex" column="6"/>
345 <filter type="multiple_splitter" column="4" separator=","/>
346 <filter type="add_value" name="" value=""/>
347 <filter type="sort_by" column="4"/>
348 </options>
349 </param>
350 </when>
351 </conditional>
352
353 <conditional name="use_snps">
354 <param name="src" type="select" label="&lt;HR&gt;Known SNPs" help="for SNP tolerant alignments">
355 <option value="none" selected="true">None</option>
356 <option value="gmapdb">From the GMAP Database</option>
357 <option value="history">A SNP Index in your history</option>
358 </param>
359 <when value="none"/>
360 <when value="history">
361 <param name="snpindex" type="data" format="gmapsnpindex" metadata_name="dbkey" label="Select a snpindex"
362 help="built with GMAP SNP Index"/>
363 </when>
364 <when value="gmapdb">
365 <param name="snpindex" type="select" data_ref="gmapindex" label="Use database containing known SNPs" help="">
366 <options from_file="gmap_indices.loc">
367 <column name="name" index="5"/>
368 <column name="value" index="5"/>
369 <filter type="param_value" ref="gmapindex" column="6"/>
370 <filter type="multiple_splitter" column="5" separator=","/>
371 <filter type="add_value" name="" value=""/>
372 <filter type="sort_by" column="5"/>
373 </options>
374 </param>
375 </when>
376 </conditional>
377
378 </when>
379 <when value="gmapdb">
380 <param name="gmapdb" type="data" format="gmapdb" metadata_name="dbkey" label="Select a gmapdb"
381 help="A GMAP database built with GMAP Build"/>
382 <param name="kmer" type="select" data_ref="gmapdb" label="kmer size" help="Defaults to highest available kmer size">
383 <options>
384 <filter type="data_meta" ref="gmapdb" key="kmers" multiple="True" separator=","/>
385 </options>
386 </param>
387
388 <param name="mode" type="select" label="Alignment mode" help="Assumes cmetindex and atoiindex were run on the gmap database.">
389 <option value="">standard</option>
390 <option value="cmet-stranded">cmet-stranded for bisulfite-treated DNA reads (tolerance to C-to-T changes)</option>
391 <option value="cmet-nonstranded">cmet-nonstranded for bisulfite-treated DNA reads (tolerance to C-to-T changes)</option>
392 <option value="atoi-stranded">atoi-stranded for RNA-editing tolerance (A-to-G changes)</option>
393 <option value="atoi-nonstranded">atoi-nonstranded for RNA-editing tolerance (A-to-G changes)</option>
394 </param>
395
396 <conditional name="use_splicing">
397 <param name="src" type="select" label="&lt;HR&gt;Known Splicesite and Introns"
398 help="Look for splicing involving known sites or known introns at short or long distances
399 See README instructions for the distinction between known sites and known introns">
400 <option value="none" selected="true">None</option>
401 <option value="gmapdb">From the GMAP Database</option>
402 <option value="history">A Map in your history</option>
403 </param>
404 <when value="none"/>
405 <when value="history">
406 <param name="splicemap" type="data" format="splicesites.iit,introns.iit" metadata_name="dbkey" label="Select a splicesite map"
407 help="built with GMAP IIT"/>
408 </when>
409 <when value="gmapdb">
410 <param name="splicemap" type="select" data_ref="gmapdb" label="Use map for splicing involving known sites or known introns" help="">
411 <options>
412 <filter type="data_meta" ref="gmapdb" key="maps" multiple="True"/>
413 </options>
414 </param>
415 </when>
416 </conditional>
417
418 <conditional name="use_snps">
419 <param name="src" type="select" label="&lt;HR&gt;Known SNPs" help="for SNP tolerant alignments">
420 <option value="none" selected="true">None</option>
421 <option value="gmapdb">From the GMAP Database</option>
422 <option value="history">A SNP Index in your history</option>
423 </param>
424 <when value="none"/>
425 <when value="history">
426 <param name="snpindex" type="data" format="gmapsnpindex" metadata_name="dbkey" label="Select a snpindex"
427 help="built with GMAP SNP Index"/>
428 </when>
429 <when value="gmapdb">
430 <param name="snpindex" type="select" data_ref="gmapdb" label="Use database containing known SNPs" help="">
431 <options>
432 <filter type="data_meta" ref="gmapdb" key="snps" multiple="True" separator=","/>
433 </options>
434 </param>
435 </when>
436 </conditional>
437
438 </when>
439 </conditional>
440
441 <!-- Computation options -->
442 <conditional name="computation">
443 <param name="options" type="select" label="&lt;HR&gt;Computational Settings" help="">
444 <option value="default">Use default settings</option>
445 <option value="advanced">Set Computation Options</option>
446 </param>
447 <when value="default"/>
448 <when value="advanced">
449 <param name="max_mismatches" type="float" value="" optional="true" label="Maximum number of mismatches allowed (uses default when negative)"
450 help="Defaults to the ultrafast level of ((readlength+2)/12 - 2)).
451 If specified between 0.0 and 1.0, then treated as a fraction
452 of each read length. Otherwise, treated as an integral number
453 of mismatches (including indel and splicing penalties)
454 For RNA-Seq, you may need to increase this value slightly
455 to align reads extending past the ends of an exon.">
456 <validator type="in_range" message="The mismatches must >= 0." min="0."/>
457 </param>
458 <param name="query_unk_mismatch" type="boolean" checked="false" truevalue="--query-unk-mismatch=1" falsevalue="" label="Count unknown (N) characters in the query as a mismatch"/>
459 <param name="genome_unk_mismatch" type="boolean" checked="true" truevalue="" falsevalue="--genome-unk-mismatch=0" label="Count unknown (N) characters in the genome as a mismatch"/>
460 <param name="terminal_threshold" type="integer" value="" optional="true" label="Threshold for searching for a terminal alignment (default 3)"
461 help="(from one end of the read to the best possible position at the other end). To turn off terminal alignments, set this to a high value." />
462 <param name="indel_penalty" type="integer" value="" optional="true" label="Penalty for an indel (default 2)"
463 help="Counts against mismatches allowed. To find indels, make indel-penalty less than or equal to max-mismatches. A value &lt; 2 can lead to false positives at read ends" />
464 <param name="indel_endlength" type="integer" value="" optional="true" label="Minimum length at end required for indel alignments (default 4)" />
465 <param name="max_middle_insertions" type="integer" value="" optional="true" label="Maximum number of middle insertions allowed (default 9)" />
466 <param name="max_middle_deletions" type="integer" value="" optional="true" label="Maximum number of middle deletions allowed (default 30)" />
467 <param name="max_end_insertions" type="integer" value="" optional="true" label="Maximum number of end insertions allowed (default 3)" />
468 <param name="max_end_deletions" type="integer" value="" optional="true" label="Maximum number of end deletions allowed (default 6)" />
469 <param name="suboptimal_levels" type="integer" value="" optional="true" label="Report suboptimal hits beyond best hit (default 0)"
470 help="All hits with best score plus suboptimal-levels are reported" />
471 <param name="adapter_strip" type="select" label="Method for removing adapters from reads"
472 help="paired removes adapters from paired-end reads if a concordant or paired alignment cannot be found from the original read">
473 <option value="paired" selected="true">paired</option>
474 <option value="off">off</option>
475 </param>
476 <param name="trim_mismatch_score" type="integer" value="" optional="true" label="Score to use for mismatches when trimming at ends (default is -3)"
477 help="to turn off trimming, specify 0"/>
478 <param name="use_tally" type="data" format="tally.iit" optional="true" metadata_name="dbkey" label="Select a tally IIT file to resolve concordant multiple results"
479 help="generated by gsnap_tally and iit_store"/>
480
481 <!--
482 tallydir=STRING Directory for tally IIT file to resolve concordant multiple results (default is
483 location of genome index files specified using -D and -d). Note: can
484 just give full path name to use-tally instead.
485 use-tally=STRING Use this tally IIT file to resolve concordant multiple results
486 runlengthdir=STRING Directory for runlength IIT file to resolve concordant multiple results (default is
487 location of genome index files specified using -D and -d). Note: can
488 just give full path name to use-runlength instead.
489 use-runlength=STRING Use this runlength IIT file to resolve concordant multiple results
490 -->
491
492 <!-- Options for GMAP alignment within GSNAP -->
493 <param name="gmap_mode" type="select" multiple="true" optional="true" display="checkboxes" label="Cases to use GMAP for complex alignments containing multiple splices or indels"
494 help="Default: pairsearch,terminal,improve">
495 <option value="pairsearch" selected="true">pairsearch</option>
496 <option value="terminal" selected="true">terminal</option>
497 <option value="improve" selected="true">improve</option>
498 </param>
499 <param name="trigger_score_for_gmap" type="integer" value="" optional="true" label="GMAP pairsearch threshold (default 5)"
500 help="Try GMAP pairsearch on nearby genomic regions if best score (the total of both ends if paired-end) exceeds this value (default 5)" />
501 <param name="max_gmap_pairsearch" type="integer" value="" optional="true" label="GMAP pairsearch threshold (default 3)"
502 help="Perform GMAP pairsearch on nearby genomic regions up to this many candidate ends (default 3)." />
503 <param name="max_gmap_terminal" type="integer" value="" optional="true" label="GMAP terminal threshold (default 3)"
504 help="Perform GMAP terminal on nearby genomic regions up to this many candidate ends (default 3)." />
505 <param name="max_gmap_improvement" type="integer" value="" optional="true" label="GMAP improvement threshold (default 3)"
506 help="Perform GMAP improvement on nearby genomic regions up to this many candidate ends (default 3)." />
507 <param name="microexon_spliceprob" type="float" value="" optional="true" label="GMAP microexons threshold (default .90)"
508 help="Allow microexons only if one of the splice site probabilities is greater than this value." >
509 <validator type="in_range" message="The microexons probability must be between 0. and 1." min="0." max="1."/>
510 </param>
511 </when>
512 </conditional>
513
514 <conditional name="splicing">
515 <param name="options" type="select" label="&lt;HR&gt;Splicing options for RNA-Seq" help="">
516 <option value="default">Use default settings</option>
517 <option value="advanced">Set Splicing Options</option>
518 </param>
519 <when value="default"/>
520 <when value="advanced">
521 <!-- Splicing options for RNA-Seq -->
522 <!-- use-splicing This should be either a select list from the gmapdb maps or a data type using splicesdir and use-splicing -->
523 <!-- Neither novel splicing (-N) nor known splicing (-s) turned on => assume reads are DNA-Seq (genomic) -->
524 <param name="novelsplicing" type="boolean" checked="false" truevalue="--novelsplicing=1" falsevalue="" label="Look for novel splicing "/>
525 <param name="localsplicedist" type="integer" value="" optional="true" label="Definition of local novel splicing event (default 200000)"/>
526 <param name="local_splice_penalty" type="integer" value="" optional="true" label="Penalty for a local splice (default 0). Counts against mismatches allowed"/>
527 <param name="distant_splice_penalty" type="integer" value="" optional="true" label="Penalty for a distant splice (default 3). Counts against mismatches allowed"
528 help="A distant splice is one where the intron length exceeds the value of localsplicedist or is an
529 inversion, scramble, or translocation between two different chromosomes. Counts against mismatches allowed"/>
530 <param name="distant_splice_endlength" type="integer" value="" optional="true" label="Minimum length at end required for distant spliced alignments"
531 help="(default 16, min is the kmer length)"/>
532 <param name="shortend_splice_endlength" type="integer" value="" optional="true" label="Minimum length at end required for short-end spliced alignments"
533 help="(default 2, but unless known splice sites are provided, GSNAP may still need the end length to be the value of kmer size to find a given splice"/>
534 <param name="distant_splice_identity" type="float" value="" optional="true" label="Minimum identity at end required for distant spliced alignments (default 0.95)"/>
535 <param name="antistranded_penalty" type="integer" value="" optional="true" label="Penalty for antistranded splicing when using stranded RNA-Seq protocols"
536 help="A positive value, such as 1, expects antisense on the first read and sense on the second read.
537 Default is 0, which treats sense and antisense equally well"/>
538 </when>
539 </conditional>
540
541 <!-- Output data -->
542 <conditional name="output">
543 <param name="options" type="select" label="&lt;HR&gt;&lt;H2&gt;Output&lt;/H2&gt;Output options for RNA-Seq" help="">
544 <option value="default">Use default settings</option>
545 <option value="advanced">Set Output Options</option>
546 </param>
547 <when value="default"/>
548 <when value="advanced">
549 <param name="npath" type="integer" value="" optional="true" label="Maximum number of paths to print (default 100)"/>
550 <param name="quiet_if_excessive" type="boolean" checked="false" truevalue="--quiet-if-excessive" falsevalue="" label="Quiet if Excessive"
551 help="If more than maximum number of paths are found, then nothing is printed."/>
552 <param name="show_refdiff" type="boolean" checked="false" truevalue="--show-refdiff" falsevalue="" label="Show SNP-tolerant alignment"
553 help="For GSNAP output in SNP-tolerant alignment, shows all differences relative to the reference genome as lower case (otherwise, it shows all differences relative to both the reference and alternate genome)"/>
554 <param name="clip_overlap" type="boolean" checked="false" truevalue="--clip-overlap" falsevalue="" label="Clip Overlap"
555 help="For paired-end reads whose alignments overlap, clip the overlapping region."/>
556 </when>
557 </conditional>
558 <conditional name="result">
559 <param name="format" type="select" label="Select the output format" help="">
560 <option value="sam">SAM</option>
561 <!-- Goby output is disabled for now: it should only be offered when the input is in Goby format
562 <option value="goby">Goby</option>
563 -->
564 <option value="gsnap">GSNAP default output</option>
565 </param>
566 <when value="gsnap"> <!-- GSNAP default output declares no extra parameters -->
567 </when>
568 <when value="sam"> <!-- SAM-only options: header suppression, read-group fields and quality shift -->
569 <param name="no_sam_headers" type="boolean" truevalue="--no-sam-headers" falsevalue="" checked="false" label="Do not print headers beginning with '@'"/>
570 <param name="read_group_id" type="text" value="" optional="true" label="Value to put into read-group id (RG-ID) field"/>
571 <param name="read_group_name" type="text" value="" optional="true" label="Value to put into read-group name (RG-SM) field"/>
572 <param name="read_group_library" type="text" value="" optional="true" label="Value to put into read-group library (RG-LB) field"/>
573 <param name="read_group_platform" type="text" value="" optional="true" label="Value to put into read-group library platform (RG-PL) field"/>
574 <param name="quality_shift" type="integer" value="" optional="true" label="Shift FASTQ quality scores by this amount in SAM output (default -31)"/>
575 </when>
576 <!-- Goby parameters, disabled together with the goby format option above
577 <when value="goby">
578 <param name="goby_output" type="text" value="" label="Basename for Goby output files"/>
579 <param name="creads_window_start" type="integer" value="" optional="true" label="Compact reads window start (default: 0=start of file)"/>
580 <param name="creads_window_end" type="integer" value="" optional="true" label="Compact reads window end (default: 0=end of file)"/>
581 <param name="creads_complement" type="boolean" truevalue="-\-creads-complement" falsevalue="" checked="false" label="Complement read sequences (without reversing)"/>
582 </when>
583 -->
584 </conditional>
585 <!-- TODO combine fails and split_output: the "fails" conditional is currently declared twice, once in each branch of "results" -->
586
587 <conditional name="results">
588 <param name="split_output" type="select" label="&lt;HR&gt;Split outputs"
589 help="Separate outputs for: nomapping, halfmapping_uniq, halfmapping_mult, unpaired_uniq, unpaired_mult, paired_uniq, paired_mult, concordant_uniq, and concordant_mult results">
590 <option value="no">no</option>
591 <option value="yes">yes</option>
592 </param>
593 <when value="no"> <!-- single combined output: fails_as_input is nested inside the "fails" conditional and only offered for failsonly -->
594 <conditional name="fails">
595 <param name="choice" type="select" label="How to deal with fails" help="">
596 <option value="default">default - include them in results</option>
597 <option value="nofails">nofails - exclude fails from results</option>
598 <option value="failsonly">failsonly - only output failing results</option>
599 </param>
600 <when value="default"/>
601 <when value="nofails"/>
602 <when value="failsonly">
603 <param name="fails_as_input" type="boolean" truevalue="--fails-as-input" falsevalue="" checked="false" label="Print completely failed alignments as input FASTA or FASTQ format"
604 help=""/>
605 </when>
606 </conditional>
607 </when>
608 <when value="yes"> <!-- split outputs: fails_as_input sits directly under "results" and is offered for every fails choice -->
609 <conditional name="fails">
610 <param name="choice" type="select" label="How to deal with fails" help="">
611 <option value="default">default - include them in results</option>
612 <option value="nofails">nofails - exclude fails from results</option>
613 <option value="failsonly">failsonly - only output failing results</option>
614 </param>
615 <when value="default"/>
616 <when value="nofails"/>
617 <when value="failsonly"/>
618 </conditional>
619 <param name="fails_as_input" type="boolean" truevalue="--fails-as-input" falsevalue="" checked="false" label="Print completely failed alignments as input FASTA or FASTQ format"
620 help=""/>
621 </when>
622 </conditional>
623
624 </inputs>
625 <outputs>
626 <data format="txt" name="gsnap_stderr" label="${tool.name} on ${on_string}: gsnap.log"/>
627
628 <data format="txt" name="gsnap_out" label="${tool.name} on ${on_string} ${result.format}" >
629 <filter>(results['split_output'] == 'no' and (results['fails']['choice'] != 'failsonly' or results['fails']['fails_as_input'] == False))</filter> <!-- combined result; dropped when outputs are split or when failsonly is combined with fails_as_input -->
630 <change_format> <!-- datatype follows the selected result format -->
631 <when input="result['format']" value="sam" format="sam"/>
632 <when input="result['format']" value="gsnap" format="gsnap"/>
633 </change_format>
634 </data>
635
636 <data format="fastq" name="gsnap_fq" label="${tool.name} on ${on_string} fails.fq" >
637 <filter>(results['split_output'] == 'no' and results['fails']['choice'] == 'failsonly' and results['fails']['fails_as_input'] == True)</filter> <!-- unsplit run with failsonly and fails_as_input: exactly the unsplit case in which gsnap_out is filtered out -->
638 </data>
639
640 <!-- split outputs: nomapping, halfmapping_uniq, halfmapping_mult, halfmapping_transloc, unpaired_uniq, unpaired_mult, unpaired_transloc, paired_uniq, paired_mult, paired_transloc, concordant_uniq, concordant_mult, concordant_transloc -->
641
642 <data format="txt" name="unpaired_mult" label="${tool.name} on ${on_string} unpaired_mult.${result.format}" from_work_dir="gsnap_out.unpaired_mult">
643 <filter>(results['split_output'] == 'yes')</filter>
644 <change_format>
645 <when input="result['format']" value="sam" format="sam"/>
646 <when input="result['format']" value="gsnap" format="gsnap"/>
647 </change_format>
648 </data>
649 <data format="txt" name="unpaired_uniq" label="${tool.name} on ${on_string} unpaired_uniq.${result.format}" from_work_dir="gsnap_out.unpaired_uniq">
650 <filter>(results['split_output'] == 'yes')</filter>
651 <change_format>
652 <when input="result['format']" value="sam" format="sam"/>
653 <when input="result['format']" value="gsnap" format="gsnap"/>
654 </change_format>
655 </data>
656 <data format="txt" name="unpaired_transloc" label="${tool.name} on ${on_string} unpaired_transloc.${result.format}" from_work_dir="gsnap_out.unpaired_transloc">
657 <filter>(results['split_output'] == 'yes')</filter>
658 <change_format>
659 <when input="result['format']" value="sam" format="sam"/>
660 <when input="result['format']" value="gsnap" format="gsnap"/>
661 </change_format>
662 </data>
663 <data format="txt" name="halfmapping_mult" label="${tool.name} on ${on_string} halfmapping_mult.${result.format}" from_work_dir="gsnap_out.halfmapping_mult">
664 <filter>(results['split_output'] == 'yes' and seq['format'] == 'fastq' and seq['paired']['ispaired'] == True)</filter>
665 <change_format>
666 <when input="result['format']" value="sam" format="sam"/>
667 <when input="result['format']" value="gsnap" format="gsnap"/>
668 </change_format>
669 </data>
670 <data format="txt" name="halfmapping_uniq" label="${tool.name} on ${on_string} halfmapping_uniq.${result.format}" from_work_dir="gsnap_out.halfmapping_uniq">
671 <filter>(results['split_output'] == 'yes' and seq['format'] == 'fastq' and seq['paired']['ispaired'] == True)</filter>
672 <change_format>
673 <when input="result['format']" value="sam" format="sam"/>
674 <when input="result['format']" value="gsnap" format="gsnap"/>
675 </change_format>
676 </data>
677 <data format="txt" name="halfmapping_transloc" label="${tool.name} on ${on_string} halfmapping_transloc.${result.format}" from_work_dir="gsnap_out.halfmapping_transloc">
678 <filter>(results['split_output'] == 'yes' and seq['format'] == 'fastq' and seq['paired']['ispaired'] == True)</filter>
679 <change_format>
680 <when input="result['format']" value="sam" format="sam"/>
681 <when input="result['format']" value="gsnap" format="gsnap"/>
682 </change_format>
683 </data>
684 <data format="txt" name="paired_mult" label="${tool.name} on ${on_string} paired_mult.${result.format}" from_work_dir="gsnap_out.paired_mult">
685 <filter>(results['split_output'] == 'yes' and seq['format'] == 'fastq' and seq['paired']['ispaired'] == True)</filter>
686 <change_format>
687 <when input="result['format']" value="sam" format="sam"/>
688 <when input="result['format']" value="gsnap" format="gsnap"/>
689 </change_format>
690 </data>
691 <data format="txt" name="paired_uniq" label="${tool.name} on ${on_string} paired_uniq.${result.format}" from_work_dir="gsnap_out.paired_uniq">
692 <filter>(results['split_output'] == 'yes' and seq['format'] == 'fastq' and seq['paired']['ispaired'] == True)</filter>
693 <change_format>
694 <when input="result['format']" value="sam" format="sam"/>
695 <when input="result['format']" value="gsnap" format="gsnap"/>
696 </change_format>
697 </data>
698 <data format="txt" name="paired_transloc" label="${tool.name} on ${on_string} paired_transloc.${result.format}" from_work_dir="gsnap_out.paired_transloc">
699 <filter>(results['split_output'] == 'yes' and seq['format'] == 'fastq' and seq['paired']['ispaired'] == True)</filter>
700 <change_format>
701 <when input="result['format']" value="sam" format="sam"/>
702 <when input="result['format']" value="gsnap" format="gsnap"/>
703 </change_format>
704 </data>
705
706 <data format="txt" name="concordant_mult" label="${tool.name} on ${on_string} concordant_mult.${result.format}" from_work_dir="gsnap_out.concordant_mult">
707 <filter>(results['split_output'] == 'yes' and seq['format'] == 'fastq' and seq['paired']['ispaired'] == True)</filter>
708 <change_format>
709 <when input="result['format']" value="sam" format="sam"/>
710 <when input="result['format']" value="gsnap" format="gsnap"/>
711 </change_format>
712 </data>
713 <data format="txt" name="concordant_uniq" label="${tool.name} on ${on_string} concordant_uniq.${result.format}" from_work_dir="gsnap_out.concordant_uniq">
714 <filter>(results['split_output'] == 'yes' and seq['format'] == 'fastq' and seq['paired']['ispaired'] == True)</filter>
715 <change_format>
716 <when input="result['format']" value="sam" format="sam"/>
717 <when input="result['format']" value="gsnap" format="gsnap"/>
718 </change_format>
719 </data>
720 <data format="txt" name="concordant_transloc" label="${tool.name} on ${on_string} concordant_transloc.${result.format}" from_work_dir="gsnap_out.concordant_transloc">
721 <filter>(results['split_output'] == 'yes' and seq['format'] == 'fastq' and seq['paired']['ispaired'] == True)</filter>
722 <change_format>
723 <when input="result['format']" value="sam" format="sam"/>
724 <when input="result['format']" value="gsnap" format="gsnap"/>
725 </change_format>
726 </data>
727
728 <data format="txt" name="nomapping" label="${tool.name} on ${on_string} nomapping.${result.format}" from_work_dir="gsnap_out.nomapping">
729 <filter>(results['split_output'] == 'yes' and results['fails_as_input'] == False)</filter>
730 <change_format>
731 <when input="result['format']" value="sam" format="sam"/>
732 <when input="result['format']" value="gsnap" format="gsnap"/>
733 </change_format>
734 </data>
735
736 <data format="fastq" name="nomapping_fq" label="${tool.name} on ${on_string} nomapping.fq" from_work_dir="gsnap_out.nomapping.fq">
737 <filter>(results['split_output'] == 'yes' and results['fails_as_input'] == True and seq['format'] == 'fastq' and seq['paired']['ispaired'] == False)</filter> <!-- single-end FASTQ failures; requires fails_as_input, mirroring the plain nomapping output which requires it to be off -->
738 </data>
739
740 <data format="fastq" name="nomapping_1_fq" label="${tool.name} on ${on_string} nomapping.1.fq" from_work_dir="gsnap_out.nomapping.1.fq">
741 <filter>(results['split_output'] == 'yes' and results['fails_as_input'] == True and seq['format'] == 'fastq' and seq['paired']['ispaired'] == True)</filter> <!-- first-end FASTQ failures of paired input; requires fails_as_input, mirroring the plain nomapping output which requires it to be off -->
742 </data>
743
744 <data format="fastq" name="nomapping_2_fq" label="${tool.name} on ${on_string} nomapping.2.fq" from_work_dir="gsnap_out.nomapping.2.fq">
745 <filter>(results['split_output'] == 'yes' and results['fails_as_input'] == True and seq['format'] == 'fastq' and seq['paired']['ispaired'] == True)</filter> <!-- second-end FASTQ failures of paired input; requires fails_as_input, mirroring the plain nomapping output which requires it to be off -->
746 </data>
747
748 <!-- Will probably need wrapper code to generate composite datatype for goby alignment
749 <data format="gobyalignment" name="goby_alignment" label="${tool.name} on ${on_string} uniq.${result.format}" from_work_dir="gsnap_out.nomapping">
750 <filter>result['format'] == 'goby'</filter>
751 </data>
752 -->
753
754 </outputs>
755 <tests>
756 </tests>
757
758 <help>
759
760 **What it does**
761
762 GSNAP_ (Genomic Short-read Nucleotide Alignment Program) is a short read aligner which can align both single- and paired-end reads as short as 14nt and of arbitrarily long length. It can detect short- and long-distance splicing, including interchromosomal splicing, in individual reads, using probabilistic models or a database of known splice sites. Our program also permits SNP-tolerant alignment to a reference space of all possible combinations of major and minor alleles, and can align reads from bisulfite-treated DNA for the study of methylation state. It is developed by Thomas D. Wu of Genentech, Inc.
763 Publication_ citation: Thomas D. Wu, Serban Nacu, "Fast and SNP-tolerant detection of complex variants and splicing in short reads." Bioinformatics. 2010 Apr 1;26(7):873-81. Epub 2010 Feb 10.
764
765 .. _GSNAP: http://research-pub.gene.com/gmap/
766 .. _Publication: http://bioinformatics.oupjournals.org/cgi/content/full/26/7/873
767 http://www.ncbi.nlm.nih.gov/pmc/articles/PMC2844994/?tool=pubmed
768
769 ------
770
771 **Know what you are doing**
772
773 .. class:: warningmark
774
775 You will want to read the README_
776
777 .. _README: http://research-pub.gene.com/gmap/src/README
778
779 ------
780
781 **Input formats**
782
783 Input to GSNAP should be either in FASTQ or FASTA format.
784
785 The FASTQ input may include quality scores, which will then be included in SAM
786 output, if that output format is selected.
787
788 For FASTA format, you should include one line per read (or end of a
789 paired-end read). The same FASTA file can have a mixture of
790 single-end and paired-end reads of varying lengths, if desired.
791
792 Single-end reads:
793
794 Each FASTA entry should contain one short read per line, like this
795
796 >Header information
797 AAAACATTCTCCTCCGCATAAGCCTGCGTCAGATTA
798
799 Each short read can have a different length. However, the entire read
800 needs to be on a single line, and may not wrap around multiple lines.
801 If it extends to a second line, GSNAP will think that the read is
802 paired-end.
803
804
805 Paired-end reads:
806
807 Each FASTA entry should contain two short reads, one per line, like
808 this
809
810 >Header information
811 AAAACATTCTCCTCCGCATAAGCCTAGTAGATTA
812 GGCGTAGGTAGAAGTAGAGGTTAAGGCGCGTCAG
813
814 By default, the program assumes that the second end is in the reverse
815 complement direction compared with the first end. If they are in the
816 same direction, you may need to use the --circular-input (or -c) flag.
817
818 ( The Galaxy tool: "FASTA Width formatter" can be used to reformat fasta files to have single line sequences. )
819
820 ------
821
822 **Output formats in GSNAP**
823
824 SAM output format
825
826 Default GSNAP format
827 See the README_
828
829
830
831
832 </help>
833 </tool>
834