comparison sickle.xml @ 0:a5f56370e870 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/sickle commit 128d3f255f00c47fa2b16d9b7432d48a089660c1
author iuc
date Thu, 12 Nov 2015 07:05:48 -0500
parents
children 43e081d32f90
comparison
equal deleted inserted replaced
-1:000000000000 0:a5f56370e870
1 <tool id="sickle" name="Sickle" version="1.33">
2 <description>windowed adaptive trimming of FASTQ data</description>
3 <requirements> <!-- Runtime dependency: the sickle package, pinned to the same 1.33 as the tool's version attribute. -->
4 <requirement type="package" version="1.33">sickle</requirement>
5 </requirements>
6 <version_command>sickle --version | head -n 1</version_command> <!-- Keeps only the first line of the version output. -->
7 <command>
8 sickle
9 ## Exactly one of the three layout branches below emits the sub-command (se or pe) and its input/output flags.
10 #if str($readtype.single_or_paired) == "se":
11 se -f "${readtype.input_single}" -o "$output_single"
12 ## Quality encoding (-t) follows the input datatype; anything not explicitly Illumina or Solexa is treated as sanger, the documented default.
13 #if $readtype.input_single.ext == "fastqillumina":
14 -t illumina
15 #else if $readtype.input_single.ext == "fastqsolexa":
16 -t solexa
17 #else
18 -t sanger
19 #end if
20 #end if
21
22 #if str($readtype.single_or_paired) == "pe_combo":
23 #if $readtype.output_n:
24 pe -c "${readtype.input_combo}" -M "$output_combo"
25 #else
26 pe -c "${readtype.input_combo}" -m "$output_combo" -s "$output_combo_single"
27 #end if
28 ## Same datatype-to-encoding mapping as the single-end branch, with sanger as the fallback.
29 #if $readtype.input_combo.ext == "fastqillumina":
30 -t illumina
31 #else if $readtype.input_combo.ext == "fastqsolexa":
32 -t solexa
33 #else
34 -t sanger
35 #end if
36 #end if
37
38 #if str($readtype.single_or_paired) == "pe_sep":
39 pe -f "${readtype.input_paired1}" -r "${readtype.input_paired2}" -o "$output_paired1" -p "$output_paired2" -s "$output_paired_single"
40 ## Only one -t flag is passed for both mates; it is taken from the forward file's datatype, with sanger as the fallback.
41 #if $readtype.input_paired1.ext == "fastqillumina":
42 -t illumina
43 #else if $readtype.input_paired1.ext == "fastqsolexa":
44 -t solexa
45 #else
46 -t sanger
47 #end if
48 #end if
49 ## Both thresholds are optional parameters; each flag is added only when a value was supplied.
50 #if str($qual_threshold) != "":
51 -q $qual_threshold
52 #end if
53
54 #if str($length_threshold) != "":
55 -l $length_threshold
56 #end if
57 ## Boolean switches: the flag is added only when the box is checked.
58 #if $no_five_prime:
59 -x
60 #end if
61
62 #if $trunc_n:
63 -n
64 #end if
65 </command>
66
67 <inputs>
68 <conditional name="readtype">
69 <param name="single_or_paired" type="select" label="Single-end or paired-end reads?" help="Note: Sickle will infer the quality type of the file from its datatype. I.e., if the datatype is fastqsanger, then the quality type is sanger. The default is fastqsanger.">
70 <option value="se" selected="true">Single-end</option>
71 <option value="pe_combo">Paired-end (one interleaved input file)</option>
72 <option value="pe_sep">Paired-end (two separate input files)</option>
73 </param>
74 <!-- Each when-branch below declares only the datasets and options used by that read layout. -->
75 <when value="se">
76 <param format="fastq" name="input_single" type="data" label="Single-end FASTQ reads" help="(-f)" />
77 </when>
78
79 <when value="pe_combo">
80 <param format="fastq" name="input_combo" type="data" label="Paired-end interleaved FASTQ reads" help="(-c)" />
81 <param name="output_n" type="boolean" label="Output only one file with all reads" help="This will output only one file with all the reads, where the reads that did not pass filter will be replaced with a single 'N', rather than discarded."/>
82 </when>
83
84 <when value="pe_sep">
85 <param format="fastq" name="input_paired1" type="data" label="Paired-end forward strand FASTQ reads" help="(-f)" />
86 <param format="fastq" name="input_paired2" type="data" label="Paired-end reverse strand FASTQ reads" help="(-r)" />
87 </when>
88 </conditional>
89 <!-- Both thresholds are optional integers; the command passes -q / -l only when a value is present. -->
90 <param name="qual_threshold" value="20" min="0" type="integer" optional="true" label="Quality threshold" help="Threshold for trimming based on average quality in a window (-q)" />
91
92 <param name="length_threshold" value="20" min="0" type="integer" optional="true" label="Length threshold" help="Threshold to keep a read based on length after trimming (-l)" />
93 <!-- Boolean switches: the command adds -x or -n only when the corresponding box is checked. -->
94 <param name="no_five_prime" type="boolean" label="Don't do 5' trimming" help="(-x)" />
95 <param name="trunc_n" type="boolean" label="Truncate sequences with Ns at first N position" help="(-n)" />
96 </inputs>
97
98 <outputs> <!-- Each dataset takes its format from the input named in format_source; the filter ties it to one read layout. -->
99 <data name="output_single" format_source="input_single" label="Single-end output of ${tool.name} on ${on_string}">
100 <filter>readtype['single_or_paired'] == 'se'</filter>
101 </data>
102 <!-- Interleaved mode: output_combo is written via -M when output_n is checked, otherwise via -m. -->
103 <data name="output_combo" format_source="input_combo" label="Paired-end interleaved output of ${tool.name} on ${on_string}">
104 <filter>readtype['single_or_paired'] == 'pe_combo'</filter>
105 </data>
106 <!-- The singletons file (-s) is only written when output_n is unchecked, hence the extra filter clause. -->
107 <data name="output_combo_single" format_source="input_combo" label="Singletons from paired-end interleaved output of ${tool.name} on ${on_string}">
108 <filter>readtype['single_or_paired'] == 'pe_combo' and not readtype['output_n']</filter>
109 </data>
110 <!-- Separate-file mode always yields three datasets: forward (-o), reverse (-p) and singletons (-s). -->
111 <data name="output_paired1" format_source="input_paired1" label="Paired-end forward strand output of ${tool.name} on ${on_string}">
112 <filter>readtype['single_or_paired'] == 'pe_sep'</filter>
113 </data>
114
115 <data name="output_paired2" format_source="input_paired2" label="Paired-end reverse strand output of ${tool.name} on ${on_string}">
116 <filter>readtype['single_or_paired'] == 'pe_sep'</filter>
117 </data>
118
119 <data name="output_paired_single" format_source="input_paired1" label="Singletons from paired-end output of ${tool.name} on ${on_string}">
120 <filter>readtype['single_or_paired'] == 'pe_sep'</filter>
121 </data>
122 </outputs>
123 <tests> <!-- NOTE(review): no test exercises the single-end ("se") branch. -->
124 <test> <!-- Interleaved input, output_n left unset: expects the pairs file and the singletons file. -->
125 <param name="single_or_paired" value="pe_combo" />
126 <param name="input_combo" value="test.fastq" />
127 <param name="qual_threshold" value="34" />
128 <output name="output_combo" file="output.c1.fastq" />
129 <output name="output_combo_single" file="output.s.fastq" />
130 </test>
131 <test> <!-- Interleaved input with output_n=true: expects the single combined file only. -->
132 <param name="single_or_paired" value="pe_combo" />
133 <param name="input_combo" value="test.fastq" />
134 <param name="qual_threshold" value="34" />
135 <param name="output_n" value="true" />
136 <output name="output_combo" file="output.c2.fastq" />
137 </test>
138 <test> <!-- Separate forward/reverse inputs: expects forward, reverse and singletons files. -->
139 <param name="single_or_paired" value="pe_sep" />
140 <param name="input_paired1" value="test.f.fastq" />
141 <param name="input_paired2" value="test.r.fastq" />
142 <param name="qual_threshold" value="34" />
143 <output name="output_paired1" file="output.f.fastq" />
144 <output name="output_paired2" file="output.r.fastq" />
145 <output name="output_paired_single" file="output.s.fastq" />
146 </test>
147 </tests>
148 <help>
149 **What it does**
150
151 Most modern sequencing technologies produce reads that have
152 deteriorating quality towards the 3'-end and some towards the 5'-end
153 as well. Incorrectly called bases in both regions negatively impact
154 assemblies, mapping, and downstream bioinformatics analyses.
155
156 Sickle is a tool that uses sliding windows along with quality and
157 length thresholds to determine when quality is sufficiently low to
158 trim the 3'-end of reads and also determines when the quality is
159 sufficiently high to trim the 5'-end of reads. It will also
160 discard reads based upon the length threshold. It takes the quality
161 values and slides a window across them whose length is 0.1 times the
162 length of the read. If this length is less than 1, then the window is
163 set to be equal to the length of the read. Otherwise, the window
164 slides along the quality values until the average quality in the
165 window rises above the threshold, at which point the algorithm
166 determines where within the window the rise occurs and cuts the read
167 and quality there for the 5'-end cut. Then when the average quality
168 in the window drops below the threshold, the algorithm determines
169 where in the window the drop occurs and cuts both the read and quality
170 strings there for the 3'-end cut. However, if the length of the
171 remaining sequence is less than the minimum length threshold, then the
172 read is discarded entirely (or replaced with an "N" record). 5'-end
173 trimming can be disabled. Sickle also has an option to truncate reads
174 with Ns at the first N position.
175
176 Sickle supports three types of quality values: Illumina, Solexa, and
177 Sanger. Note that the Solexa quality setting is an approximation (the
178 actual conversion is a non-linear transformation). The end
179 approximation is close. Illumina quality refers to qualities encoded
180 with the CASAVA pipeline between versions 1.3 and 1.7. Illumina
181 quality using CASAVA >= 1.8 is Sanger encoded. The quality value will
182 be determined from the datatype of the data, i.e. a fastqsanger datatype
183 is assumed to be Sanger encoded.
184
185 Note that Sickle will remove the 2nd FASTQ record header (on the "+"
186 line) and replace it with simply a "+". This is the default format for
187 CASAVA >= 1.8.
188
189 -----
190
191 **Options**
192
193 **Single-end**
194
195 This option takes one single-end input file and outputs one single-end
196 output file of reads that passed the filters.
197
198 **Paired-End (one interleaved input file)**
199
200 This option takes as input one interleaved paired-end file. If you then
201 check the "Output only one file with all reads" checkbox, it will output
202 one interleaved file where any read that did not pass filter will be replaced
203 with a FASTQ record where the sequence is a single "N" and the quality is the
204 lowest quality possible for that quality type. This will preserve the paired
205 nature of the data. If you leave the checkbox unchecked, it will output two files,
206 one interleaved file with all the passed pairs and one singletons file where only
207 one of the pair passed filter.
208
209 **Paired-End (two separate input files)**
210
211 This option takes two separate (forward and reverse) paired-end files as input.
212 The output is three files: Two paired-end files with pairs that passed filter and
213 a singletons file where only one of the pair passed filter.
214
215 **Quality threshold**
216
217 Input your desired quality threshold. This threshold is Phred-scaled; for FASTQ data,
218 values typically fall between 0 and 41.
219
220 **Length threshold**
221
222 Input your desired length threshold. This is the threshold to determine if a read is kept
223 after all the trimming steps are done.
224
225 **Disable 5-prime trimming**
226
227 An option to disable trimming the read on the 5-prime end. By default, the 5-prime end is
228 trimmed while the average quality in the window is below the quality threshold.
229
230 **Truncate sequences with Ns**
231
232 This option will trim a read at the first "N" base in the read after doing quality trimming.
233 It is then still subject to the length threshold.
234
235 -----
236
237 Copyright: Nikhil Joshi
238
239 http://bioinformatics.ucdavis.edu
240
241 http://github.com/najoshi/sickle
242 </help>
243 <citations> <!-- Single @unpublished BibTeX entry that points to the project's GitHub repository. -->
244 <citation type="bibtex">
245 @unpublished{sickle_link,
246 author = {Joshi, Nikhil A. and Fass, Joseph N.},
247 title = {Sickle: A windowed adaptive trimming tool for FASTQ files using quality},
248 year = 2011,
249 url = { https://github.com/najoshi/sickle }
250 }
251 </citation>
252 </citations>
253 </tool>