comparison cutadapt.xml @ 39:fe74900d6dc7 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/cutadapt commit a5b6cb44f81abe57a4269bded1fa4d41f462f9d5
author iuc
date Fri, 17 May 2024 13:32:03 +0000
parents 944ae523bacb
children aa784cb3810d
comparison
equal deleted inserted replaced
38:b1c926deaa2d 39:fe74900d6dc7
5 </macros> 5 </macros>
6 <expand macro='edam_ontology' /> 6 <expand macro='edam_ontology' />
7 <expand macro='xrefs'/> 7 <expand macro='xrefs'/>
8 <expand macro='requirements' /> 8 <expand macro='requirements' />
9 <version_command>cutadapt --version</version_command> 9 <version_command>cutadapt --version</version_command>
10
11 <command detect_errors="exit_code"><![CDATA[ 10 <command detect_errors="exit_code"><![CDATA[
12 ## Link in the input and output files, so Cutadapt can tell their type
13
14 #import re 11 #import re
15 #set read1 = "input_f" 12
16 #set read2 = "input_r" 13 ## set things up for handling inputs and outputs in single- vs paired-end modes
17 #set paired = False
18 #set library_type = str($library.type) 14 #set library_type = str($library.type)
15 #if $library_type == 'single':
16 #set paired = False
17 #else:
18 #set paired = True
19 #end if
20
19 #if $library_type == 'paired': 21 #if $library_type == 'paired':
20 #set paired = True
21 #set input_1 = $library.input_1 22 #set input_1 = $library.input_1
22 #set input_2 = $library.input_2 23 #set input_2 = $library.input_2
23 ## Avoid the paired read input files sharing the same name, else the program still runs but 24 ## Avoid the paired read input files sharing the same name, else the program still runs but
24 ## uses inaccurate results by using only 1 of 2 files. 25 ## uses inaccurate results by using only 1 of 2 files.
25 #set read1 = re.sub('[^\w\-\s]', '_', str($library.input_1.element_identifier)) 26 #set read1 = re.sub('[^\w\-\s]', '_', str($library.input_1.element_identifier))
26 #set read2 = re.sub('[^\w\-\s]', '_', str($library.input_2.element_identifier)) 27 #set read2 = re.sub('[^\w\-\s]', '_', str($library.input_2.element_identifier))
27 #if read1 == read2: 28 #if read1 == read2:
28 #set read1 = read1 + "_1" 29 #set read1 = read1 + "_1"
29 #set read2 = read2 + "_2" 30 #set read2 = read2 + "_2"
30 #end if 31 #end if
31 #else if $library_type == 'paired_collection' 32 #elif $library_type == 'paired_collection'
32 #set paired = True
33 #set input_1 = $library.input_1.forward 33 #set input_1 = $library.input_1.forward
34 #set input_2 = $library.input_1.reverse 34 #set input_2 = $library.input_1.reverse
35 #set read1 = re.sub('[^\w\-\s]', '_', str($library.input_1.name)) + "_1" 35 #set read1 = re.sub('[^\w\-\s]', '_', str($library.input_1.name)) + "_1"
36 #set read2 = re.sub('[^\w\-\s]', '_', str($library.input_1.name)) + "_2" 36 #set read2 = re.sub('[^\w\-\s]', '_', str($library.input_1.name)) + "_2"
37 #else 37 #else
48 #set ext=ext+".gz" 48 #set ext=ext+".gz"
49 #elif $input_1.ext.endswith(".bz2") 49 #elif $input_1.ext.endswith(".bz2")
50 #set ext=ext+".bz2" 50 #set ext=ext+".bz2"
51 #end if 51 #end if
52 52
53
54 #set read1 = $read1 + $ext 53 #set read1 = $read1 + $ext
55 #set out1 = "out1" + $ext 54 #set out1 = "out1" + $ext
56 #set rest_output = "rest_output" + $ext 55 #set rest_output = "rest_output" + $ext
57 #set wild_output = "wild_output" + $ext 56 #set wild_output = "wild_output" + $ext
58 #set too_short_output = "too_short_output" + $ext 57 #set too_short_output = "too_short_output" + $ext
59 #set too_long_output = "too_long_output" + $ext 58 #set too_long_output = "too_long_output" + $ext
60 #set untrimmed_output = "untrimmed_output" + $ext 59 #set untrimmed_output = "untrimmed_output" + $ext
61 ln -f -s '${input_1}' '$read1' &&
62 60
63 #if $paired: 61 #if $paired:
64 #if $input_2.is_of_type("fastq", "fastq.gz", "fastq.bz2"): 62 #if $input_2.is_of_type("fastq", "fastq.gz", "fastq.bz2"):
65 #set ext2 = ".fq" 63 #set ext2 = ".fq"
66 #else 64 #else
74 #set read2 = $read2 + $ext2 72 #set read2 = $read2 + $ext2
75 #set out2 = "out2" + $ext2 73 #set out2 = "out2" + $ext2
76 #set too_short_paired_output = "too_short_paired_output" + $ext2 74 #set too_short_paired_output = "too_short_paired_output" + $ext2
77 #set too_long_paired_output = "too_long_paired_output" + $ext2 75 #set too_long_paired_output = "too_long_paired_output" + $ext2
78 #set untrimmed_paired_output = "untrimmed_paired_output" + $ext2 76 #set untrimmed_paired_output = "untrimmed_paired_output" + $ext2
79 ln -f -s '${input_2}' '$read2' && 77 #end if
80 #end if 78
81 79 ## Link in the input and output files, so Cutadapt can tell their type
82 ## Run Cutadapt 80 ln -f -s '$input_1' '$read1' &&
83 81 #if $paired:
82 ln -f -s '$input_2' '$read2' &&
83 #end if
84 ## Create dedicated output folder if needed
84 #if 'multiple_output' in $output_selector: 85 #if 'multiple_output' in $output_selector:
85 mkdir split && 86 mkdir split &&
86 #end if 87 #end if
87 88
89 ## Run Cutadapt
88 cutadapt 90 cutadapt
89 91
90 -j=\${GALAXY_SLOTS:-4} 92 -j=\${GALAXY_SLOTS:-4}
91 93
92 #if 'json_stats' in $output_selector: 94 ## Read1 trimming
93 --json stats.json 95 #set ADAPTER_ARGUMENT="-a"
94 #end if 96 #for $a in $library.r1.adapters
95 97 @adapter_cli@
96 #if str( $library.type ) == "single": 98 #end for
97 @read1_options@ 99 #set ADAPTER_ARGUMENT="-b"
98 #if 'multiple_output' in $output_selector: 100 #for $a in $library.r1.anywhere_adapters
99 --output='split/{name}.${input_1.ext}' 101 @adapter_cli@
100 #else: 102 #end for
101 --output='$out1' 103 #set ADAPTER_ARGUMENT="-g"
102 #end if 104 #for $a in $library.r1.front_adapters
103 #else: 105 @adapter_cli@
104 @read1_options@ 106 #end for
105 @read2_options@ 107
106 --output='$out1' 108 #if $paired:
107 --paired-output='$out2' 109 ## Read2 trimming
110 #set ADAPTER_ARGUMENT="-A"
111 #for $a in $library.r2.adapters2
112 @adapter_cli@
113 #end for
114 #set ADAPTER_ARGUMENT="-B"
115 #for $a in $library.r2.anywhere_adapters2
116 @adapter_cli@
117 #end for
118 #set ADAPTER_ARGUMENT="-G"
119 #for $a in $library.r2.front_adapters2
120 @adapter_cli@
121 #end for
122 $library.pair_adapters
108 #end if 123 #end if
109 124
110 --error-rate=$adapter_options.error_rate 125 --error-rate=$adapter_options.error_rate
111 --times=$adapter_options.times 126 --times=$adapter_options.times
112 --overlap=$adapter_options.overlap 127 --overlap=$adapter_options.overlap
114 $adapter_options.match_read_wildcards 129 $adapter_options.match_read_wildcards
115 $adapter_options.no_match_adapter_wildcards 130 $adapter_options.no_match_adapter_wildcards
116 --action=$adapter_options.action 131 --action=$adapter_options.action
117 $adapter_options.revcomp 132 $adapter_options.revcomp
118 133
134 #if $other_trimming_options.cut != 0:
135 --cut=$other_trimming_options.cut
136 #end if
137 #if $paired and $other_trimming_options.cut2 != 0:
138 -U $other_trimming_options.cut2
139 #end if
140 #if str($other_trimming_options.quality_cutoff) != '0':
141 --quality-cutoff=$other_trimming_options.quality_cutoff
142 #end if
143 #if $paired and str($other_trimming_options.quality_cutoff2) != '':
144 -Q $other_trimming_options.quality_cutoff2
145 #end if
146 #if str($other_trimming_options.nextseq_trim) != '0':
147 --nextseq-trim=$other_trimming_options.nextseq_trim
148 #end if
149 $other_trimming_options.trim_n
150 $other_trimming_options.poly_a
151 #if str($other_trimming_options.shorten_options.shorten_values) == 'True':
152 #if str($other_trimming_options.shorten_options.shorten_end) == '3prime'
153 --length=$other_trimming_options.shorten_options.length
154 #else
155 --length=-$other_trimming_options.shorten_options.length
156 #end if
157 #end if
158
119 $filter_options.discard_trimmed 159 $filter_options.discard_trimmed
120 $filter_options.discard_untrimmed 160 $filter_options.discard_untrimmed
121 161
122 #if str($filter_options.minimum_length) and str($library.type) != "single" and str($library.minimum_length2) != '': 162 #if $paired and str($filter_options.minimum_length2):
123 --minimum-length=$filter_options.minimum_length:$library.minimum_length2 163 --minimum-length=$filter_options.minimum_length:$filter_options.minimum_length2
124 #else if str($filter_options.minimum_length): 164 #elif $filter_options.minimum_length > 0:
125 --minimum-length=$filter_options.minimum_length 165 --minimum-length=$filter_options.minimum_length
126 #end if 166 #end if
127 #if str($filter_options.maximum_length) and str($library.type) != "single" and str($library.maximum_length2) != '': 167 #if $paired and str($filter_options.maximum_length2):
128 --maximum-length=$filter_options.maximum_length:$library.maximum_length2 168 --maximum-length=$filter_options.maximum_length:$filter_options.maximum_length2
129 #else if str($filter_options.maximum_length): 169 #elif str($filter_options.maximum_length):
130 --maximum-length=$filter_options.maximum_length 170 --maximum-length=$filter_options.maximum_length
131 #end if 171 #end if
132 #if str($filter_options.max_n): 172 #if str($filter_options.max_n):
133 --max-n=$filter_options.max_n 173 --max-n=$filter_options.max_n
134 #end if 174 #end if
135 #if str( $library.type ) != "single":
136 #if $filter_options.pair_filter:
137 --pair-filter=$filter_options.pair_filter
138 #end if
139 #end if
140 #if str($filter_options.max_expected_errors): 175 #if str($filter_options.max_expected_errors):
141 --max-expected-errors=$filter_options.max_expected_errors 176 --max-ee=$filter_options.max_expected_errors
142 #end if 177 #end if
143 $filter_options.discard_cassava 178 #if str($filter_options.max_average_error_rate):
179 --max-aer=$filter_options.max_average_error_rate
180 #end if
181 $filter_options.discard_casava
182 #if $paired and str($filter_options.pair_filter) != 'any':
183 --pair-filter=$filter_options.pair_filter
184 #end if
144 185
145 #if $input_1.ext.startswith("fastqillumina") or $input_1.ext.startswith("fastqsolexa") 186 #if $input_1.ext.startswith("fastqillumina") or $input_1.ext.startswith("fastqsolexa")
146 --quality-base=64 187 --quality-base=64
147 #end if 188 #end if
148 189
149 #if str($read_mod_options.quality_cutoff) != '0':
150 --quality-cutoff=$read_mod_options.quality_cutoff
151 #end if
152 #if str($read_mod_options.nextseq_trim) != '0':
153 --nextseq-trim=$read_mod_options.nextseq_trim
154 #end if
155 $read_mod_options.trim_n
156 $read_mod_options.poly_a
157 #if $read_mod_options.strip_suffix != '' 190 #if $read_mod_options.strip_suffix != ''
158 --strip-suffix $read_mod_options.strip_suffix 191 --strip-suffix='$read_mod_options.strip_suffix'
159 #end if
160 #if str($read_mod_options.shorten_options.shorten_values) == 'True':
161 #if str($read_mod_options.shorten_options.shorten_end) == '3prime'
162 --length=$read_mod_options.shorten_options.length
163 #else
164 --length=-$read_mod_options.shorten_options.length
165 #end if
166 #end if 192 #end if
167 #if str($read_mod_options.length_tag) != '': 193 #if str($read_mod_options.length_tag) != '':
168 --length-tag='$read_mod_options.length_tag' 194 --length-tag='$read_mod_options.length_tag'
169 #end if 195 #end if
170 #if str($read_mod_options.rename) != '': 196 #if str($read_mod_options.rename) != '':
171 --rename='$read_mod_options.rename' 197 --rename='$read_mod_options.rename'
172 #end if 198 #end if
173 $read_mod_options.zero_cap 199 $read_mod_options.zero_cap
174 200
175 201 ## Outputs handling
176 '${read1}' 202 #if 'json_stats' in $output_selector:
203 --json=stats.json
204 #end if
205 #if 'info_file' in $output_selector:
206 --info-file='$info_file'
207 #end if
208 #if 'rest_file' in $output_selector:
209 -r='${rest_output}'
210 #end if
211 #if 'wildcard_file' in $output_selector:
212 --wildcard-file='${wild_output}'
213 #end if
214 #if 'too_short_file' in $output_selector:
215 --too-short-output='${too_short_output}'
216 #if $paired:
217 --too-short-paired-output='${too_short_paired_output}'
218 #end if
219 #end if
220 #if 'too_long_file' in $output_selector:
221 --too-long-output='${too_long_output}'
222 #if $paired:
223 --too-long-paired-output='${too_long_paired_output}'
224 #end if
225 #end if
226 #if 'untrimmed_file' in $output_selector:
227 --untrimmed-output='${untrimmed_output}'
228 #if $paired:
229 --untrimmed-paired-output='${untrimmed_paired_output}'
230 #end if
231 #end if
232 #if not $paired and 'multiple_output' in $output_selector:
233 -o 'split/{name}.${input_1.ext}'
234 #else:
235 -o '$out1'
236 #if $paired:
237 -p '$out2'
238 #end if
239 #end if
240
241 '$read1'
177 #if $paired: 242 #if $paired:
178 '${read2}' 243 '$read2'
179 #if $library.r2.quality_cutoff2:
180 -Q=$library.r2.quality_cutoff2
181 #end if
182 #end if 244 #end if
183 245
184 #if 'report' in $output_selector: 246 #if 'report' in $output_selector:
185 > report.txt 247 > report.txt
186 #end if 248 #end if
187 ]]></command> 249 ]]></command>
188 <inputs> 250 <inputs>
189
190 <!-- Reads --> 251 <!-- Reads -->
191 <conditional name="library"> 252 <conditional name="library">
192 <param name="type" type="select" label="Single-end or Paired-end reads?"> 253 <param name="type" type="select" label="Single-end or Paired-end reads?">
193 <option value="single">Single-end</option> 254 <option value="single">Single-end</option>
194 <option value="paired">Paired-end</option> 255 <option value="paired">Paired-end</option>
195 <option value="paired_collection">Paired-end Collection</option> 256 <option value="paired_collection">Paired-end Collection</option>
196 </param> 257 </param>
197
198 <when value="single"> 258 <when value="single">
199 <param name="input_1" format="@FASTQ_TYPES@" type="data" label="FASTQ/A file" help="Should be of datatype &quot;fastq.gz&quot; or &quot;fasta&quot;" /> 259 <param name="input_1" format="@FASTQ_TYPES@" type="data" label="FASTQ/A file" help="Should be of datatype &quot;fastq.gz&quot; or &quot;fasta&quot;" />
200 <expand macro="single_end_options" /> 260 <expand macro="read1_adapters" />
201 </when> 261 </when>
202
203 <when value="paired"> 262 <when value="paired">
204 <param name="input_1" format="@FASTQ_TYPES@" type="data" label="FASTQ/A file #1" help="Should be of datatype &quot;fastq.gz&quot;or &quot;fasta&quot;" /> 263 <param name="input_1" format="@FASTQ_TYPES@" type="data" label="FASTQ/A file #1" help="Should be of datatype &quot;fastq.gz&quot;or &quot;fasta&quot;" />
205 <param name="input_2" format="@FASTQ_TYPES@" type="data" label="FASTQ/A file #2" help="Should be of datatype &quot;fastq.gz&quot;or &quot;fasta&quot;" /> 264 <param name="input_2" format="@FASTQ_TYPES@" type="data" label="FASTQ/A file #2" help="Should be of datatype &quot;fastq.gz&quot;or &quot;fasta&quot;" />
206 <expand macro="paired_end_options" /> 265 <expand macro="read1_adapters" />
266 <expand macro="read2_adapters" />
207 </when> 267 </when>
208
209 <when value="paired_collection"> 268 <when value="paired_collection">
210 <param name="input_1" format="@FASTQ_TYPES@" type="data_collection" collection_type="paired" label="Paired Collection" help="Should be of datatype &quot;fastq.gz&quot; or &quot;fasta&quot;" /> 269 <param name="input_1" format="@FASTQ_TYPES@" type="data_collection" collection_type="paired" label="Paired Collection" help="Should be of datatype &quot;fastq.gz&quot; or &quot;fasta&quot;" />
211 <expand macro="paired_end_options" /> 270 <expand macro="read1_adapters" />
271 <expand macro="read2_adapters" />
212 </when> 272 </when>
213
214 </conditional> 273 </conditional>
215
216 <!-- Adapter Options --> 274 <!-- Adapter Options -->
217 <section name="adapter_options" title="Adapter Options"> 275 <section name="adapter_options" title="Adapter Handling Options">
218 <param name="action" type="select" label="What to do if a match is found"> 276 <param name="action" type="select" label="What to do if a match is found">
219 <option value="trim" selected="true">Trim: trim adapter and upstream or downstream sequence</option> 277 <option value="trim" selected="true">Trim: trim adapter and upstream or downstream sequence</option>
220 <option value="retain">Retain: the read is trimmed, but the adapter sequence is not removed</option> 278 <option value="retain">Retain: the read is trimmed, but the adapter sequence is not removed</option>
221 <option value="mask">Mask: mask adapters with 'N' characters instead of trimming them</option> 279 <option value="mask">Mask: mask adapters with 'N' characters instead of trimming them</option>
222 <option value="lowercase">Lowercase: convert to lowercase</option> 280 <option value="lowercase">Lowercase: convert to lowercase</option>
225 <param argument="--error-rate" type="float" min="0" max="1" value="0.1" label="Maximum error rate" help="Maximum allowed error rate (no. of errors divided by the length of the matching region)." /> 283 <param argument="--error-rate" type="float" min="0" max="1" value="0.1" label="Maximum error rate" help="Maximum allowed error rate (no. of errors divided by the length of the matching region)." />
226 <param argument="--no-indels" type="boolean" checked="false" truevalue="--no-indels" falsevalue="" label="Do not allow indels (Use ONLY with anchored 5' (front) adapters)." help="Do not allow indels in the alignments. That is, allow only mismatches. This option is currently only supported for anchored 5' adapters ('^ADAPTER') (default: both mismatches and indels are allowed)." /> 284 <param argument="--no-indels" type="boolean" checked="false" truevalue="--no-indels" falsevalue="" label="Do not allow indels (Use ONLY with anchored 5' (front) adapters)." help="Do not allow indels in the alignments. That is, allow only mismatches. This option is currently only supported for anchored 5' adapters ('^ADAPTER') (default: both mismatches and indels are allowed)." />
227 <param argument="--times" type="integer" min="1" value="1" label="Match times" help="Try to remove adapters at most COUNT times. Useful when an adapter gets appended multiple times." /> 285 <param argument="--times" type="integer" min="1" value="1" label="Match times" help="Try to remove adapters at most COUNT times. Useful when an adapter gets appended multiple times." />
228 <param argument="--overlap" type="integer" min="1" value="3" label="Minimum overlap length" help="Minimum overlap length. If the overlap between the adapter and the sequence is shorter than LENGTH, the read is not modified. This reduces the number of bases trimmed purely due to short random adapter matches." /> 286 <param argument="--overlap" type="integer" min="1" value="3" label="Minimum overlap length" help="Minimum overlap length. If the overlap between the adapter and the sequence is shorter than LENGTH, the read is not modified. This reduces the number of bases trimmed purely due to short random adapter matches." />
229 <param argument="--match-read-wildcards" type="boolean" checked="false" truevalue="--match-read-wildcards" falsevalue="" label="Match wilcards in reads" help="Interpret IUPAC wildcards in reads"/> 287 <param argument="--match-read-wildcards" type="boolean" checked="false" truevalue="--match-read-wildcards" falsevalue="" label="Match wilcards in reads" help="Interpret IUPAC wildcards in reads"/>
230 <param argument="--no-match-adapter-wildcards" type="boolean" checked="true" truevalue="" falsevalue="--no-match-adapter-wildcards" label="Match wilcards in adapters" help="Interpret IUPAC wildcards in adapters."/> 288 <param argument="--no-match-adapter-wildcards" type="boolean" checked="true" truevalue="" falsevalue="--no-match-adapter-wildcards" label="Match wildcards in adapters" help="Interpret IUPAC wildcards in adapters."/>
231 <param argument="--revcomp" type="boolean" checked="false" truevalue="--revcomp" falsevalue="" label="Look for adapters in the reverse complement" help="Check both the read and its reverse complement for adapter matches. If match is on reverse-complemented version, output that one. Default: check only read." /> 289 <param name="revcomp" argument="--rc" type="boolean" checked="false" truevalue="--rc" falsevalue="" label="Look for adapters in the reverse complement" help="Check both the read and its reverse complement for adapter matches. If match is on reverse-complemented version, output that one. Default: check only read." />
232 </section> 290 </section>
233 291 <section name="other_trimming_options" title="Other Read Trimming Options">
234 <!-- Filter Options --> 292 <param argument="--cut" type="integer" value="0" label="Bases to cut from R1 reads before adapter trimming" help="Remove bases from each read (first read only if paired). If positive, remove bases from the beginning. If negative, remove bases from the end. This is applied *before* adapter trimming." />
235 <section name="filter_options" title="Filter Options"> 293 <param name="cut2" argument="-U" type="integer" value="0" label="Bases to cut from R2 reads before adapter trimming" help="For paired-end data, you can define here a cut value to apply to R2 reads. Usage is identical to the R1 setting. Default: 0; ignored for single-end data." />
236 <param argument="--discard-trimmed" type="boolean" checked="false" truevalue="--discard-trimmed" falsevalue="" label="Discard Trimmed Reads" help="Discard reads that contain the adapter instead of trimming them. Use the 'Minimum overlap length' option in order to avoid throwing away too many randomly matching reads!" /> 294 <param argument="--quality-cutoff" type="text" value="0" label="Quality cutoff(s) (R1)" help="Trim low-quality bases from 5' and/or 3' ends of each read before adapter removal. If one value is given, only the 3' end is trimmed. If two comma-separated cutoffs are given, the 5' end is trimmed with the first cutoff, the 3' end with the second.">
237 <param argument="--discard_untrimmed" type="boolean" checked="false" truevalue="--discard-untrimmed" falsevalue="" label="Discard Untrimmed Reads" help="Discard reads that do not contain the adapter." /> 295 <sanitizer>
238 <param argument="--minimum-length" type="integer" min="0" optional="true" value="" label="Minimum length (R1)" help="Discard trimmed reads that are shorter than LENGTH. Reads that are too short even before adapter removal are also discarded." /> 296 <valid initial="string.digits"><add value="," /></valid>
239 <param argument="--maximum-length" type="integer" min="0" optional="true" value="" label="Maximum length (R1)" help="Discard trimmed reads that are longer than LENGTH. Reads that are too long even before adapter removal are also discarded." /> 297 </sanitizer>
240 <param argument="--max-n" type="float" min="0" optional="true" label="Max N" help="Discard reads with more than this number of 'N' bases. A number between 0 and 1 is interpreted as a fraction of the read length." /> 298 <validator type="regex">[0-9]+(,[0-9])?</validator>
241 <param argument="--pair-filter" type="select" optional="true" label="Pair filter" help="Which of the reads in a paired-end read have to match the filtering criterion in order for the pair to be filtered. Default: any">
242 <option value="any" selected="true">Any: a read pair is discarded (or redirected) if one of the reads (R1 or R2) fulfills the filtering criterion. </option>
243 <option value="both">Both: filtering criteria must apply to both reads in order for a read pair to be discarded. </option>
244 <option value="first">First: will make a decision about the read pair by inspecting whether the filtering criterion applies to the first read, ignoring the second read.</option>
245
246 </param> 299 </param>
247 <param argument="--max-expected-errors" type="float" min="0" optional="true" value="" label="Max expected errors" help="Discard reads whose expected number of errors (computed from quality values) exceeds this value." /> 300 <param name="quality_cutoff2" argument="-Q" type="text" optional="true" value="" label="Quality cutoff(s) R2" help="For paired-end data, you can set here a separate quality cutoff to apply to R2 reads specifically. Leave empty to reuse the R1 cutoff setting. Ignored for single-end data. Syntax is identical to the R1 setting.">
248 <param argument="--max-average-error-rate" type="float" min="0" max="1" optional="true" value="" label="Max average expected errors" help="As --max-expected-errors (see above), but divided by length to account for reads of varying length" />
249 <param argument="--discard-cassava" type="boolean" truevalue="--discard-cassava" falsevalue="" checked="false" label="Discard CASAVA filtering" help="Discard reads that did not pass CASAVA filtering (header has :Y:)." />
250 </section>
251
252 <!-- Read Modification Options -->
253 <section name="read_mod_options" title="Read Modification Options">
254 <param argument="--cut" type="integer" value="0" optional="true" label="Cut bases from reads before adapter trimming" help="Remove bases from each read (first read only if paired). If positive, remove bases from the beginning. If negative, remove bases from the end. This is applied *before* adapter trimming." />
255 <param argument="--quality-cutoff" type="text" value="0" label="Quality cutoff" help=" Trim low-quality bases from 5' and/or 3' ends of each read before adapter removal. Applied to both reads for paired-end data, unless a separate value for the second read is specified. If one value is given, only the 3' end is trimmed. If two comma-separated cutoffs are given, the 5' end is trimmed with the first cutoff, the 3' end with the second.">
256 <sanitizer> 301 <sanitizer>
257 <valid initial="string.digits"><add value="," /></valid> 302 <valid initial="string.digits"><add value="," /></valid>
258 </sanitizer> 303 </sanitizer>
259 <validator type="regex">[0-9]+(,[0-9])?</validator> 304 <validator type="regex">[0-9]+(,[0-9])?</validator>
260 </param> 305 </param>
261 <param argument="--nextseq-trim" type="integer" value="0" label="NextSeq trimming" help="Experimental option for quality trimming of NextSeq data. This is necessary because that machine cannot distinguish between G and reaching the end of the fragment (it encodes G as ‘black’). This option works like regular quality trimming (where one would use -q 20 instead), except that the qualities of G bases are ignored." /> 306 <param argument="--nextseq-trim" type="integer" value="0" label="NextSeq trimming" help="Experimental option for quality trimming of NextSeq data. This is necessary because that machine cannot distinguish between G and reaching the end of the fragment (it encodes G as ‘black’). This option works like regular quality trimming (where one would use -q 20 instead), except that the qualities of G bases are ignored." />
262 <param argument="--trim-n" type="boolean" truevalue="--trim-n" falsevalue="" checked="false" label="Trim Ns" help="Trim N's on ends of reads." /> 307 <param argument="--trim-n" type="boolean" truevalue="--trim-n" falsevalue="" checked="false" label="Trim Ns" help="Trim N's on ends of reads." />
263 <param argument="--poly-a" type="boolean" truevalue="--poly-a" falsevalue="" checked="false" label="Trim poly-A tails" help="Note, this trim poly-T 'heads' on R2"/> 308 <param argument="--poly-a" type="boolean" truevalue="--poly-a" falsevalue="" checked="false" label="Trim poly-A tails" help="Note, this trim poly-T 'heads' on R2"/>
264 <param argument="--strip-suffix" label="Strip suffix" type="text" help="Remove this suffix from read names if present." />
265 <conditional name="shorten_options"> 309 <conditional name="shorten_options">
266 <param name="shorten_values" type="select" label="Shortening reads to a fixed length" help="If you want to remove a fixed number of bases from each read, use the –cut option instead."> 310 <param name="shorten_values" type="select" label="Shortening reads to a fixed length" help="If you want to remove a fixed number of bases from each read, use the –cut option instead.">
267 <option value="True">Enabled</option> 311 <option value="True">Enabled</option>
268 <option value="False" selected="true">Disabled</option> 312 <option value="False" selected="true">Disabled</option>
269 </param> 313 </param>
275 </param> 319 </param>
276 </when> 320 </when>
277 <when value="False"> 321 <when value="False">
278 </when> 322 </when>
279 </conditional> 323 </conditional>
324 </section>
325 <!-- Filter Options -->
326 <section name="filter_options" title="Read Filtering Options">
327 <param argument="--discard-trimmed" type="boolean" checked="false" truevalue="--discard-trimmed" falsevalue="" label="Discard Trimmed Reads" help="Discard reads that contain the adapter instead of trimming them. Use the 'Minimum overlap length' option in order to avoid throwing away too many randomly matching reads!" />
328 <param argument="--discard_untrimmed" type="boolean" checked="false" truevalue="--discard-untrimmed" falsevalue="" label="Discard Untrimmed Reads" help="Discard reads that do not contain the adapter." />
329 <param argument="--minimum-length" type="integer" min="0" value="1" label="Minimum length (R1)" help="Discard reads that, after processing, are shorter than LENGTH. Note: You can set this parameter to zero to keep empty reads (with zero-length sequence and quality string) in the output, but some downstream tools may have problems with these. Default: 1" />
330 <param name="minimum_length2" type="integer" min="0" value="" optional="true" label="Minimum length (R2)" help="For paired-end data, you can specify here a separate minimum length cutoff to apply to R2 reads. Leave empty to reuse the R1 cutoff set above. Ignored for single-end data." />
331 <param argument="--maximum-length" type="integer" min="0" optional="true" value="" label="Maximum length (R1)" help="Discard trimmed reads that are longer than LENGTH. Reads that are too long even before adapter removal are also discarded." />
332 <param name="maximum_length2" type="integer" min="0" value="" optional="true" label="Maximum length (R2)" help="For paired-end data, you can specify here a separate maximum length cutoff to apply to R2 reads. Leave empty to reuse the R1 cutoff set above. Ignored for single-end data." />
333 <param argument="--max-n" type="float" min="0" optional="true" label="Max N" help="Discard reads with more than this number of 'N' bases. A number between 0 and 1 is interpreted as a fraction of the read length." />
334 <param name="max_expected_errors" argument="--max-ee" type="float" min="0" optional="true" value="" label="Max expected errors" help="Discard reads whose expected number of errors (computed from quality values) exceeds this value." />
335 <param name="max_average_error_rate" argument="--max-aer" type="float" min="0" max="1" optional="true" value="" label="Max average expected errors" help="As --max-expected-errors (see above), but divided by length to account for reads of varying length" />
336 <param argument="--discard-casava" type="boolean" truevalue="--discard-casava" falsevalue="" checked="false" label="Discard CASAVA-filtered reads" help="Discard reads that did not pass CASAVA filtering (header has :Y:)." />
337 <param argument="--pair-filter" type="select" label="Pair filter" help="Which of the reads in a paired-end read have to match the filtering critera above in order for the pair to be filtered. Default: any">
338 <option value="any" selected="true">Any: a read pair is discarded (or redirected) if one of the reads (R1 or R2) fulfills the filtering criterion.</option>
339 <option value="both">Both: filtering criteria must apply to both reads in order for a read pair to be discarded.</option>
340 <option value="first">First: will make a decision about the read pair by inspecting whether the filtering criterion applies to the first read, ignoring the second read.</option>
341 </param>
342 </section>
343 <!-- Read Modification Options -->
344 <section name="read_mod_options" title="Read Modification Options">
345 <param argument="--strip-suffix" label="Strip suffix" type="text" help="Remove this suffix from read names if present." />
280 <param argument="--length-tag" label="Length tag" type="text" optional="true" help="Search for TAG followed by a decimal number in the name of the read (description/comment field of the FASTA or FASTQ file). Replace the decimal number with the correct length of the trimmed read. For example, use --length-tag 'length=' to search for fields like 'length=123'." > 346 <param argument="--length-tag" label="Length tag" type="text" optional="true" help="Search for TAG followed by a decimal number in the name of the read (description/comment field of the FASTA or FASTQ file). Replace the decimal number with the correct length of the trimmed read. For example, use --length-tag 'length=' to search for fields like 'length=123'." >
281 <sanitizer invalid_char=""> 347 <sanitizer invalid_char="">
282 <valid initial="string.letters,string.digits"> 348 <valid initial="string.letters,string.digits">
283 <add value="/" /> 349 <add value="/" />
284 <add value="+" /> 350 <add value="+" />
302 </sanitizer> 368 </sanitizer>
303 <validator type="regex">[A-Za-z0-9 {}=_]+</validator> 369 <validator type="regex">[A-Za-z0-9 {}=_]+</validator>
304 </param> 370 </param>
305 <param argument="--zero-cap" type="boolean" truevalue="--zero-cap" falsevalue="" checked="false" label="Change negative quality values to zero" /> 371 <param argument="--zero-cap" type="boolean" truevalue="--zero-cap" falsevalue="" checked="false" label="Change negative quality values to zero" />
306 </section> 372 </section>
307
308 <!-- Output Options --> 373 <!-- Output Options -->
309 <param name="output_selector" type="select" multiple="true" display="checkboxes" label="Outputs selector"> 374 <param name="output_selector" type="select" multiple="true" display="checkboxes" label="Additional outputs to generate">
310 <option value="report">Report: Cutadapt's per-adapter statistics. You can use this file with MultiQC.</option> 375 <option value="report">Report: Cutadapt's per-adapter statistics. You can use this file with MultiQC.</option>
311 <option value="info_file">Info file: write information about each read and its adapter matches.</option> 376 <option value="info_file">Info file: write information about each read and its adapter matches.</option>
312 <option value="rest_file">Rest of read: when the adapter matches in the middle of a read, write the rest (after the adapter).</option> 377 <option value="rest_file">Rest of read: when the adapter matches in the middle of a read, write the rest (after the adapter).</option>
313 <option value="wildcard_file">Wildcard file: when the adapter has wildcard bases (Ns) write adapter bases matching wildcard positions.</option> 378 <option value="wildcard_file">Wildcard file: when the adapter has wildcard bases (Ns) write adapter bases matching wildcard positions.</option>
314 <option value="too_short_file">Too short reads: write reads that are too short according to minimum length specified (default: discard reads).</option> 379 <option value="too_short_file">Too short reads: write reads that are too short according to minimum length specified (default: discard reads).</option>
316 <option value="untrimmed_file">Untrimmed reads: write reads that do not contain the adapter to a separate file, instead of writing them to the regular output file (default: output to same file as trimmed)</option> 381 <option value="untrimmed_file">Untrimmed reads: write reads that do not contain the adapter to a separate file, instead of writing them to the regular output file (default: output to same file as trimmed)</option>
317 <option value="multiple_output">Multiple output: create a separate file for each adapter trimmed (default: all trimmed reads are in a single file)</option> 382 <option value="multiple_output">Multiple output: create a separate file for each adapter trimmed (default: all trimmed reads are in a single file)</option>
318 <option value="json_stats">Statistics in JSON format</option> 383 <option value="json_stats">Statistics in JSON format</option>
319 </param> 384 </param>
320 </inputs> 385 </inputs>
321
322 <outputs> 386 <outputs>
323 <data name="out1" format="fastqsanger" metadata_source="library|input_1" default_identifier_source="library|input_1" from_work_dir="out1.*" label="${tool.name} on ${on_string}: Read 1 Output"> 387 <data name="out1" format="fastqsanger" metadata_source="library|input_1" default_identifier_source="library|input_1" from_work_dir="out1.*" label="${tool.name} on ${on_string}: Read 1 Output">
324 <filter>library['type'] != 'paired_collection' and 'multiple_output' not in output_selector</filter> 388 <filter>library['type'] != 'paired_collection'</filter>
389 <filter>not output_selector or 'multiple_output' not in output_selector</filter>
325 <expand macro="inherit_format_1" /> 390 <expand macro="inherit_format_1" />
326 </data> 391 </data>
327
328 <data name="out2" format="fastqsanger" metadata_source="library|input_2" default_identifier_source="library|input_2" from_work_dir="out2.*" label="${tool.name} on ${on_string}: Read 2 Output" > 392 <data name="out2" format="fastqsanger" metadata_source="library|input_2" default_identifier_source="library|input_2" from_work_dir="out2.*" label="${tool.name} on ${on_string}: Read 2 Output" >
329 <filter>library['type'] == 'paired' and 'multiple_output' not in output_selector</filter> 393 <filter>library['type'] == 'paired'</filter>
394 <filter>not output_selector or 'multiple_output' not in output_selector</filter>
330 <expand macro="inherit_format_2" /> 395 <expand macro="inherit_format_2" />
331 </data> 396 </data>
332
333 <collection name="out_pairs" type="paired" format_source="library|input_1" label="${tool.name} on ${on_string}: Reads"> 397 <collection name="out_pairs" type="paired" format_source="library|input_1" label="${tool.name} on ${on_string}: Reads">
334 <filter>library['type'] == 'paired_collection' and 'multiple_output' not in output_selector</filter> 398 <filter>library['type'] == 'paired_collection'</filter>
399 <filter>not output_selector or 'multiple_output' not in output_selector</filter>
335 <data name="forward" from_work_dir="out1.*" /> 400 <data name="forward" from_work_dir="out1.*" />
336 <data name="reverse" from_work_dir="out2.*" /> 401 <data name="reverse" from_work_dir="out2.*" />
337 </collection> 402 </collection>
338
339 <data name="report" format="txt" from_work_dir="report.txt" label="${tool.name} on ${on_string}: Report"> 403 <data name="report" format="txt" from_work_dir="report.txt" label="${tool.name} on ${on_string}: Report">
340 <filter>output_selector and 'report' in output_selector</filter> 404 <filter>output_selector and 'report' in output_selector</filter>
341 </data> 405 </data>
342 <data name="info_file" format="txt" metadata_source="library|input_1" label="${tool.name} on ${on_string}: Info File" > 406 <data name="info_file" format="txt" metadata_source="library|input_1" label="${tool.name} on ${on_string}: Info File" >
343 <filter>output_selector and 'info_file' in output_selector</filter> 407 <filter>output_selector and 'info_file' in output_selector</filter>
344 </data> 408 </data>
345
346 <data name="rest_output" format="fastqsanger" metadata_source="library|input_1" from_work_dir="rest_output*" label="${tool.name} on ${on_string}: Rest of Reads (R1 only)" > 409 <data name="rest_output" format="fastqsanger" metadata_source="library|input_1" from_work_dir="rest_output*" label="${tool.name} on ${on_string}: Rest of Reads (R1 only)" >
347 <filter>output_selector and 'rest_file' in output_selector</filter> 410 <filter>output_selector and 'rest_file' in output_selector</filter>
348 <expand macro="inherit_format_1" /> 411 <expand macro="inherit_format_1" />
349 </data> 412 </data>
350
351 <data name="wild_output" format="txt" metadata_source="library|input_1" from_work_dir="wild_output*" label="${tool.name} on ${on_string}: Wildcard File" > 413 <data name="wild_output" format="txt" metadata_source="library|input_1" from_work_dir="wild_output*" label="${tool.name} on ${on_string}: Wildcard File" >
352 <filter>output_selector and 'wildcard_file' in output_selector</filter> 414 <filter>output_selector and 'wildcard_file' in output_selector</filter>
353 </data> 415 </data>
354
355 <data name="untrimmed_output" format="fastqsanger" metadata_source="library|input_1" default_identifier_source="library|input_1" from_work_dir="untrimmed_output*" label="${tool.name} on ${on_string}: Untrimmed Read 1" > 416 <data name="untrimmed_output" format="fastqsanger" metadata_source="library|input_1" default_identifier_source="library|input_1" from_work_dir="untrimmed_output*" label="${tool.name} on ${on_string}: Untrimmed Read 1" >
356 <filter>output_selector and 'untrimmed_file' in output_selector</filter> 417 <filter>output_selector and 'untrimmed_file' in output_selector</filter>
357 <expand macro="inherit_format_1" /> 418 <expand macro="inherit_format_1" />
358 </data> 419 </data>
359 <data name="untrimmed_paired_output" format="fastqsanger" metadata_source="library|input_2" default_identifier_source="library|input_2" from_work_dir="untrimmed_paired_output*" label="${tool.name} on ${on_string}: Untrimmed Read 2" > 420 <data name="untrimmed_paired_output" format="fastqsanger" metadata_source="library|input_2" default_identifier_source="library|input_2" from_work_dir="untrimmed_paired_output*" label="${tool.name} on ${on_string}: Untrimmed Read 2" >
360 <filter>library['type'] == 'paired' or library['type'] == 'paired_collection'</filter> 421 <filter>library['type'] == 'paired' or library['type'] == 'paired_collection'</filter>
361 <filter>output_selector and 'untrimmed_file' in output_selector </filter> 422 <filter>output_selector and 'untrimmed_file' in output_selector </filter>
362 <expand macro="inherit_format_2" /> 423 <expand macro="inherit_format_2" />
363 </data> 424 </data>
364
365 <data name="too_short_output" format="fastqsanger" metadata_source="library|input_1" default_identifier_source="library|input_1" from_work_dir="too_short_output*" label="${tool.name} on ${on_string}: Too Short Read 1" > 425 <data name="too_short_output" format="fastqsanger" metadata_source="library|input_1" default_identifier_source="library|input_1" from_work_dir="too_short_output*" label="${tool.name} on ${on_string}: Too Short Read 1" >
366 <filter>output_selector and 'too_short_file' in output_selector</filter> 426 <filter>output_selector and 'too_short_file' in output_selector</filter>
367 <expand macro="inherit_format_1" /> 427 <expand macro="inherit_format_1" />
368 </data> 428 </data>
369 <data name="too_short_paired_output" format="fastqsanger" metadata_source="library|input_2" default_identifier_source="library|input_2" from_work_dir="too_short_paired_output*" label="${tool.name} on ${on_string}: Too Short Read 2" > 429 <data name="too_short_paired_output" format="fastqsanger" metadata_source="library|input_2" default_identifier_source="library|input_2" from_work_dir="too_short_paired_output*" label="${tool.name} on ${on_string}: Too Short Read 2" >
386 <collection name="split_output" type="list" label="${tool.name} on ${on_string}: Split outputs" format="fastqsanger" > 446 <collection name="split_output" type="list" label="${tool.name} on ${on_string}: Split outputs" format="fastqsanger" >
387 <discover_datasets pattern="(?P&lt;designation&gt;.+)\.(?P&lt;ext&gt;fastq.*)" directory="split" /> 447 <discover_datasets pattern="(?P&lt;designation&gt;.+)\.(?P&lt;ext&gt;fastq.*)" directory="split" />
388 <filter>output_selector and 'multiple_output' in output_selector</filter> 448 <filter>output_selector and 'multiple_output' in output_selector</filter>
389 </collection> 449 </collection>
390 </outputs> 450 </outputs>
391
392 <tests> 451 <tests>
393 <!-- Ensure fastq works --> 452 <!-- Ensure fastq works -->
394 <test expect_num_outputs="1"> 453 <test expect_num_outputs="1">
395 <param name="type" value="single" /> 454 <param name="type" value="single" />
396 <param name="input_1" ftype="fastq" value="cutadapt_small.fastq" /> 455 <param name="input_1" ftype="fastq" value="cutadapt_small.fastq" />
442 <output name="out1" decompress="true" file="cutadapt_out1.fq.gz" ftype="fastq.gz"/> 501 <output name="out1" decompress="true" file="cutadapt_out1.fq.gz" ftype="fastq.gz"/>
443 <output name="out2" decompress="true" file="cutadapt_out2.fq.gz" ftype="fastq.gz"/> 502 <output name="out2" decompress="true" file="cutadapt_out2.fq.gz" ftype="fastq.gz"/>
444 <assert_command> 503 <assert_command>
445 <not_has_text text="--discard-trimmed"/> 504 <not_has_text text="--discard-trimmed"/>
446 <not_has_text text="--discard-untrimmed"/> 505 <not_has_text text="--discard-untrimmed"/>
447 <not_has_text text="--minimum-length"/>
448 <not_has_text text="--maximum-length"/> 506 <not_has_text text="--maximum-length"/>
449 <not_has_text text="--max-n"/> 507 <not_has_text text="--max-n"/>
450 <has_text text="--pair-filter=any"/> 508 <has_text text="--minimum-length=1 "/>
451 </assert_command> 509 </assert_command>
452 </test> 510 </test>
453 <!-- Ensure paired collection works --> 511 <!-- Ensure paired collection works -->
454 <test expect_num_outputs="3"> 512 <test expect_num_outputs="3">
455 <param name="type" value="paired_collection" /> 513 <param name="type" value="paired_collection" />
511 <output name="out1" file="cutadapt_discard.out" ftype="fastq"/> 569 <output name="out1" file="cutadapt_discard.out" ftype="fastq"/>
512 <assert_command> 570 <assert_command>
513 <has_text text="--discard-trimmed"/> 571 <has_text text="--discard-trimmed"/>
514 </assert_command> 572 </assert_command>
515 </test> 573 </test>
574 <!-- Ensure pair-adapters option works -->
575 <test expect_num_outputs="2">
576 <param name="type" value="paired" />
577 <param name="input_1" ftype="fastq.gz" value="bwa-mem-fastq1.fq.gz" />
578 <param name="input_2" ftype="fastq.gz" value="bwa-mem-fastq2.fq.gz" />
579 <section name="r1">
580 <repeat name="adapters">
581 <conditional name="adapter_source">
582 <param name="adapter_source_list" value="user"/>
583 <param name="adapter" value="AGATCGGAAGAGC"/>
584 </conditional>
585 </repeat>
586 </section>
587 <section name="r2">
588 <repeat name="adapters2">
589 <conditional name="adapter_source">
590 <param name="adapter_source_list" value="user"/>
591 <param name="adapter" value="AGATCGGAAGAGC"/>
592 </conditional>
593 </repeat>
594 </section>
595 <param name="pair_adapters" value="true"/>
596 <output name="out1" decompress="true" file="cutadapt_out1_pair_adapters.fq.gz" ftype="fastq.gz"/>
597 <output name="out2" decompress="true" file="cutadapt_out2_pair_adapters.fq.gz" ftype="fastq.gz"/>
598 <assert_command>
599 <not_has_text text="--discard-trimmed"/>
600 <not_has_text text="--discard-untrimmed"/>
601 <not_has_text text="--maximum-length"/>
602 <not_has_text text="--max-n"/>
603 <has_text text="--pair-adapters"/>
604 <has_text text="--minimum-length=1 "/>
605 </assert_command>
606 </test>
516 <!-- Ensure rest file output works, test json output --> 607 <!-- Ensure rest file output works, test json output -->
517 <test expect_num_outputs="3"> 608 <test expect_num_outputs="3">
518 <param name="input_1" ftype="fasta" value="cutadapt_rest.fa" /> 609 <param name="input_1" ftype="fasta" value="cutadapt_rest.fa" />
519 <section name="r1"> 610 <section name="r1">
520 <repeat name="adapters"> 611 <repeat name="adapters">
524 </conditional> 615 </conditional>
525 </repeat> 616 </repeat>
526 </section> 617 </section>
527 <param name="output_selector" value="rest_file,json_stats"/> 618 <param name="output_selector" value="rest_file,json_stats"/>
528 <output name="out1" file="cutadapt_rest.out" ftype="fasta"/> 619 <output name="out1" file="cutadapt_rest.out" ftype="fasta"/>
529 <!--allow for differing schema, cutadapt and python version as well as two lines containing the number of cores --> 620 <!-- compare to expected reproducible part of json output -->
530 <output name="json_stats" file="cutadapt_rest.json" ftype="json" lines_diff="10"/> 621 <output name="json_stats" file="cutadapt_rest_json.txt" compare="contains"/>
531 <output name="rest_output" file="cutadapt_rest2.out" ftype="fasta"/> 622 <output name="rest_output" file="cutadapt_rest2.out" ftype="fasta"/>
532 </test> 623 </test>
533 <!-- Ensure nextseq-trim option works --> 624 <!-- Ensure nextseq-trim option works -->
534 <test expect_num_outputs="1"> 625 <test expect_num_outputs="1">
535 <param name="type" value="single" /> 626 <param name="type" value="single" />
540 <param name="adapter_source_list" value="user"/> 631 <param name="adapter_source_list" value="user"/>
541 <param name="adapter" value="AGATCGGAAGAGC"/> 632 <param name="adapter" value="AGATCGGAAGAGC"/>
542 </conditional> 633 </conditional>
543 </repeat> 634 </repeat>
544 </section> 635 </section>
545 <param name="nextseq_trim" value="20" /> 636 <section name="other_trimming_options">
637 <param name="nextseq_trim" value="20" />
638 </section>
546 <output name="out1" decompress="true" file="cutadapt_nextseq_out.fq.gz" ftype="fastq.gz"/> 639 <output name="out1" decompress="true" file="cutadapt_nextseq_out.fq.gz" ftype="fastq.gz"/>
547 </test> 640 </test>
548 <!-- Ensure Report and Info file output work --> 641 <!-- Ensure Report and Info file output work -->
549 <test expect_num_outputs="3"> 642 <test expect_num_outputs="3">
550 <param name="type" value="single" /> 643 <param name="type" value="single" />
564 <has_text text="Summary"/> 657 <has_text text="Summary"/>
565 </assert_contents> 658 </assert_contents>
566 </output> 659 </output>
567 <output name="info_file" value="cutadapt_info_out.txt" ftype="txt"/> 660 <output name="info_file" value="cutadapt_info_out.txt" ftype="txt"/>
568 </test> 661 </test>
569
570
571 <test expect_num_outputs="1"> 662 <test expect_num_outputs="1">
572 <conditional name="library"> 663 <conditional name="library">
573 <param name="type" value="single" /> 664 <param name="type" value="single" />
574 <param name="input_1" ftype="fastq" value="cutadapt_in_split.fastq" /> 665 <param name="input_1" ftype="fastq" value="cutadapt_in_split.fastq" />
575 <section name="r1" > 666 <section name="r1" >
597 </element> 688 </element>
598 <element name="unknown" value="unknown.fastq" ftype="fastq"> 689 <element name="unknown" value="unknown.fastq" ftype="fastq">
599 </element> 690 </element>
600 </output_collection> 691 </output_collection>
601 </test> 692 </test>
602
603 <test expect_num_outputs="1"> 693 <test expect_num_outputs="1">
604 <conditional name="library"> 694 <conditional name="library">
605 <param name="type" value="single" /> 695 <param name="type" value="single" />
606 <param name="input_1" ftype="fastq.gz" value="cutadapt_in_split.fastq.gz" /> 696 <param name="input_1" ftype="fastq.gz" value="cutadapt_in_split.fastq.gz" />
607 <section name="r1" > 697 <section name="r1" >
618 <element name="A1" decompress="true" file="A1.fastq.gz" ftype="fastq.gz"/> 708 <element name="A1" decompress="true" file="A1.fastq.gz" ftype="fastq.gz"/>
619 <element name="A2" decompress="true" file="A2.fastq.gz" ftype="fastq.gz"/> 709 <element name="A2" decompress="true" file="A2.fastq.gz" ftype="fastq.gz"/>
620 <element name="unknown" decompress="true" file="unknown.fastq.gz" ftype="fastq.gz"/> 710 <element name="unknown" decompress="true" file="unknown.fastq.gz" ftype="fastq.gz"/>
621 </output_collection> 711 </output_collection>
622 </test> 712 </test>
623
624 <!-- Ensure untrimmed file output works --> 713 <!-- Ensure untrimmed file output works -->
625 <test expect_num_outputs="2"> 714 <test expect_num_outputs="2">
626 <param name="type" value="single" /> 715 <param name="type" value="single" />
627 <param name="input_1" ftype="fastq" value="cutadapt_small.fastq" /> 716 <param name="input_1" ftype="fastq" value="cutadapt_small.fastq" />
628 <section name="r1"> 717 <section name="r1">
680 </conditional> 769 </conditional>
681 </repeat> 770 </repeat>
682 </section> 771 </section>
683 <section name="filter_options"> 772 <section name="filter_options">
684 <param name="discard_untrimmed" value="true"/> 773 <param name="discard_untrimmed" value="true"/>
685 <param name="minimun_length" value="1"/> 774 <param name="minimum_length" value="1"/>
686 <param name="maximum_length" value="1000"/> 775 <param name="maximum_length" value="1000"/>
687 <param name="max_n" value="0"/> 776 <param name="max_n" value="0"/>
688 <param name="pair_filter" value="both"/> 777 <param name="pair_filter" value="both"/>
689 </section> 778 </section>
690 <assert_command> 779 <assert_command>
691 <has_text text="--discard-untrimmed"/> 780 <has_text text="--discard-untrimmed"/>
692 <has_text text="--maximum-length=1000"/> 781 <has_text text="--minimum-length=1 " />
782 <has_text text="--maximum-length=1000 "/>
693 <has_text text="--max-n=0"/> 783 <has_text text="--max-n=0"/>
694 <has_text text="--pair-filter=both"/> 784 <has_text text="--pair-filter=both"/>
695 </assert_command> 785 </assert_command>
696 </test> 786 </test>
697 <!-- Test cut option --> 787 <!-- Test cut option -->
704 <param name="adapter_source_list" value="user"/> 794 <param name="adapter_source_list" value="user"/>
705 <param name="adapter" value="AGATCGGAAGAGC"/> 795 <param name="adapter" value="AGATCGGAAGAGC"/>
706 </conditional> 796 </conditional>
707 </repeat> 797 </repeat>
708 </section> 798 </section>
709 <param name="cut" value="5"/> 799 <section name="other_trimming_options">
800 <param name="cut" value="5"/>
801 </section>
710 <output name="out1" file="cutadapt_small_cut.out" ftype="fastq"/> 802 <output name="out1" file="cutadapt_small_cut.out" ftype="fastq"/>
711 </test> 803 </test>
712 <!-- Test rename options --> 804 <!-- Test rename options -->
713 <test expect_num_outputs="1"> 805 <test expect_num_outputs="1">
714 <param name="type" value="single" /> 806 <param name="type" value="single" />
719 <param name="adapter_source_list" value="user"/> 811 <param name="adapter_source_list" value="user"/>
720 <param name="adapter" value="AGATCGGAAGAGC"/> 812 <param name="adapter" value="AGATCGGAAGAGC"/>
721 </conditional> 813 </conditional>
722 </repeat> 814 </repeat>
723 </section> 815 </section>
724 <param name="cut" value="5"/> 816 <section name="other_trimming_options">
817 <param name="cut" value="5"/>
818 </section>
725 <section name="read_mod_options"> 819 <section name="read_mod_options">
726 <param name="rename" value="{id} barcode={cut_prefix}"/> 820 <param name="rename" value="{id} barcode={cut_prefix}"/>
727 </section> 821 </section>
728 <output name="out1" file="cutadapt_small_rename.out" ftype="fastq"> 822 <output name="out1" file="cutadapt_small_rename.out" ftype="fastq">
729 <assert_contents> 823 <assert_contents>
831 <conditional name="adapter_source"> 925 <conditional name="adapter_source">
832 <param name="adapter_source_list" value="user"/> 926 <param name="adapter_source_list" value="user"/>
833 <param name="adapter" value="CTACAAG"/> 927 <param name="adapter" value="CTACAAG"/>
834 </conditional> 928 </conditional>
835 </repeat> 929 </repeat>
836 <param name="minimum_length2" value="10"/>
837 </section> 930 </section>
838 <section name="filter_options"> 931 <section name="filter_options">
839 <param name="minimum_length" value="30"/> 932 <param name="minimum_length" value="30"/>
933 <param name="minimum_length2" value="10"/>
840 <param name="pair_filter" value="both"/> 934 <param name="pair_filter" value="both"/>
841 </section> 935 </section>
842 <output name="out1" decompress="true" file="cutadapt_out1_min_length.fq.gz" ftype="fastq.gz"/> 936 <output name="out1" decompress="true" file="cutadapt_out1_min_length.fq.gz" ftype="fastq.gz"/>
843 <output name="out2" decompress="true" file="cutadapt_out2_min_length.fq.gz" ftype="fastq.gz"/> 937 <output name="out2" decompress="true" file="cutadapt_out2_min_length.fq.gz" ftype="fastq.gz"/>
844 <assert_command> 938 <assert_command>
845 <has_text text="--minimum-length=30:10"/> 939 <has_text text="--minimum-length=30:10"/>
846 <has_text text="--pair-filter=both"/> 940 <has_text text="--pair-filter=both"/>
847 </assert_command> 941 </assert_command>
848 </test> 942 </test>
849 <!-- Test maximum lenghth paired-reads --> 943 <!-- Test maximum length paired-reads -->
850 <test expect_num_outputs="2"> 944 <test expect_num_outputs="2">
851 <param name="type" value="paired" /> 945 <param name="type" value="paired" />
852 <param name="input_1" ftype="fastq.gz" value="bwa-mem-fastq1_assimetric.fq.gz" /> 946 <param name="input_1" ftype="fastq.gz" value="bwa-mem-fastq1_assimetric.fq.gz" />
853 <param name="input_2" ftype="fastq.gz" value="bwa-mem-fastq2_assimetric.fq.gz" /> 947 <param name="input_2" ftype="fastq.gz" value="bwa-mem-fastq2_assimetric.fq.gz" />
854 <section name="r1"> 948 <section name="r1">
864 <conditional name="adapter_source"> 958 <conditional name="adapter_source">
865 <param name="adapter_source_list" value="user"/> 959 <param name="adapter_source_list" value="user"/>
866 <param name="adapter" value="AGATCGGAAGAGC"/> 960 <param name="adapter" value="AGATCGGAAGAGC"/>
867 </conditional> 961 </conditional>
868 </repeat> 962 </repeat>
869 <param name="maximum_length2" value="30"/>
870 </section> 963 </section>
871 <section name="filter_options"> 964 <section name="filter_options">
872 <param name="pair_filter" value="both"/> 965 <param name="pair_filter" value="both"/>
873 <param name="maximum_length" value="50"/> 966 <param name="maximum_length" value="50"/>
967 <param name="maximum_length2" value="30"/>
874 </section> 968 </section>
875 <output name="out1" decompress="true" file="cutadapt_out1_max_length.fq.gz" ftype="fastq.gz"/> 969 <output name="out1" decompress="true" file="cutadapt_out1_max_length.fq.gz" ftype="fastq.gz"/>
876 <output name="out2" decompress="true" file="cutadapt_out2_max_length.fq.gz" ftype="fastq.gz"/> 970 <output name="out2" decompress="true" file="cutadapt_out2_max_length.fq.gz" ftype="fastq.gz"/>
877 <assert_command> 971 <assert_command>
878 <has_text text="--maximum-length=50:30"/> 972 <has_text text="--maximum-length=50:30"/>
897 <conditional name="adapter_source2"> 991 <conditional name="adapter_source2">
898 <param name="adapter_source_list2" value="user"/> 992 <param name="adapter_source_list2" value="user"/>
899 <param name="adapter2" value="AGATCGGAAGAGC"/> 993 <param name="adapter2" value="AGATCGGAAGAGC"/>
900 </conditional> 994 </conditional>
901 </repeat> 995 </repeat>
902 <param name="maximum_length2" value="30"/>
903 </section> 996 </section>
904 <section name="filter_options"> 997 <section name="filter_options">
905 <param name="pair_filter" value="both"/> 998 <param name="pair_filter" value="both"/>
906 <param name="minimum_length" value="10"/> 999 <param name="minimum_length" value="10"/>
907 <param name="maximum_length" value="50"/> 1000 <param name="maximum_length" value="50"/>
1001 <param name="maximum_length2" value="30"/>
908 </section> 1002 </section>
909 <output name="out1" decompress="true" file="cutadapt_out1_max_min_01.fq.gz" ftype="fastq.gz"/> 1003 <output name="out1" decompress="true" file="cutadapt_out1_max_min_01.fq.gz" ftype="fastq.gz"/>
910 <output name="out2" decompress="true" file="cutadapt_out2_max_min_01.fq.gz" ftype="fastq.gz"/> 1004 <output name="out2" decompress="true" file="cutadapt_out2_max_min_01.fq.gz" ftype="fastq.gz"/>
911 <assert_command> 1005 <assert_command>
912 <has_text text="--minimum-length=10"/> 1006 <has_text text="--minimum-length=10 "/>
913 <has_text text="--maximum-length=50:30"/> 1007 <has_text text="--maximum-length=50:30"/>
914 <has_text text="--pair-filter=both"/> 1008 <has_text text="--pair-filter=both"/>
915 </assert_command> 1009 </assert_command>
916 </test> 1010 </test>
917 <test expect_num_outputs="2"> 1011 <test expect_num_outputs="2">
931 <conditional name="adapter_source2"> 1025 <conditional name="adapter_source2">
932 <param name="adapter_source_list2" value="user"/> 1026 <param name="adapter_source_list2" value="user"/>
933 <param name="adapter2" value="AGATCGGAAGAGC"/> 1027 <param name="adapter2" value="AGATCGGAAGAGC"/>
934 </conditional> 1028 </conditional>
935 </repeat> 1029 </repeat>
936 <param name="minimum_length2" value="10"/>
937 </section> 1030 </section>
938 <section name="filter_options"> 1031 <section name="filter_options">
939 <param name="pair_filter" value="both"/> 1032 <param name="pair_filter" value="both"/>
940 <param name="minimum_length" value="10"/> 1033 <param name="minimum_length" value="10"/>
1034 <param name="minimum_length2" value="10"/>
941 <param name="maximum_length" value="50"/> 1035 <param name="maximum_length" value="50"/>
942 </section> 1036 </section>
943 <output name="out1" decompress="true" file="cutadapt_out1_max_min_02.fq.gz" ftype="fastq.gz"/> 1037 <output name="out1" decompress="true" file="cutadapt_out1_max_min_02.fq.gz" ftype="fastq.gz"/>
944 <output name="out2" decompress="true" file="cutadapt_out2_max_min_02.fq.gz" ftype="fastq.gz"/> 1038 <output name="out2" decompress="true" file="cutadapt_out2_max_min_02.fq.gz" ftype="fastq.gz"/>
945 <assert_command> 1039 <assert_command>
946 <has_text text="--minimum-length=10:10"/> 1040 <has_text text="--minimum-length=10:10"/>
947 <has_text text="--maximum-length=50"/> 1041 <has_text text="--maximum-length=50 "/>
948 <has_text text="--pair-filter=both"/> 1042 <has_text text="--pair-filter=both"/>
949 </assert_command> 1043 </assert_command>
950 </test> 1044 </test>
951 <!-- Test length options --> 1045 <!-- Test length options -->
952 <test expect_num_outputs="1"> 1046 <test expect_num_outputs="1">
958 <param name="adapter_source_list" value="user"/> 1052 <param name="adapter_source_list" value="user"/>
959 <param name="adapter" value="AGCCGCTANGACG"/> 1053 <param name="adapter" value="AGCCGCTANGACG"/>
960 </conditional> 1054 </conditional>
961 </repeat> 1055 </repeat>
962 </section> 1056 </section>
963 <section name="read_mod_options"> 1057 <section name="other_trimming_options">
964 <conditional name="shorten_options"> 1058 <conditional name="shorten_options">
965 <param name="shorten_values" value="True"/> 1059 <param name="shorten_values" value="True"/>
966 <param name="shorten_end" value="3prime"/> 1060 <param name="shorten_end" value="3prime"/>
967 <param name="length" value="10"/> 1061 <param name="length" value="10"/>
968 </conditional> 1062 </conditional>
969 </section> 1063 </section>
1064 <section name="filter_options">
1065 <param name="minimum_length" value="0"/>
1066 </section>
970 <output name="out1" file="cutadapt_shorten_3prime.out" ftype="fastq"/> 1067 <output name="out1" file="cutadapt_shorten_3prime.out" ftype="fastq"/>
971 </test> 1068 </test>
972 <test expect_num_outputs="1"> 1069 <test expect_num_outputs="1">
973 <param name="type" value="single" /> 1070 <param name="type" value="single" />
974 <param name="input_1" ftype="fastq" value="cutadapt_small.fastq" /> 1071 <param name="input_1" ftype="fastq" value="cutadapt_small.fastq" />
978 <param name="adapter_source_list" value="user"/> 1075 <param name="adapter_source_list" value="user"/>
979 <param name="adapter" value="AGCCGCTANGACG"/> 1076 <param name="adapter" value="AGCCGCTANGACG"/>
980 </conditional> 1077 </conditional>
981 </repeat> 1078 </repeat>
982 </section> 1079 </section>
983 <section name="read_mod_options"> 1080 <section name="other_trimming_options">
984 <conditional name="shorten_options"> 1081 <conditional name="shorten_options">
985 <param name="shorten_values" value="True"/> 1082 <param name="shorten_values" value="True"/>
986 <param name="shorten_end" value="5prime"/> 1083 <param name="shorten_end" value="5prime"/>
987 <param name="length" value="10"/> 1084 <param name="length" value="10"/>
988 </conditional> 1085 </conditional>
1086 </section>
1087 <section name="filter_options">
1088 <param name="minimum_length" value="0"/>
989 </section> 1089 </section>
990 <output name="out1" file="cutadapt_shorten_5prime.out" ftype="fastq"/> 1090 <output name="out1" file="cutadapt_shorten_5prime.out" ftype="fastq"/>
991 </test> 1091 </test>
992 <!-- Test max expected errors options --> 1092 <!-- Test max expected errors options -->
993 <test expect_num_outputs="1"> 1093 <test expect_num_outputs="1">
1105 <collection type="paired"> 1205 <collection type="paired">
1106 <element name="forward" ftype="fastq.gz" value="bwa-mem-fastq1.fq.gz" /> 1206 <element name="forward" ftype="fastq.gz" value="bwa-mem-fastq1.fq.gz" />
1107 <element name="reverse" ftype="fastq.gz" value="bwa-mem-fastq2.fq.gz" /> 1207 <element name="reverse" ftype="fastq.gz" value="bwa-mem-fastq2.fq.gz" />
1108 </collection> 1208 </collection>
1109 </param> 1209 </param>
1110 <param name="quality_cutoff" value="5" />
1111 <section name="r1"> 1210 <section name="r1">
1112 <repeat name="adapters"> 1211 <repeat name="adapters">
1113 <conditional name="adapter_source"> 1212 <conditional name="adapter_source">
1114 <param name="adapter_source_list" value="user"/> 1213 <param name="adapter_source_list" value="user"/>
1115 <param name="adapter" value="AGATCGGAAGAGC"/> 1214 <param name="adapter" value="AGATCGGAAGAGC"/>
1121 <conditional name="adapter_source"> 1220 <conditional name="adapter_source">
1122 <param name="adapter_source_list" value="user"/> 1221 <param name="adapter_source_list" value="user"/>
1123 <param name="adapter" value="AGATCGGAAGAGC"/> 1222 <param name="adapter" value="AGATCGGAAGAGC"/>
1124 </conditional> 1223 </conditional>
1125 </repeat> 1224 </repeat>
1225 </section>
1226 <section name="other_trimming_options">
1227 <param name="quality_cutoff" value="5"/>
1126 <param name="quality_cutoff2" value="15,20"/> 1228 <param name="quality_cutoff2" value="15,20"/>
1127 </section> 1229 </section>
1128 <output_collection name="out_pairs" type="paired"> 1230 <output_collection name="out_pairs" type="paired">
1129 <element name="forward" decompress="true" file="cutadapt_out1.fq.gz" ftype="fastq.gz" /> 1231 <element name="forward" decompress="true" file="cutadapt_out1.fq.gz" ftype="fastq.gz" />
1130 <element name="reverse" decompress="true" file="cutadapt_out2_cutoff.fq.gz" ftype="fastq.gz" /> 1232 <element name="reverse" decompress="true" file="cutadapt_out2_cutoff.fq.gz" ftype="fastq.gz" />
1131 </output_collection> 1233 </output_collection>
1132 <assert_command> 1234 <assert_command>
1133 <has_text text="--quality-cutoff=5"/> 1235 <has_text text="--quality-cutoff=5"/>
1134 <has_text text="-Q=15,20"/> 1236 <has_text text="-Q 15,20"/>
1135 </assert_command> 1237 </assert_command>
1136 </test> 1238 </test>
1137 1239
1138 <!-- polyA trimming https://github.com/marcelm/cutadapt/blob/e04cc32e392e1cbe0c518b4e0637cdf03533d440/tests/test_paired.py#L768 --> 1240 <!-- polyA trimming https://github.com/marcelm/cutadapt/blob/e04cc32e392e1cbe0c518b4e0637cdf03533d440/tests/test_paired.py#L768 -->
1139 <test expect_num_outputs="3"> 1241 <test expect_num_outputs="3">
1142 <collection type="paired"> 1244 <collection type="paired">
1143 <element name="forward" ftype="fasta" value="cutadapt/data/polya.1.fasta" /> 1245 <element name="forward" ftype="fasta" value="cutadapt/data/polya.1.fasta" />
1144 <element name="reverse" ftype="fasta" value="cutadapt/data/polya.2.fasta" /> 1246 <element name="reverse" ftype="fasta" value="cutadapt/data/polya.2.fasta" />
1145 </collection> 1247 </collection>
1146 </param> 1248 </param>
1147 <param name="poly_a" value="true"/> 1249 <section name="other_trimming_options">
1250 <param name="poly_a" value="true"/>
1251 </section>
1148 <output_collection name="out_pairs" type="paired"> 1252 <output_collection name="out_pairs" type="paired">
1149 <element name="forward" file="cutadapt/cut/polya.1.fasta" ftype="fasta" /> 1253 <element name="forward" file="cutadapt/cut/polya.1.fasta" ftype="fasta" />
1150 <element name="reverse" file="cutadapt/cut/polya.2.fasta" ftype="fasta" /> 1254 <element name="reverse" file="cutadapt/cut/polya.2.fasta" ftype="fasta" />
1151 </output_collection> 1255 </output_collection>
1152 <assert_command> 1256 <assert_command>
1196 1300
1197 <!-- polyA https://github.com/marcelm/cutadapt/blob/e04cc32e392e1cbe0c518b4e0637cdf03533d440/tests/test_commandline.py#L281 --> 1301 <!-- polyA https://github.com/marcelm/cutadapt/blob/e04cc32e392e1cbe0c518b4e0637cdf03533d440/tests/test_commandline.py#L281 -->
1198 <test expect_num_outputs="1"> 1302 <test expect_num_outputs="1">
1199 <param name="type" value="single" /> 1303 <param name="type" value="single" />
1200 <param name="input_1" ftype="fasta" value="cutadapt/data/polya.1.fasta" /> 1304 <param name="input_1" ftype="fasta" value="cutadapt/data/polya.1.fasta" />
1201 <param name="poly_a" value="true"/> 1305 <section name="other_trimming_options">
1306 <param name="poly_a" value="true"/>
1307 </section>
1202 <output name="out1" file="cutadapt/cut/polya.1.fasta" ftype="fasta"/> 1308 <output name="out1" file="cutadapt/cut/polya.1.fasta" ftype="fasta"/>
1203 <assert_command> 1309 <assert_command>
1204 <has_text text="--poly-a"/> 1310 <has_text text="--poly-a"/>
1205 </assert_command> 1311 </assert_command>
1206 </test> 1312 </test>
1207
1208 <!-- fasta.gz input https://github.com/marcelm/cutadapt/blob/e04cc32e392e1cbe0c518b4e0637cdf03533d440/tests/test_commandline.py#L78 1313 <!-- fasta.gz input https://github.com/marcelm/cutadapt/blob/e04cc32e392e1cbe0c518b4e0637cdf03533d440/tests/test_commandline.py#L78
1209 in contrast to the original test the tool automatically compresses the output --> 1314 in contrast to the original test the tool automatically compresses the output -->
1210 <test expect_num_outputs="1"> 1315 <test expect_num_outputs="1">
1211 <param name="type" value="single" /> 1316 <param name="type" value="single" />
1212 <param name="input_1" ftype="fasta.gz" value="cutadapt/data/simple.fasta.gz" /> 1317 <param name="input_1" ftype="fasta.gz" value="cutadapt/data/simple.fasta.gz" />
1213 <output name="out1" decompress="true" file="cutadapt/cut/simple.fasta.gz" ftype="fasta.gz"/> 1318 <output name="out1" decompress="true" file="cutadapt/cut/simple.fasta.gz" ftype="fasta.gz"/>
1214 </test> 1319 </test>
1215
1216 <!-- fastqillumina input https://github.com/marcelm/cutadapt/blob/e04cc32e392e1cbe0c518b4e0637cdf03533d440/tests/test_commandline.py#L252 --> 1320 <!-- fastqillumina input https://github.com/marcelm/cutadapt/blob/e04cc32e392e1cbe0c518b4e0637cdf03533d440/tests/test_commandline.py#L252 -->
1217 <test expect_num_outputs="1"> 1321 <test expect_num_outputs="1">
1218 <param name="type" value="single" /> 1322 <param name="type" value="single" />
1219 <param name="input_1" ftype="fastqillumina" value="cutadapt/data/illumina64.fastq" /> 1323 <param name="input_1" ftype="fastqillumina" value="cutadapt/data/illumina64.fastq" />
1220 <section name="r1"> 1324 <section name="r1">
1223 <param name="adapter_source_list" value="user"/> 1327 <param name="adapter_source_list" value="user"/>
1224 <param name="adapter" value="XXXXXX"/> 1328 <param name="adapter" value="XXXXXX"/>
1225 </conditional> 1329 </conditional>
1226 </repeat> 1330 </repeat>
1227 </section> 1331 </section>
1228 <param name="quality_cutoff" value="10"/> 1332 <section name="other_trimming_options">
1333 <param name="quality_cutoff" value="10"/>
1334 </section>
1335 <section name="filter_options">
1336 <param name="minimum_length" value="0"/>
1337 </section>
1229 <output name="out1" file="cutadapt/cut/illumina64.fastq" ftype="fastqillumina"/> 1338 <output name="out1" file="cutadapt/cut/illumina64.fastq" ftype="fastqillumina"/>
1230 <assert_command> 1339 <assert_command>
1231 <has_text text="-a 'XXXXXX'"/> 1340 <has_text text="-a 'XXXXXX'"/>
1232 <has_text text="--quality-cutoff=10"/> 1341 <has_text text="--quality-cutoff=10"/>
1233 <has_text text="--quality-base=64"/> 1342 <has_text text="--quality-base=64"/>
1343 <not_has_text text="--minimum-length "/>
1234 </assert_command> 1344 </assert_command>
1235 </test> 1345 </test>
1236 <!-- fastqillumina input https://github.com/marcelm/cutadapt/blob/e04cc32e392e1cbe0c518b4e0637cdf03533d440/tests/test_commandline.py#L257 --> 1346 <!-- fastqillumina input https://github.com/marcelm/cutadapt/blob/e04cc32e392e1cbe0c518b4e0637cdf03533d440/tests/test_commandline.py#L257 -->
1237 <test expect_num_outputs="1"> 1347 <test expect_num_outputs="1">
1238 <param name="type" value="single" /> 1348 <param name="type" value="single" />
1239 <param name="input_1" ftype="fastqillumina" value="cutadapt/data/illumina64.fastq" /> 1349 <param name="input_1" ftype="fastqillumina" value="cutadapt/data/illumina64.fastq" />
1240 <param name="quality_cutoff" value="10"/> 1350 <section name="other_trimming_options">
1351 <param name="quality_cutoff" value="10"/>
1352 </section>
1353 <section name="filter_options">
1354 <param name="minimum_length" value="0"/>
1355 </section>
1241 <output name="out1" file="cutadapt/cut/illumina64.fastq" ftype="fastqillumina"/> 1356 <output name="out1" file="cutadapt/cut/illumina64.fastq" ftype="fastqillumina"/>
1242 <assert_command> 1357 <assert_command>
1243 <has_text text="--quality-cutoff=10"/> 1358 <has_text text="--quality-cutoff=10"/>
1244 <has_text text="--quality-base=64"/> 1359 <has_text text="--quality-base=64"/>
1245 </assert_command> 1360 <not_has_text text="--minimum-length"/>
1246 </test> 1361 </assert_command>
1247 1362 </test>
1248 <!-- https://github.com/marcelm/cutadapt/blame/3407ac0004d04b11ae7157934a6665ecaf82c328/tests/test_commandline.py#L1028 --> 1363 <!-- https://github.com/marcelm/cutadapt/blame/3407ac0004d04b11ae7157934a6665ecaf82c328/tests/test_commandline.py#L1028 -->
1249 <test expect_num_outputs="3"> 1364 <test expect_num_outputs="3">
1250 <param name="type" value="single" /> 1365 <param name="type" value="single" />
1251 <param name="input_1" ftype="fastqillumina" value="cutadapt/data/maxee.fastq" /> 1366 <param name="input_1" ftype="fastqillumina" value="cutadapt/data/maxee.fastq" />
1252 <param name="max_expected_errors" value="0.9"/> 1367 <section name="filter_options">
1368 <param name="minimum_length" value="0"/>
1369 <param name="max_expected_errors" value="0.9"/>
1370 </section>
1253 <param name="output_selector" value="report,info_file"/> 1371 <param name="output_selector" value="report,info_file"/>
1254 <output name="out1" file="cutadapt/cut/maxee.fastq" ftype="fastqillumina"/> 1372 <output name="out1" file="cutadapt/cut/maxee.fastq" ftype="fastqillumina"/>
1255 <output name="report"> 1373 <output name="report">
1256 <assert_contents> 1374 <assert_contents>
1257 <has_text text="Reads with too many exp. errors: 2"/> 1375 <has_text text="Reads with too many exp. errors: 2"/>
1262 <has_n_lines n="4"/> 1380 <has_n_lines n="4"/>
1263 <has_text text="ee_0.8&#09;-1&#09;ACGTTGCA&#09;++++++++"/> 1381 <has_text text="ee_0.8&#09;-1&#09;ACGTTGCA&#09;++++++++"/>
1264 </assert_contents> 1382 </assert_contents>
1265 </output> 1383 </output>
1266 <assert_command> 1384 <assert_command>
1267 <has_text text="--max-expected-errors=0.9"/> 1385 <has_text text="--max-ee=0.9"/>
1268 </assert_command> 1386 </assert_command>
1269 </test> 1387 </test>
1270
1271 <!-- https://github.com/marcelm/cutadapt/blob/3407ac0004d04b11ae7157934a6665ecaf82c328/tests/test_commandline.py#L335 --> 1388 <!-- https://github.com/marcelm/cutadapt/blob/3407ac0004d04b11ae7157934a6665ecaf82c328/tests/test_commandline.py#L335 -->
1272 <test expect_num_outputs="1"> 1389 <test expect_num_outputs="1">
1273 <param name="type" value="single" /> 1390 <param name="type" value="single" />
1274 <param name="input_1" ftype="fasta" value="cutadapt/data/wildcard.fa" /> 1391 <param name="input_1" ftype="fasta" value="cutadapt/data/wildcard.fa" />
1275 <section name="r1"> 1392 <section name="r1">
1287 <assert_command> 1404 <assert_command>
1288 <has_text text="--match-read-wildcards"/> 1405 <has_text text="--match-read-wildcards"/>
1289 <has_text text="-b 'ACGTACGT'"/> 1406 <has_text text="-b 'ACGTACGT'"/>
1290 </assert_command> 1407 </assert_command>
1291 </test> 1408 </test>
1292
1293 <!-- https://github.com/marcelm/cutadapt/blob/3407ac0004d04b11ae7157934a6665ecaf82c328/tests/test_commandline.py#L368 --> 1409 <!-- https://github.com/marcelm/cutadapt/blob/3407ac0004d04b11ae7157934a6665ecaf82c328/tests/test_commandline.py#L368 -->
1294 <test expect_num_outputs="1"> 1410 <test expect_num_outputs="1">
1295 <param name="type" value="single" /> 1411 <param name="type" value="single" />
1296 <param name="input_1" ftype="fasta" value="cutadapt/data/wildcardN.fa" /> 1412 <param name="input_1" ftype="fasta" value="cutadapt/data/wildcardN.fa" />
1297 <section name="r1"> 1413 <section name="r1">
1344 1460
1345 .. class:: infomark 1461 .. class:: infomark
1346 1462
1347 **What it does** 1463 **What it does**
1348 1464
1349 -------------------
1350
1351 **Cutadapt** finds and removes adapter sequences, primers, poly-A tails and other types of unwanted sequence from your high-throughput sequencing reads. 1465 **Cutadapt** finds and removes adapter sequences, primers, poly-A tails and other types of unwanted sequence from your high-throughput sequencing reads.
1352 1466
1353 Cleaning your data in this way is often required: Reads from small-RNA sequencing contain the 3’ sequencing adapter because the read is longer than the molecule that is sequenced, such as in microRNA, or CRISPR data, or Poly-A tails that are useful for pulling out RNA from your sample but often you don’t want them to be in your reads. 1467 Cleaning your data in this way is often required: Reads from small-RNA sequencing contain the 3’ sequencing adapter because the read is longer than the molecule that is sequenced, such as in microRNA, or CRISPR data, or Poly-A tails that are useful for pulling out RNA from your sample but often you don’t want them to be in your reads.
1354 1468
1355 Cutadapt_ helps with these trimming tasks by finding the adapter or primer sequences in an error-tolerant way. It can also modify and filter reads in various ways. Cutadapt searches for the adapter in all reads and removes it when it finds it. Unless you use a filtering option, all reads that were present in the input file will also be present in the output file, some of them trimmed, some of them not. Even reads that were trimmed entirely (because the adapter was found in the very beginning) are output. All of this can be changed with options in the tool form above. 1469 Cutadapt helps with these trimming tasks by finding the adapter or primer sequences in an error-tolerant way. It can also modify and filter reads in various ways. Cutadapt searches for the adapter in all reads and removes it when it finds it. Unless you use a filtering option, all reads that were present in the input file will also be present in the output file, some of them trimmed, some of them not. Even reads that were trimmed entirely (because the adapter was found in the very beginning) are output. All of this can be changed with options in the tool form above.
1356 1470
1357 The tool is based on the **Open Source** Cutadapt_ tool. See the complete `Cutadapt documentation`_ for additional details. If you use Cutadapt, please cite *Marcel, 2011* under **Citations** below. 1471 See the complete `Cutadapt documentation`_ for additional details.
1358 1472
1359 ------------------- 1473 If you use Cutadapt, please cite *Marcel, 2011* under **Citations** below.
1360 1474
1361 **Inputs** 1475 -----
1362 1476
1363 ------------------- 1477 Input Sequences
1364 1478 ***************
1365 Input files for Cutadapt need to be: 1479
1366 1480 Accepted input formats for the tool are:
1367 - FASTQ.GZ, FASTQ.BZ2, FASTQ or FASTA 1481
1482 - FASTQ.GZ
1483 - FASTQ.BZ2
1484 - FASTQ or
1485 - FASTA
1486
1487 -----
1488
1489 Specifying Adapters
1490 *******************
1368 1491
1369 To trim an adapter, input the ADAPTER sequence in plain text or in a FASTA file e.g. AACCGGTT (with the characters: **$**, **^**, **...**, if anchored or linked). 1492 To trim an adapter, input the ADAPTER sequence in plain text or in a FASTA file e.g. AACCGGTT (with the characters: **$**, **^**, **...**, if anchored or linked).
1370 1493
1371 ============================================= =================== 1494 ============================================= ===================
1372 **Option** **Sequence** 1495 **Option** **Sequence**
1414 1537
1415 AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTGGTCGCCGTATCATT 1538 AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTGGTCGCCGTATCATT
1416 1539
1417 The adapter sequences can be found in the document `Illumina TruSeq Adapters De-Mystified`_. 1540 The adapter sequences can be found in the document `Illumina TruSeq Adapters De-Mystified`_.
1418 1541
1542
1419 ----------- 1543 -----------
1420 1544
1421 **Outputs** 1545 **Paired Adapters**
1422 1546
1423 ----------- 1547 -----------
1548
1549 Normally, the tool looks for adapters on R1 and R2 reads independently. That is, the best matching R1 adapter of each type (3' End, 5' End, Anywhere) is removed from R1 and the best matching R2 adapter of each type is removed from R2.
1550
1551 To change this, you can use the **Pairwise adapter search** (--pair-adapters) option, which causes each R1 adapter to be paired up with its corresponding R2 adapter. The first R1 adapter of a given type that you specify will be paired up with the first R2 adapter of that type, and so on. The adapters are then always removed in pairs from a read pair.
1552
1553 For example, if you specify the following two 3'-end adapters for the R1 reads:
1554
1555 - ``AAAAA``
1556 - ``GGGGG``
1557
1558 and these two 3'-end adapters for the R2 reads:
1559
1560 - ``CCCC``
1561 - ``TTTT``
1562
1563 then, with this option enabled, the tool will trim a pair of reads only if:
1564
1565 - either ``AAAAA`` is found in R1 and ``CCCC`` is found in R2,
1566 - or ``GGGGG`` is found in R1 and ``TTTT`` is found in R2.
1567
1568 Two limitations exist in this mode:
1569
1570 1. You need to provide equal numbers of R1 and R2 adapters of each type to allow pair formation, or the tool run will fail.
1571 2. The algorithm identifies the best-matching R1 adapter first and then checks whether it can find its corresponding R2 adapter. If not, the read pair remains unchanged, even though it is, in theory, possible that a different R1 adapter that does not fit as well would have had a corresponding R2 adapter present, i.e., some legitimate adapter pairs might remain unhandled.
1572
1573 This mode is useful, for example, for `demultiplexing Illumina unique dual indices (UDIs)`_.
1574
1575 -----
1576
1577 Outputs
1578 *******
1424 1579
1425 - Trimmed reads 1580 - Trimmed reads
1426 1581
1427 Optionally, under **Output Options** you can choose to output 1582 Optionally, under **Output Options** you can choose to output
1428 1583
1429 * Report 1584 * Report
1430 * Info file 1585 * Info file
1431 1586
1587 -----------
1432 1588
1433 **Report** 1589 **Report**
1434 1590
1435 Cutadapt can output per-adapter statistics if you select to output the report above. 1591 -----------
1592
1593 Cutadapt can output per-adapter statistics if you select to generate the report above.
1436 1594
1437 Example: 1595 Example:
1438 1596
1439 :: 1597 ::
1440 1598
1459 Read 2: 24,144 bp 1617 Read 2: 24,144 bp
1460 Total written (filtered): 48,171 bp (99.8%) 1618 Total written (filtered): 48,171 bp (99.8%)
1461 Read 1: 24,090 bp 1619 Read 1: 24,090 bp
1462 Read 2: 24,081 bp 1620 Read 2: 24,081 bp
1463 1621
1622 -----------
1464 1623
1465 **Info file** 1624 **Info file**
1625
1626 -----------
1466 1627
1467 The info file contains information about the found adapters. The output is a tab-separated text file. Each line corresponds to one read of the input file. 1628 The info file contains information about the found adapters. The output is a tab-separated text file. Each line corresponds to one read of the input file.
1468 1629
1469 Columns contain the following data: 1630 Columns contain the following data:
1470 1631
1491 1652
1492 When parsing the file, be aware that additional columns may be added in the future. Note also that some fields can be empty, resulting in consecutive tabs within a line. 1653 When parsing the file, be aware that additional columns may be added in the future. Note also that some fields can be empty, resulting in consecutive tabs within a line.
1493 1654
1494 If the --times option is used and greater than 1, each read can appear more than once in the info file. There will be one line for each found adapter, all with identical read names. Only for the first of those lines will the concatenation of columns 5-7 be identical to the original read sequence (and accordingly for columns 9-11). For subsequent lines, the shown sequences are the ones that were used in subsequent rounds of adapter trimming, that is, they get successively shorter. 1655 If the --times option is used and greater than 1, each read can appear more than once in the info file. There will be one line for each found adapter, all with identical read names. Only for the first of those lines will the concatenation of columns 5-7 be identical to the original read sequence (and accordingly for columns 9-11). For subsequent lines, the shown sequences are the ones that were used in subsequent rounds of adapter trimming, that is, they get successively shorter.
1495 1656
1496
1497 -------------------- 1657 --------------------
1498 1658
1499 **Rename Reads** 1659 Renaming Reads
1500 1660 **************
1501 --------------------
1502
1503 1661
1504 The --rename option expects a template string such as {id} extra_info {adapter_name} as a parameter. It can contain regular text and placeholders that consist of a name enclosed in curly braces ({placeholdername}). 1662 The --rename option expects a template string such as {id} extra_info {adapter_name} as a parameter. It can contain regular text and placeholders that consist of a name enclosed in curly braces ({placeholdername}).
1505 1663
1506 The read name will be set to the template string in which the placeholders are replaced with the actual values relevant for the current read. 1664 The read name will be set to the template string in which the placeholders are replaced with the actual values relevant for the current read.
1507 1665
1521 For paired-end data, the placeholder {rn} is available (“read number”), and it is replaced with 1 in R1 and with 2 in R2. 1679 For paired-end data, the placeholder {rn} is available (“read number”), and it is replaced with 1 in R1 and with 2 in R2.
1522 1680
1523 In addition, it is possible to write a placeholder as {r1.placeholdername} or {r2.placeholdername}, which always takes the replacement value from R1 or R2, respectively. 1681 In addition, it is possible to write a placeholder as {r1.placeholdername} or {r2.placeholdername}, which always takes the replacement value from R1 or R2, respectively.
1524 The {r1.placeholder} and {r2.placeholder} notation is available for all placeholders except {rn} and {id} because the read ID needs to be identical for both reads. 1682 The {r1.placeholder} and {r2.placeholder} notation is available for all placeholders except {rn} and {id} because the read ID needs to be identical for both reads.
1525 1683
1526 -------------------- 1684 -----
1527 1685
1528 **More Information** 1686 **Galaxy Wrapper Development**
1529 1687
1530 -------------------- 1688 Original author: Lance Parsons <lparsons@princeton.edu>
1531 1689
1532 See the excellent `Cutadapt documentation`_ 1690 -----
1533 1691
1534 .. _Cutadapt: https://cutadapt.readthedocs.io/en/stable/ 1692 .. _`Cutadapt documentation`: https://cutadapt.readthedocs.io
1535 .. _`Cutadapt documentation`: https://cutadapt.readthedocs.io/en/latest/index.html
1536 .. _`Illumina TruSeq Adapters De-Mystified`: http://tucf-genomics.tufts.edu/documents/protocols/TUCF_Understanding_Illumina_TruSeq_Adapters.pdf 1693 .. _`Illumina TruSeq Adapters De-Mystified`: http://tucf-genomics.tufts.edu/documents/protocols/TUCF_Understanding_Illumina_TruSeq_Adapters.pdf
1537 1694 .. _`demultiplexing Illumina unique dual indices (UDIs)`: https://cutadapt.readthedocs.io/en/stable/guide.html#unique-dual-indices
1538
1539 --------------------
1540
1541 **Galaxy Wrapper Development**
1542
1543 --------------------
1544
1545 Author: Lance Parsons <lparsons@princeton.edu>
1546
1547 ]]></help> 1695 ]]></help>
1548 <expand macro="citations" /> 1696 <expand macro="citations" />
1549 </tool> 1697 </tool>