comparison cutadapt.xml @ 0:60b449221e6a draft

Uploaded
author jackcurragh
date Wed, 13 Apr 2022 09:15:28 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:60b449221e6a
1 <tool id="cutadapt" name="Cutadapt" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="20.01">
2 <description>Remove adapter sequences from FASTQ/FASTA</description>
3 <macros>
4 <import>macros.xml</import>
5 </macros>
6 <expand macro='edam_ontology' />
7 <expand macro='xrefs'/>
8 <expand macro='requirements' />
9 <version_command>cutadapt --version</version_command>
10
11 <command detect_errors="exit_code"><![CDATA[
12 ## Link in the input and output files, so Cutadapt can tell their type
## Each input is symlinked under a name built from its identifier plus a
## datatype-specific extension. This first part only works out the base names.
13
14 #import re
## Fallback link names; every branch below overwrites read1, and the two
## paired branches also overwrite read2.
15 #set read1 = "input_f"
16 #set read2 = "input_r"
## paired is switched on by both paired branches and stays False otherwise.
17 #set paired = False
18 #set library_type = str($library.type)
19 #if $library_type == 'paired':
## Two separate datasets. Any character that is not a word character, '-' or
## whitespace is replaced by '_' in the link name.
20 #set paired = True
21 #set read1 = re.sub('[^\w\-\s]', '_', str($library.input_1.element_identifier))
22 #set read2 = re.sub('[^\w\-\s]', '_', str($library.input_2.element_identifier))
23 #set input_1 = $library.input_1
24 #set input_2 = $library.input_2
25 #else if $library_type == 'paired_collection'
## One paired collection: both link names come from the collection name, with
## "_1" / "_2" appended for the forward / reverse element.
26 #set paired = True
27 #set input_1 = $library.input_1.forward
28 #set input_2 = $library.input_1.reverse
29 #set read1 = re.sub('[^\w\-\s]', '_', str($library.input_1.name)) + "_1"
30 #set read2 = re.sub('[^\w\-\s]', '_', str($library.input_1.name)) + "_2"
31 #else
## Any other library type: only read 1 exists, input_2 is never set.
32 #set input_1 = $library.input_1
33 #set read1 = re.sub('[^\w\-\s]', '_', str($library.input_1.element_identifier))
34 #end if
35
## Choose a file extension from the datatype of read 1. It is appended to the
## link name and to the output file names set below; anything that is not
## gzip/bzip2 FASTQ or FASTA falls back to ".fq".
36 #if $input_1.is_of_type("fastq.gz", "fastqsanger.gz"):
37 #set ext = ".fq.gz"
38 #else if $input_1.is_of_type("fastq.bz2", "fastqsanger.bz2"):
39 #set ext = ".fq.bz2"
40 #else if $input_1.is_of_type('fasta'):
41 #set ext = ".fa"
42 #else:
43 #set ext = ".fq"
44 #end if
45 #set read1 = $read1 + $ext
46 #set out1 = "out1" + $ext
## NOTE(review): the five names below are not referenced again in the visible
## part of this command - presumably the macro-expanded read options use them;
## confirm against macros.xml.
47 #set rest_output = "rest_output" + $ext
48 #set wild_output = "wild_output" + $ext
49 #set too_short_output = "too_short_output" + $ext
50 #set too_long_output = "too_long_output" + $ext
51 #set untrimmed_output = "untrimmed_output" + $ext
## Symlink read 1 into the working directory under its new name.
52 ln -f -s '${input_1}' '$read1' &&
53
## Paired data only: repeat the extension detection for read 2 so that its
## link name and its output file names carry the matching suffix.
54 #if $paired:
55 #if $input_2.is_of_type("fastq.gz", "fastqsanger.gz"):
56 #set ext2 = ".fq.gz"
57 #else if $input_2.is_of_type("fastq.bz2", "fastqsanger.bz2"):
## bzip2 input must get a .bz2 suffix, exactly as in the read 1 branch; a
## .gz suffix here would mislabel both the link and the read 2 outputs.
58 #set ext2 = ".fq.bz2"
59 #else if $input_2.is_of_type('fasta'):
60 #set ext2 = ".fa"
61 #else:
62 #set ext2 = ".fq"
63 #end if
64 #set read2 = $read2 + $ext2
65 #set out2 = "out2" + $ext2
## NOTE(review): the three names below are not referenced again in the visible
## part of this command - presumably the macro-expanded read options use them.
66 #set too_short_paired_output = "too_short_paired_output" + $ext2
67 #set too_long_paired_output = "too_long_paired_output" + $ext2
68 #set untrimmed_paired_output = "untrimmed_paired_output" + $ext2
## Symlink read 2 into the working directory under its new name.
69 ln -f -s '${input_2}' '$read2' &&
70 #end if
71
72 ## Run Cutadapt
73
## Split files are written into split/, so the directory has to exist before
## Cutadapt starts.
74 #if 'multiple_output' in $output_selector:
75 mkdir split &&
76 #end if
77
78 cutadapt
79
## The backslash leaves the dollar sign for the shell, which expands
## GALAXY_SLOTS at run time and falls back to 4 when it is unset or empty.
80 -j=\${GALAXY_SLOTS:-4}
81
82 #if 'json_stats' in $output_selector:
83 --json stats.json
84 #end if
85
## Single-end runs write either one file or, with multiple output selected,
## files under split/ named from the literal {name} placeholder (presumably
## filled in by Cutadapt with the adapter name - confirm). Every other
## library type writes a read 1 / read 2 pair.
86 #if str( $library.type ) == "single":
87 @read1_options@
88 #if 'multiple_output' in $output_selector:
89 --output='split/{name}.${input_1.ext}'
90 #else:
91 --output='$out1'
92 #end if
93 #else:
94 @read1_options@
95 @read2_options@
96 --output='$out1'
97 --paired-output='$out2'
98 #end if
99
## Adapter matching options. The boolean and select parameters expand straight
## to the flag text configured on the parameter (or to nothing).
100 --error-rate=$adapter_options.error_rate
101 --times=$adapter_options.times
102 --overlap=$adapter_options.overlap
103 $adapter_options.no_indels
104 $adapter_options.match_read_wildcards
105 --action=$adapter_options.action
106 $adapter_options.revcomp
107
## Filter options.
108 $filter_options.discard_trimmed
109 $filter_options.discard_untrimmed
110
## Length limits: with separate R2 lengths enabled the value is passed as
## R1:R2, otherwise only the R1 value is passed.
## NOTE(review): nothing is emitted when the R1 value is empty, even if an R2
## value was entered - confirm that dropping an R2-only limit is intended.
111 #if str($filter_options.minimum_length) and str($filter_options.length_R2_options.length_R2_status) == 'True':
112 --minimum-length=$filter_options.minimum_length:$filter_options.length_R2_options.R2_minimum
113 #else if str($filter_options.minimum_length):
114 --minimum-length=$filter_options.minimum_length
115 #end if
116 #if str($filter_options.maximum_length) and str($filter_options.length_R2_options.length_R2_status) == 'True':
117 --maximum-length=$filter_options.maximum_length:$filter_options.length_R2_options.R2_maximum
118 #else if str($filter_options.maximum_length):
119 --maximum-length=$filter_options.maximum_length
120 #end if
121 #if str($filter_options.max_n):
122 --max-n=$filter_options.max_n
123 #end if
## The pair filter is only passed for the paired library types.
124 #if str( $library.type ) != "single":
125 #if $filter_options.pair_filter:
126 --pair-filter=$filter_options.pair_filter
127 #end if
128 #end if
129 #if str($filter_options.max_expected_errors):
130 --max-expected-errors=$filter_options.max_expected_errors
131 #end if
132 $filter_options.discard_cassava
133
## Read modification options. Quality and NextSeq trimming are skipped while
## the parameter still renders as '0'.
134 #if str($read_mod_options.quality_cutoff) != '0':
135 --quality-cutoff=$read_mod_options.quality_cutoff
136 #end if
137 #if str($read_mod_options.nextseq_trim) != '0':
138 --nextseq-trim=$read_mod_options.nextseq_trim
139 #end if
140 $read_mod_options.trim_n
## The suffix is free text: compare it as a string and single-quote it, like
## the length tag and rename values below, so spaces or shell metacharacters
## in the value cannot split or alter the command line.
141 #if str($read_mod_options.strip_suffix) != ''
142 --strip-suffix '$read_mod_options.strip_suffix'
143 #end if
144 #if str($read_mod_options.shorten_options.shorten_values) == 'True':
## The 3' choice passes the length as entered; the other choice passes it
## with a leading minus sign.
145 #if str($read_mod_options.shorten_options.shorten_end) == '3prime'
146 --length=$read_mod_options.shorten_options.length
147 #else
148 --length=-$read_mod_options.shorten_options.length
149 #end if
150 #end if
151 #if str($read_mod_options.length_tag) != '':
152 --length-tag='$read_mod_options.length_tag'
153 #end if
154 #if str($read_mod_options.rename) != '':
155 --rename='$read_mod_options.rename'
156 #end if
157 $read_mod_options.zero_cap
158
159
## Positional arguments: the linked read 1 file, plus the linked read 2 file
## for paired data.
160 '${read1}'
161 #if $paired:
162 '${read2}'
## Separate quality cutoff for read 2, passed only when the value is truthy.
163 #if $library.r2.quality_cutoff2:
164 -Q=$library.r2.quality_cutoff2
165 #end if
166 #end if
167
## When the report is requested, standard output is redirected into report.txt.
168 #if 'report' in $output_selector:
169 > report.txt
170 #end if
171 ]]></command>
172 <inputs>
173
174 <!-- Reads -->
<!-- Every branch defines input_1; only the "paired" branch adds a separate input_2.
     The command template reads them as library.input_1 / library.input_2 and, for the
     collection branch, as library.input_1.forward / library.input_1.reverse. -->
175 <conditional name="library">
176 <param name="type" type="select" label="Single-end or Paired-end reads?">
177 <option value="single">Single-end</option>
178 <option value="paired">Paired-end</option>
179 <option value="paired_collection">Paired-end Collection</option>
180 </param>
181
182 <when value="single">
183 <param name="input_1" format="@FASTQ_TYPES@" type="data" label="FASTQ/A file" help="Should be of datatype &quot;fastq.gz&quot; or &quot;fasta&quot;" />
184 <expand macro="single_end_options" />
185 </when>
186
187 <when value="paired">
188 <param name="input_1" format="@FASTQ_TYPES@" type="data" label="FASTQ/A file #1" help="Should be of datatype &quot;fastq.gz&quot; or &quot;fasta&quot;" />
189 <param name="input_2" format="@FASTQ_TYPES@" type="data" label="FASTQ/A file #2" help="Should be of datatype &quot;fastq.gz&quot; or &quot;fasta&quot;" />
190 <expand macro="paired_end_options" />
191 </when>
192
193 <when value="paired_collection">
194 <param name="input_1" format="@FASTQ_TYPES@" type="data_collection" collection_type="paired" label="Paired Collection" help="Should be of datatype &quot;fastq.gz&quot; or &quot;fasta&quot;" />
195 <expand macro="paired_end_options" />
196 </when>
197
198 </conditional>
199
200 <!-- Adapter Options -->
<!-- Values of this section are read by the command template as adapter_options.NAME.
     Boolean parameters render as their truevalue / falsevalue text, and the
     match_read_wildcards select renders as the chosen option value. -->
201 <section name="adapter_options" title="Adapter Options">
202 <param name="action" type="select" label="What to do if a match is found">
203 <option value="trim" selected="True">Trim: trim adapter and upstream or downstream sequence</option>
204 <option value="retain">Retain: the read is trimmed, but the adapter sequence is not removed</option>
205 <option value="mask">Mask: mask adapters with 'N' characters instead of trimming them</option>
206 <option value="lowercase">Lowercase: convert to lowercase</option>
207 <option value="none">None: leave unchanged</option>
208 </param>
<!-- NOTE(review): "internal" is not referenced in the visible part of the command
     template; presumably the macro-expanded adapter options use it. -->
209 <param name="internal" type="select" label="Disallow internal adaptor occurrences" help="The non-internal 5’ and 3’ adapter types disallow internal occurrences of the adapter sequence. The adapter must always be at one of the ends of the read, but partial occurrences are also ok.">
210 <option value="X">Enabled</option>
211 <option value="" selected="True">Disabled</option>
212 </param>
213 <param argument="--error-rate" type="float" min="0" max="1" value="0.1" label="Maximum error rate" help="Maximum allowed error rate (no. of errors divided by the length of the matching region)." />
214 <param argument="--no-indels" type="boolean" value="False" truevalue="--no-indels" falsevalue="" label="Do not allow indels (Use ONLY with anchored 5' (front) adapters)." help="Do not allow indels in the alignments. That is, allow only mismatches. This option is currently only supported for anchored 5' adapters ('^ADAPTER') (default: both mismatches and indels are allowed)." />
215 <param argument="--times" type="integer" min="1" value="1" label="Match times" help="Try to remove adapters at most COUNT times. Useful when an adapter gets appended multiple times." />
216 <param argument="--overlap" type="integer" min="1" value="3" label="Minimum overlap length" help="Minimum overlap length. If the overlap between the adapter and the sequence is shorter than LENGTH, the read is not modified. This reduces the number of bases trimmed purely due to short random adapter matches." />
217 <param name="match_read_wildcards" type="select" label="Match wildcards" help="Allow 'N's as matches. Default: in the adapters but not in the reads">
218 <option value=" " selected="True">In the adapters but not in the reads</option>
219 <option value="--match-read-wildcards">In the adapters and in the reads</option>
220 <option value="--no-match-adapter-wildcards">Nowhere</option>
221 </param>
222 <param argument="--revcomp" type="boolean" value="False" truevalue="--revcomp" falsevalue="" label="Look for adapters in the reverse complement" help="Check both the read and its reverse complement for adapter matches. If match is on reverse-complemented version, output that one. Default: check only read." />
223 </section>
224
225 <!-- Filter Options -->
<!-- Values of this section are read by the command template as filter_options.NAME.
     The optional numeric parameters are only passed to Cutadapt when they are non-empty. -->
226 <section name="filter_options" title="Filter Options">
227 <param argument="--discard-trimmed" type="boolean" value="False" truevalue="--discard-trimmed" falsevalue="" label="Discard Trimmed Reads" help="Discard reads that contain the adapter instead of trimming them. Use the 'Minimum overlap length' option in order to avoid throwing away too many randomly matching reads!" />
<!-- The argument now shows the flag as Cutadapt spells it (with a hyphen); the derived
     parameter name is still discard_untrimmed. -->
228 <param argument="--discard-untrimmed" type="boolean" value="False" truevalue="--discard-untrimmed" falsevalue="" label="Discard Untrimmed Reads" help="Discard reads that do not contain the adapter." />
229 <param argument="--minimum-length" type="integer" min="0" optional="True" value="25" label="Minimum length (R1)" help="Discard trimmed reads that are shorter than LENGTH. Reads that are too short even before adapter removal are also discarded." />
230 <param argument="--maximum-length" type="integer" min="0" optional="True" value="" label="Maximum length (R1)" help="Discard trimmed reads that are longer than LENGTH. Reads that are too long even before adapter removal are also discarded." />
<!-- When enabled, the command template passes minimum / maximum lengths as R1:R2. -->
231 <conditional name="length_R2_options">
232 <param name="length_R2_status" type="select" label="Specify a minimum/maximum length for reverse reads (R2)" help="When trimming paired-end reads, the minimum/maximum lengths for R1 and R2 can be specified separately. If not provided, the same minimum length applies to both reads.">
233 <option value="True">Enabled</option>
234 <option value="False" selected="True">Disabled</option>
235 </param>
236 <when value="True">
237 <param name="R2_minimum" type="integer" min="0" value="" optional="True" label="Minimum length (R2)" />
238 <param name="R2_maximum" type="integer" min="0" value="" optional="True" label="Maximum length (R2)" />
239 </when>
240 <when value="False">
241 </when>
242 </conditional>
243 <param argument="--max-n" type="float" min="0" optional="True" label="Max N" help="Discard reads with more than this number of 'N' bases. A number between 0 and 1 is interpreted as a fraction of the read length." />
<!-- Only passed to Cutadapt for the paired library types. -->
244 <param argument="--pair-filter" type="select" optional="True" label="Pair filter" help="Which of the reads in a paired-end read have to match the filtering criterion in order for the pair to be filtered. Default: any">
245 <option value="any" selected="True">Any: a read pair is discarded (or redirected) if one of the reads (R1 or R2) fulfills the filtering criterion. </option>
246 <option value="both">Both: filtering criteria must apply to both reads in order for a read pair to be discarded. </option>
247 <option value="first">First: will make a decision about the read pair by inspecting whether the filtering criterion applies to the first read, ignoring the second read.</option>
248
249 </param>
250 <param argument="--max-expected-errors" type="integer" min="0" optional="True" value="" label="Max expected errors" help="Discard reads whose expected number of errors (computed from quality values) exceeds this value." />
<!-- Cutadapt spells this flag discard-casava (single "s"); the double-"s" spelling is
     not a Cutadapt option and makes the run fail. The parameter name is pinned to the
     old spelling so the command template (filter_options.discard_cassava) and saved
     workflows keep resolving it. -->
251 <param name="discard_cassava" argument="--discard-casava" type="boolean" truevalue="--discard-casava" falsevalue="" checked="False" label="Discard CASAVA filtering" help="Discard reads that did not pass CASAVA filtering (header has :Y:)." />
252 </section>
253
254 <!-- Read Modification Options -->
<!-- Values of this section are read by the command template as read_mod_options.NAME.
     The text parameters carry a sanitizer (characters kept) and a regex validator;
     the two should describe the same character set. -->
255 <section name="read_mod_options" title="Read Modification Options">
<!-- Digits and commas only; the command template skips the option while the value is "0". -->
256 <param argument="--quality-cutoff" type="text" value="0" label="Quality cutoff" help=" Trim low-quality bases from 5' and/or 3' ends of each read before adapter removal. Applied to both reads for paired-end data, unless a separate value for the second read is specified. If one value is given, only the 3' end is trimmed. If two comma-separated cutoffs are given, the 5' end is trimmed with the first cutoff, the 3' end with the second.">
257 <sanitizer>
258 <valid initial="string.digits"><add value="," /></valid>
259 </sanitizer>
260 <validator type="regex">[0-9,]+</validator>
261 </param>
262 <param argument="--nextseq-trim" type="integer" value="0" label="NextSeq trimming" help="Experimental option for quality trimming of NextSeq data. This is necessary because that machine cannot distinguish between G and reaching the end of the fragment (it encodes G as ‘black’). This option works like regular quality trimming (where one would use -q 20 instead), except that the qualities of G bases are ignored." />
263 <param argument="--trim-n" type="boolean" truevalue="--trim-n" falsevalue="" checked="False" label="Trim Ns" help="Trim N's on ends of reads." />
264 <param argument="--strip-suffix" label="Strip suffix" type="text" help="Remove this suffix from read names if present." />
<!-- The command template negates the length when the 5' end is chosen. -->
265 <conditional name="shorten_options">
266 <param name="shorten_values" type="select" label="Shortening reads to a fixed length" help="If you want to remove a fixed number of bases from each read, use the –cut option instead.">
267 <option value="True">Enabled</option>
268 <option value="False" selected="True">Disabled</option>
269 </param>
270 <when value="True">
271 <param argument="--length" type="integer" value="0" label="Length" help="Shorten reads to this length. This modification is applied after adapter trimming." />
272 <param name="shorten_end" type="select" label="End at which to apply the slice">
273 <option value="3prime">3' ends</option>
274 <option value="5prime">5' ends</option>
275 </param>
276 </when>
277 <when value="False">
278 </when>
279 </conditional>
280 <param argument="--length-tag" label="Length tag" type="text" optional="True" help="Search for TAG followed by a decimal number in the name of the read (description/comment field of the FASTA or FASTQ file). Replace the decimal number with the correct length of the trimmed read. For example, use --length-tag 'length=' to search for fields like 'length=123'." >
281 <sanitizer invalid_char="">
282 <valid initial="string.letters,string.digits">
283 <add value="/" />
284 <add value="+" />
285 <add value="-" />
286 <add value="=" />
287 <add value=" " />
288 <add value="_" />
289 </valid>
290 </sanitizer>
<!-- The hyphen sits last in the character class so that it is a literal hyphen. Placed
     between "=" and "_" it formed a range that also admitted characters such as
     ">", "?", "@", "[" and "^", which the sanitizer above does not allow. -->
291 <validator type="regex">[A-Za-z0-9 =_/+-]+</validator>
292 </param>
293 <param argument="--rename" label="Rename reads" type="text" optional="True" help="This option can be used to rename both single-end and paired-end reads. ">
294 <sanitizer invalid_char="">
295 <valid initial="string.letters,string.digits">
296 <add value="{" />
297 <add value="}" />
298 <add value="_" />
299 <add value="=" />
300 <add value=" " />
301 </valid>
302 </sanitizer>
303 <validator type="regex">[A-Za-z0-9 {}=_]+</validator>
304 </param>
305 <param argument="--zero-cap" type="boolean" truevalue="--zero-cap" falsevalue="" checked="False" label="Change negative quality values to zero" />
306 </section>
307
308 <!-- Output Options -->
<!-- Multi-select of optional outputs. The option values are tested by name in the
     command template ('report', 'json_stats', 'multiple_output') and in the filters
     of the output declarations. -->
309 <param name="output_selector" type="select" multiple="True" display="checkboxes" label="Outputs selector">
310 <option value="report">Report: Cutadapt's per-adapter statistics. You can use this file with MultiQC.</option>
311 <option value="info_file">Info file: write information about each read and its adapter matches.</option>
312 <option value="rest_file">Rest of read: when the adapter matches in the middle of a read, write the rest (after the adapter).</option>
313 <option value="wildcard_file">Wildcard file: when the adapter has wildcard bases (Ns) write adapter bases matching wildcard positions.</option>
314 <option value="too_short_file">Too short reads: write reads that are too short according to minimum length specified (default: discard reads).</option>
315 <option value="too_long_file">Too long reads: write reads that are too long (according to maximum length specified)</option>
316 <option value="untrimmed_file">Untrimmed reads: write reads that do not contain the adapter to a separate file, instead of writing them to the regular output file (default: output to same file as trimmed)</option>
317 <option value="multiple_output">Multiple output: create a separate file for each adapter trimmed (default: all trimmed reads are in a single file)</option>
318 <option value="json_stats">Statistics in JSON format</option>
319 </param>
320 </inputs>
321
322 <outputs>
<!-- Main trimmed reads. out1 is produced for every library type except the paired
     collection, out2 only for separate paired files; neither is produced when
     multiple output is selected. Both are collected from the working directory by
     the "out1*" / "out2*" patterns.
     NOTE(review): these filters test membership in output_selector without the
     "output_selector and" guard used by the optional outputs; confirm the behaviour
     when nothing is selected. -->
323 <data name="out1" format="fastqsanger" metadata_source="input_1" from_work_dir="out1*" label="${tool.name} on ${on_string}: Read 1 Output">
324 <filter>library['type'] != 'paired_collection' and 'multiple_output' not in output_selector</filter>
325 <expand macro="inherit_format_1" />
326 </data>
327
328 <data name="out2" format="fastqsanger" metadata_source="input_2" from_work_dir="out2*" label="${tool.name} on ${on_string}: Read 2 Output" >
329 <filter>library['type'] == 'paired' and 'multiple_output' not in output_selector</filter>
330 <expand macro="inherit_format_2" />
331 </data>
332
<!-- Trimmed reads for the paired collection library type. The command template names
     its outputs "out1" / "out2" plus an extension that is ".fa" for FASTA input, so the
     patterns must not require ".fq"; they now match the ones used by the out1 / out2
     datasets. -->
333 <collection name="out_pairs" type="paired" format_source="input_1" label="${tool.name} on ${on_string}: Reads">
334 <filter>library['type'] == 'paired_collection' and 'multiple_output' not in output_selector</filter>
335 <data name="forward" from_work_dir="out1*" />
336 <data name="reverse" from_work_dir="out2*" />
337 </collection>
338
<!-- Optional outputs. Each one is created only when its entry is selected in
     output_selector; the read 2 variants carry a second filter that additionally
     requires one of the paired library types. -->
339 <data name="report" format="txt" from_work_dir="report.txt" label="${tool.name} on ${on_string}: Report">
340 <filter>output_selector and 'report' in output_selector</filter>
341 </data>
<!-- info_file declares no from_work_dir, unlike the other optional outputs. -->
342 <data name="info_file" format="txt" metadata_source="input_1" label="${tool.name} on ${on_string}: Info File" >
343 <filter>output_selector and 'info_file' in output_selector</filter>
344 </data>
345
346 <data name="rest_output" format="fastqsanger" metadata_source="input_1" from_work_dir="rest_output*" label="${tool.name} on ${on_string}: Rest of Reads (R1 only)" >
347 <filter>output_selector and 'rest_file' in output_selector</filter>
348 <expand macro="inherit_format_1" />
349 </data>
350
351 <data name="wild_output" format="txt" metadata_source="input_1" from_work_dir="wild_output*" label="${tool.name} on ${on_string}: Wildcard File" >
352 <filter>output_selector and 'wildcard_file' in output_selector</filter>
353 </data>
354
<!-- Untrimmed reads, read 1 and (paired only) read 2. -->
355 <data name="untrimmed_output" format="fastqsanger" metadata_source="input_1" from_work_dir="untrimmed_output*" label="${tool.name} on ${on_string}: Untrimmed Read 1" >
356 <filter>output_selector and 'untrimmed_file' in output_selector</filter>
357 <expand macro="inherit_format_1" />
358 </data>
359 <data name="untrimmed_paired_output" format="fastqsanger" metadata_source="input_2" from_work_dir="untrimmed_paired_output*" label="${tool.name} on ${on_string}: Untrimmed Read 2" >
360 <filter>library['type'] == 'paired' or library['type'] == 'paired_collection'</filter>
361 <filter>output_selector and 'untrimmed_file' in output_selector </filter>
362 <expand macro="inherit_format_2" />
363 </data>
364
<!-- Reads rejected by the length filters, read 1 and (paired only) read 2. -->
365 <data name="too_short_output" format="fastqsanger" metadata_source="input_1" from_work_dir="too_short_output*" label="${tool.name} on ${on_string}: Too Short Read 1" >
366 <filter>output_selector and 'too_short_file' in output_selector</filter>
367 <expand macro="inherit_format_1" />
368 </data>
369 <data name="too_short_paired_output" format="fastqsanger" metadata_source="input_2" from_work_dir="too_short_paired_output*" label="${tool.name} on ${on_string}: Too Short Read 2" >
370 <filter>library['type'] == 'paired' or library['type'] == 'paired_collection'</filter>
371 <filter>output_selector and 'too_short_file' in output_selector</filter>
372 <expand macro="inherit_format_2" />
373 </data>
374 <data name="too_long_output" format="fastqsanger" metadata_source="input_1" from_work_dir="too_long_output*" label="${tool.name} on ${on_string}: Too Long Read 1" >
375 <filter>output_selector and 'too_long_file' in output_selector</filter>
376 <expand macro="inherit_format_1" />
377 </data>
378 <data name="too_long_paired_output" format="fastqsanger" metadata_source="input_2" from_work_dir="too_long_paired_output*" label="${tool.name} on ${on_string}: Too Long Read 2" >
379 <filter>library['type'] == 'paired' or library['type'] == 'paired_collection'</filter>
380 <filter>output_selector and 'too_long_file' in output_selector</filter>
381 <expand macro="inherit_format_2" />
382 </data>
<!-- Collected from stats.json, the file name the command template passes to Cutadapt. -->
383 <data name="json_stats" format="json" from_work_dir="stats.json" label="${tool.name} on ${on_string}: Statistics (JSON)" >
384 <filter>output_selector and 'json_stats' in output_selector</filter>
385 </data>
<!-- Split files are discovered in the split directory. The pattern takes everything
     before the final ".fastq..." part of the file name as the element designation and
     the rest as its extension, so only names containing ".fastq" are picked up. -->
386 <collection name="split_output" type="list" label="${tool.name} on ${on_string}: Split outputs" format="fastqsanger" >
387 <discover_datasets pattern="(?P&lt;designation&gt;.+)\.(?P&lt;ext&gt;fastq.*)" directory="split" />
388 <filter>output_selector and 'multiple_output' in output_selector</filter>
389 </collection>
390 </outputs>
391
392 <tests>
393 <!-- Ensure fastq works -->
394 <test expect_num_outputs="1">
395 <param name="type" value="single" />
396 <param name="input_1" ftype="fastq" value="cutadapt_small.fastq" />
397 <section name="r1">
398 <repeat name="adapters">
399 <conditional name="adapter_source">
400 <param name="adapter_source_list" value="user"/>
401 <param name="adapter" value="AGATCGGAAGAGC"/>
402 </conditional>
403 </repeat>
404 </section>
405 <output name="out1" file="cutadapt_small.out" ftype="fastq"/>
406 </test>
407 <!-- Ensure single end fastq.gz works -->
408 <test expect_num_outputs="1">
409 <param name="type" value="single" />
410 <param name="input_1" ftype="fastq.gz" value="bwa-mem-fastq1.fq.gz" />
411 <section name="r1">
412 <repeat name="adapters">
413 <conditional name="adapter_source">
414 <param name="adapter_source_list" value="user"/>
415 <param name="adapter" value="AGATCGGAAGAGC"/>
416 </conditional>
417 </repeat>
418 </section>
419 <output name="out1" decompress="True" file="cutadapt_out1.fq.gz" ftype="fastq.gz"/>
420 </test>
421 <!-- Ensure paired end fastq.gz works -->
422 <test expect_num_outputs="2">
423 <param name="type" value="paired" />
424 <param name="input_1" ftype="fastq.gz" value="bwa-mem-fastq1.fq.gz" />
425 <param name="input_2" ftype="fastq.gz" value="bwa-mem-fastq2.fq.gz" />
426 <section name="r1">
427 <repeat name="adapters">
428 <conditional name="adapter_source">
429 <param name="adapter_source_list" value="user"/>
430 <param name="adapter" value="AGATCGGAAGAGC"/>
431 </conditional>
432 </repeat>
433 </section>
434 <section name="r2">
435 <repeat name="adapters2">
436 <conditional name="adapter_source2">
437 <param name="adapter_source_list2" value="user"/>
438 <param name="adapter2" value="AGATCGGAAGAGC"/>
439 </conditional>
440 </repeat>
441 </section>
442 <output name="out1" decompress="True" file="cutadapt_out1.fq.gz" ftype="fastq.gz"/>
443 <output name="out2" decompress="True" file="cutadapt_out2.fq.gz" ftype="fastq.gz"/>
444 <assert_command>
445 <not_has_text text="--discard-trimmed"/>
446 <not_has_text text="--discard-untrimmed"/>
447 <not_has_text text="--minimum-length"/>
448 <not_has_text text="--maximum-length"/>
449 <not_has_text text="--max-n"/>
450 <has_text text="--pair-filter=any"/>
451 </assert_command>
452 </test>
453 <!-- Ensure paired collection works -->
454 <test expect_num_outputs="3">
455 <param name="type" value="paired_collection" />
456 <param name="input_1">
457 <collection type="paired">
458 <element name="forward" ftype="fastq.gz" value="bwa-mem-fastq1.fq.gz" />
459 <element name="reverse" ftype="fastq.gz" value="bwa-mem-fastq2.fq.gz" />
460 </collection>
461 </param>
462 <section name="r1">
463 <repeat name="adapters">
464 <conditional name="adapter_source">
465 <param name="adapter_source_list" value="user"/>
466 <param name="adapter" value="AGATCGGAAGAGC"/>
467 </conditional>
468 </repeat>
469 </section>
470 <section name="r2">
471 <repeat name="adapters2">
472 <conditional name="adapter_source2">
473 <param name="adapter_source_list2" value="user"/>
474 <param name="adapter2" value="AGATCGGAAGAGC"/>
475 </conditional>
476 </repeat>
477 </section>
478 <output_collection name="out_pairs" type="paired" count="2">
479 <element name="forward" decompress="true" file="cutadapt_out1.fq.gz" ftype="fastq.gz">
480 <assert_contents>
481 <has_size value="12000" delta="2000" />
482 </assert_contents>
483 </element>
484 <element name="reverse" decompress="true" file="cutadapt_out2.fq.gz" ftype="fastq.gz">
485 <assert_contents>
486 <has_size value="12000" delta="2000" />
487 </assert_contents>
488 </element>
489 </output_collection>
490 </test>
491 <!-- Ensure built-in adapters work -->
492 <test expect_num_outputs="1">
493 <param name="type" value="single" />
494 <param name="input_1" ftype="fastq" value="cutadapt_small.fastq" />
495 <section name="r1">
496 <repeat name="adapters">
497 <conditional name="adapter_source">
498 <param name="adapter_source_list" value="builtin"/>
499 <param name="adapter" value="TGTAGGCC"/>
500 </conditional>
501 </repeat>
502 </section>
503 <output name="out1" file="cutadapt_builtin.out" ftype="fastq"/>
504 </test>
505 <!-- Ensure discard file output works -->
506 <test expect_num_outputs="1">
507 <param name="input_1" ftype="fastq" value="cutadapt_small.fastq" />
508 <section name="r1">
509 <repeat name="adapters">
510 <conditional name="adapter_source">
511 <param name="adapter_source_list" value="user"/>
512 <param name="adapter" value="TTAGACATATCTCCGTCG"/>
513 </conditional>
514 </repeat>
515 </section>
516 <section name="filter_options">
517 <param name="discard_trimmed" value="True"/>
518 </section>
519 <output name="out1" file="cutadapt_discard.out" ftype="fastq"/>
520 <assert_command>
521 <has_text text="--discard-trimmed"/>
522 </assert_command>
523 </test>
524 <!-- Ensure rest file output works, test json output -->
525 <test expect_num_outputs="3">
526 <param name="input_1" ftype="fasta" value="cutadapt_rest.fa" />
527 <section name="r1">
528 <repeat name="adapters">
529 <conditional name="adapter_source">
530 <param name="adapter_source_list" value="user"/>
531 <param name="adapter" value="AAAGATG"/>
532 </conditional>
533 </repeat>
534 </section>
535 <param name="output_selector" value="rest_file,json_stats"/>
536 <output name="out1" file="cutadapt_rest.out" ftype="fasta"/>
537 <output name="json_stats" file="cutadapt_rest.json" ftype="json" lines_diff="4"/> <!--allow for differing cutadapt and python version-->
538 <output name="rest_output" file="cutadapt_rest2.out" ftype="fasta"/>
539 </test>
540 <!-- Ensure nextseq-trim option works -->
541 <test expect_num_outputs="1">
542 <param name="type" value="single" />
543 <param name="input_1" ftype="fastq.gz" value="bwa-mem-fastq1.fq.gz" />
544 <section name="r1">
545 <repeat name="adapters">
546 <conditional name="adapter_source">
547 <param name="adapter_source_list" value="user"/>
548 <param name="adapter" value="AGATCGGAAGAGC"/>
549 </conditional>
550 </repeat>
551 </section>
552 <param name="nextseq_trim" value="20" />
553 <output name="out1" decompress="True" file="cutadapt_nextseq_out.fq.gz" ftype="fastq.gz"/>
554 </test>
555 <!-- Ensure Report and Info file output work -->
556 <test expect_num_outputs="3">
557 <param name="type" value="single" />
558 <param name="input_1" ftype="fastq" value="cutadapt_small.fastq" />
559 <section name="r1">
560 <repeat name="adapters">
561 <conditional name="adapter_source">
562 <param name="adapter_source_list" value="user"/>
563 <param name="adapter" value="AGATCGGAAGAGC"/>
564 </conditional>
565 </repeat>
566 </section>
567 <param name="output_selector" value="report,info_file" />
568 <output name="out1" value="cutadapt_small.out" ftype="fastq"/>
569 <output name="report">
570 <assert_contents>
571 <has_text text="Summary"/>
572 </assert_contents>
573 </output>
574 <output name="info_file" value="cutadapt_info_out.txt" ftype="txt"/>
575 </test>
576
577
578 <test expect_num_outputs="1">
579 <conditional name="library">
580 <param name="type" value="single" />
581 <param name="input_1" ftype="fastq" value="cutadapt_in_split.fastq" />
582 <section name="r1" >
583 <repeat name="front_adapters">
584 <conditional name="front_adapter_source">
585 <param name="front_adapter_source_list" value="user"/>
586 <param name="front_adapter_name" value="A1" />
587 <param name="front_adapter" value="^GTCGGTAA" />
588 </conditional>
589 </repeat>
590 <repeat name="front_adapters">
591 <conditional name="front_adapter_source">
592 <param name="front_adapter_source_list" value="user"/>
593 <param name="front_adapter_name" value="A2" />
594 <param name="front_adapter" value="^AGGTCACT" />
595 </conditional>
596 </repeat>
597 </section>
598 </conditional>
599 <param name="output_selector" value="multiple_output" />
600 <output_collection name="split_output" type="list" count="3">
601 <element name="A1" value="A1.fastq" ftype="fastq">
602 </element>
603 <element name="A2" value="A2.fastq" ftype="fastq">
604 </element>
605 <element name="unknown" value="unknown.fastq" ftype="fastq">
606 </element>
607 </output_collection>
608 </test>
609
610 <test expect_num_outputs="1">
611 <conditional name="library">
612 <param name="type" value="single" />
613 <param name="input_1" ftype="fastq.gz" value="cutadapt_in_split.fastq.gz" />
614 <section name="r1" >
615 <repeat name="front_adapters">
616 <conditional name="front_adapter_source">
617 <param name="front_adapter_source_list" value="file"/>
618 <param name="front_adapter_file" ftype="fasta" value="barcodes.fasta" />
619 </conditional>
620 </repeat>
621 </section>
622 </conditional>
623 <param name="output_selector" value="multiple_output" />
624 <output_collection name="split_output" type="list" count="3">
625 <element name="A1" decompress="True" file="A1.fastq.gz" ftype="fastq.gz">
626 </element>
627 <element name="A2" decompress="True" file="A2.fastq.gz" ftype="fastq.gz">
628 </element>
629 <element name="unknown" decompress="True" file="unknown.fastq.gz" ftype="fastq.gz">
630 </element>
631 </output_collection>
632 </test>
633
634 <!-- Ensure untrimmed file output works -->
635 <test expect_num_outputs="2">
636 <param name="type" value="single" />
637 <param name="input_1" ftype="fastq" value="cutadapt_small.fastq" />
638 <section name="r1">
639 <repeat name="adapters">
640 <conditional name="adapter_source">
641 <param name="adapter_source_list" value="user"/>
642 <param name="adapter" value="AAAT"/>
643 </conditional>
644 </repeat>
645 </section>
646 <param name="output_selector" value="untrimmed_file" />
647 <output name="out1" file="cutadapt_trimmed.out" ftype="fastq"/>
648 <output name="untrimmed_output" file="cutadapt_untrimmed.out" ftype="fastq"/>
649 </test>
650 <!-- Ensure untrimmed gzip file output works -->
651 <test expect_num_outputs="2">
652 <param name="type" value="single" />
653 <param name="input_1" ftype="fastq.gz" value="bwa-mem-fastq1.fq.gz" />
654 <section name="r1">
655 <repeat name="adapters">
656 <conditional name="adapter_source">
657 <param name="adapter_source_list" value="user"/>
658 <param name="adapter" value="AGATCGGAAGAGC"/>
659 </conditional>
660 </repeat>
661 </section>
662 <param name="output_selector" value="untrimmed_file" />
663 <output name="out1" decompress="True" file="cutadapt_trimmed.out.gz" ftype="fastq.gz"/>
664 <!--
665 Do not use the decompress option for this assertion, since it does NOT test that the file is compressed
666 See discussion at https://github.com/galaxyproject/galaxy/issues/7671
667 `delta="4000"` is more than the difference between gzip level 1 and gzip level 9, but much less than the
668 difference between level 1 compression and no compression
669 -->
670 <output name="untrimmed_output" file="cutadapt_untrimmed.out.gz" compare="sim_size" delta="4000" ftype="fastq.gz"/>
671 </test>
672 <!-- Same as 1st test with paired data + filter options; because of discard_untrimmed no output comparison is done, only the generated command line is asserted -->
673 <test expect_num_outputs="2">
674 <param name="type" value="paired" />
675 <param name="input_1" ftype="fastq.gz" value="bwa-mem-fastq1.fq.gz" />
676 <param name="input_2" ftype="fastq.gz" value="bwa-mem-fastq2.fq.gz" />
677 <section name="r1">
678 <repeat name="adapters">
679 <conditional name="adapter_source">
680 <param name="adapter_source_list" value="user"/>
681 <param name="adapter" value="AGATCGGAAGAGC"/>
682 </conditional>
683 </repeat>
684 </section>
685 <section name="r2">
686 <repeat name="adapters2">
687 <conditional name="adapter_source2">
688 <param name="adapter_source_list2" value="user"/>
689 <param name="adapter2" value="AGATCGGAAGAGC"/>
690 </conditional>
691 </repeat>
692 </section>
693 <section name="filter_options">
694 <param name="discard_untrimmed" value="true"/>
695 <param name="minimum_length" value="1"/>
696 <param name="maximum_length" value="1000"/>
697 <param name="max_n" value="0"/>
698 <param name="pair_filter" value="both"/>
699 </section>
700 <assert_command>
701 <has_text text="--discard-untrimmed"/>
702 <has_text text="--maximum-length=1000"/>
703 <has_text text="--max-n=0"/>
704 <has_text text="--pair-filter=both"/>
705 </assert_command>
706 </test>
707 <!-- Test cut option -->
708 <test expect_num_outputs="1">
709 <param name="type" value="single" />
710 <param name="input_1" ftype="fastq" value="cutadapt_small.fastq" />
711 <section name="r1">
712 <repeat name="adapters">
713 <conditional name="adapter_source">
714 <param name="adapter_source_list" value="user"/>
715 <param name="adapter" value="AGATCGGAAGAGC"/>
716 </conditional>
717 </repeat>
718 <param name="cut" value="5"/>
719 </section>
720 <output name="out1" file="cutadapt_small_cut.out" ftype="fastq"/>
721 </test>
722 <!-- Test rename options -->
723 <test expect_num_outputs="1">
724 <param name="type" value="single" />
725 <param name="input_1" ftype="fastq" value="cutadapt_small.fastq" />
726 <section name="r1">
727 <repeat name="adapters">
728 <conditional name="adapter_source">
729 <param name="adapter_source_list" value="user"/>
730 <param name="adapter" value="AGATCGGAAGAGC"/>
731 </conditional>
732 </repeat>
733 <param name="cut" value="5"/>
734 </section>
735 <section name="read_mod_options">
736 <param name="rename" value="{id} barcode={cut_prefix}"/>
737 </section>
738 <output name="out1" file="cutadapt_small_rename.out" ftype="fastq">
739 <assert_contents>
740 <has_text text="@prefix:1_13_1259/1 barcode=AGCCG"/>
741 </assert_contents>
742 </output>
743 </test>
744 <!-- Test action options -->
745 <test expect_num_outputs="1">
746 <param name="type" value="single" />
747 <param name="input_1" ftype="fastq" value="cutadapt_small.fastq" />
748 <section name="r1">
749 <repeat name="adapters">
750 <conditional name="adapter_source">
751 <param name="adapter_source_list" value="user"/>
752 <param name="adapter" value="CGTCCGAANTAG"/>
753 </conditional>
754 </repeat>
755 </section>
756 <section name="adapter_options">
757 <param name="action" value="retain"/>
758 </section>
759 <output name="out1" file="cutadapt_action_retain.out" ftype="fastq"/>
760 </test>
761 <test expect_num_outputs="1">
762 <param name="type" value="single" />
763 <param name="input_1" ftype="fastq" value="cutadapt_small.fastq" />
764 <section name="r1">
765 <repeat name="adapters">
766 <conditional name="adapter_source">
767 <param name="adapter_source_list" value="user"/>
768 <param name="adapter" value="CGTCCGAANTAG"/>
769 </conditional>
770 </repeat>
771 </section>
772 <section name="adapter_options">
773 <param name="action" value="mask"/>
774 </section>
775 <output name="out1" file="cutadapt_action_mask.out" ftype="fastq"/>
776 </test>
777 <test expect_num_outputs="1">
778 <param name="type" value="single" />
779 <param name="input_1" ftype="fastq" value="cutadapt_small.fastq" />
780 <section name="r1">
781 <repeat name="adapters">
782 <conditional name="adapter_source">
783 <param name="adapter_source_list" value="user"/>
784 <param name="adapter" value="CGTCCGAANTAG"/>
785 </conditional>
786 </repeat>
787 </section>
788 <section name="adapter_options">
789 <param name="action" value="lowercase"/>
790 </section>
791 <output name="out1" file="cutadapt_action_lowercase.out" ftype="fastq"/>
792 </test>
793 <test expect_num_outputs="1">
794 <param name="type" value="single" />
795 <param name="input_1" ftype="fastq" value="cutadapt_small.fastq" />
796 <section name="r1">
797 <repeat name="adapters">
798 <conditional name="adapter_source">
799 <param name="adapter_source_list" value="user"/>
800 <param name="adapter" value="CGTCCGAANTAG"/>
801 </conditional>
802 </repeat>
803 </section>
804 <section name="adapter_options">
805 <param name="action" value="none"/>
806 </section>
807 <output name="out1" file="cutadapt_action_none.out" ftype="fastq"/>
808 </test>
809 <!-- Test revcomp options -->
810 <test expect_num_outputs="1">
811 <param name="type" value="single" />
812 <param name="input_1" ftype="fastq" value="cutadapt_small.fastq" />
813 <section name="r1">
814 <repeat name="adapters">
815 <conditional name="adapter_source">
816 <param name="adapter_source_list" value="user"/>
817 <param name="adapter" value="TAAACAGATTAGT"/>
818 </conditional>
819 </repeat>
820 </section>
821 <section name="adapter_options">
822 <param name="revcomp" value="true"/>
823 </section>
824 <output name="out1" file="cutadapt_revcomp.out" ftype="fastq"/>
825 </test>
826 <!-- Test minimum length paired-reads -->
827 <test expect_num_outputs="2">
828 <param name="type" value="paired" />
829 <param name="input_1" ftype="fastq.gz" value="bwa-mem-fastq1_assimetric.fq.gz" />
830 <param name="input_2" ftype="fastq.gz" value="bwa-mem-fastq2_assimetric.fq.gz" />
831 <section name="r1">
832 <repeat name="adapters">
833 <conditional name="adapter_source">
834 <param name="adapter_source_list" value="user"/>
835 <param name="adapter" value="ATCTGGTTCC"/>
836 </conditional>
837 </repeat>
838 </section>
839 <section name="r2">
840 <repeat name="adapters2">
841 <conditional name="adapter_source2">
842 <param name="adapter_source_list2" value="user"/>
843 <param name="adapter2" value="CTACAAG"/>
844 </conditional>
845 </repeat>
846 </section>
847 <section name="filter_options">
848 <param name="minimum_length" value="30"/>
849 <param name="pair_filter" value="both"/>
850 <conditional name="length_R2_options">
851 <param name="length_R2_status" value="True"/>
852 <param name="R2_minimum" value="10"/>
853 </conditional>
854 </section>
855 <output name="out1" decompress="True" file="cutadapt_out1_min_length.fq.gz" ftype="fastq.gz"/>
856 <output name="out2" decompress="True" file="cutadapt_out2_min_length.fq.gz" ftype="fastq.gz"/>
857 <assert_command>
858 <has_text text="--minimum-length=30:10"/>
859 <has_text text="--pair-filter=both"/>
860 </assert_command>
861 </test>
862 <!-- Test maximum length paired-reads -->
863 <test expect_num_outputs="2">
864 <param name="type" value="paired" />
865 <param name="input_1" ftype="fastq.gz" value="bwa-mem-fastq1_assimetric.fq.gz" />
866 <param name="input_2" ftype="fastq.gz" value="bwa-mem-fastq2_assimetric.fq.gz" />
867 <section name="r1">
868 <repeat name="adapters">
869 <conditional name="adapter_source">
870 <param name="adapter_source_list" value="user"/>
871 <param name="adapter" value="AGATCGGAAGAGC"/>
872 </conditional>
873 </repeat>
874 </section>
875 <section name="r2">
876 <repeat name="adapters2">
877 <conditional name="adapter_source2">
878 <param name="adapter_source_list2" value="user"/>
879 <param name="adapter2" value="AGATCGGAAGAGC"/>
880 </conditional>
881 </repeat>
882 </section>
883 <section name="filter_options">
884 <param name="pair_filter" value="both"/>
885 <param name="maximum_length" value="50"/>
886 <conditional name="length_R2_options">
887 <param name="length_R2_status" value="True"/>
888 <param name="R2_maximum" value="30"/>
889 </conditional>
890 </section>
891 <output name="out1" decompress="True" file="cutadapt_out1_max_length.fq.gz" ftype="fastq.gz"/>
892 <output name="out2" decompress="True" file="cutadapt_out2_max_length.fq.gz" ftype="fastq.gz"/>
893 <assert_command>
894 <has_text text="--maximum-length=50:30"/>
895 <has_text text="--pair-filter=both"/>
896 </assert_command>
897 </test>
898 <!-- Test combination maximum and minimum length paired reads -->
899 <test expect_num_outputs="2">
900 <param name="type" value="paired" />
901 <param name="input_1" ftype="fastq.gz" value="bwa-mem-fastq1_assimetric.fq.gz" />
902 <param name="input_2" ftype="fastq.gz" value="bwa-mem-fastq2_assimetric.fq.gz" />
903 <section name="r1">
904 <repeat name="adapters">
905 <conditional name="adapter_source">
906 <param name="adapter_source_list" value="user"/>
907 <param name="adapter" value="AGATCGGAAGAGC"/>
908 </conditional>
909 </repeat>
910 </section>
911 <section name="r2">
912 <repeat name="adapters2">
913 <conditional name="adapter_source2">
914 <param name="adapter_source_list2" value="user"/>
915 <param name="adapter2" value="AGATCGGAAGAGC"/>
916 </conditional>
917 </repeat>
918 </section>
919 <section name="filter_options">
920 <param name="pair_filter" value="both"/>
921 <param name="minimum_length" value="10"/>
922 <param name="maximum_length" value="50"/>
923 <conditional name="length_R2_options">
924 <param name="length_R2_status" value="True"/>
925 <param name="R2_maximum" value="30"/>
926 </conditional>
927 </section>
928 <output name="out1" decompress="True" file="cutadapt_out1_max_min_01.fq.gz" ftype="fastq.gz"/>
929 <output name="out2" decompress="True" file="cutadapt_out2_max_min_01.fq.gz" ftype="fastq.gz"/>
930 <assert_command>
931 <has_text text="--minimum-length=10:"/>
932 <has_text text="--maximum-length=50:30"/>
933 <has_text text="--pair-filter=both"/>
934 </assert_command>
935 </test>
936 <test expect_num_outputs="2">
937 <param name="type" value="paired" />
938 <param name="input_1" ftype="fastq.gz" value="bwa-mem-fastq1_assimetric.fq.gz" />
939 <param name="input_2" ftype="fastq.gz" value="bwa-mem-fastq2_assimetric.fq.gz" />
940 <section name="r1">
941 <repeat name="adapters">
942 <conditional name="adapter_source">
943 <param name="adapter_source_list" value="user"/>
944 <param name="adapter" value="AGATCGGAAGAGC"/>
945 </conditional>
946 </repeat>
947 </section>
948 <section name="r2">
949 <repeat name="adapters2">
950 <conditional name="adapter_source2">
951 <param name="adapter_source_list2" value="user"/>
952 <param name="adapter2" value="AGATCGGAAGAGC"/>
953 </conditional>
954 </repeat>
955 </section>
956 <section name="filter_options">
957 <param name="pair_filter" value="both"/>
958 <param name="minimum_length" value="10"/>
959 <param name="maximum_length" value="50"/>
960 <conditional name="length_R2_options">
961 <param name="length_R2_status" value="True"/>
962 <param name="R2_minimum" value="10"/>
963 </conditional>
964 </section>
965 <output name="out1" decompress="True" file="cutadapt_out1_max_min_02.fq.gz" ftype="fastq.gz"/>
966 <output name="out2" decompress="True" file="cutadapt_out2_max_min_02.fq.gz" ftype="fastq.gz"/>
967 <assert_command>
968 <has_text text="--minimum-length=10:10"/>
969 <has_text text="--maximum-length=50:"/>
970 <has_text text="--pair-filter=both"/>
971 </assert_command>
972 </test>
973 <!-- Test length options -->
974 <test expect_num_outputs="1">
975 <param name="type" value="single" />
976 <param name="input_1" ftype="fastq" value="cutadapt_small.fastq" />
977 <section name="r1">
978 <repeat name="adapters">
979 <conditional name="adapter_source">
980 <param name="adapter_source_list" value="user"/>
981 <param name="adapter" value="AGCCGCTANGACG"/>
982 </conditional>
983 </repeat>
984 </section>
985 <section name="read_mod_options">
986 <conditional name="shorten_options">
987 <param name="shorten_values" value="True"/>
988 <param name="shorten_end" value="3prime"/>
989 <param name="length" value="10"/>
990 </conditional>
991 </section>
992 <output name="out1" file="cutadapt_shorten_3prime.out" ftype="fastq"/>
993 </test>
994 <test expect_num_outputs="1">
995 <param name="type" value="single" />
996 <param name="input_1" ftype="fastq" value="cutadapt_small.fastq" />
997 <section name="r1">
998 <repeat name="adapters">
999 <conditional name="adapter_source">
1000 <param name="adapter_source_list" value="user"/>
1001 <param name="adapter" value="AGCCGCTANGACG"/>
1002 </conditional>
1003 </repeat>
1004 </section>
1005 <section name="read_mod_options">
1006 <conditional name="shorten_options">
1007 <param name="shorten_values" value="True"/>
1008 <param name="shorten_end" value="5prime"/>
1009 <param name="length" value="10"/>
1010 </conditional>
1011 </section>
1012 <output name="out1" file="cutadapt_shorten_5prime.out" ftype="fastq"/>
1013 </test>
1014 <!-- Test max expected errors options -->
1015 <test expect_num_outputs="1">
1016 <param name="type" value="single" />
1017 <param name="input_1" ftype="fastq" value="cutadapt_small.fastq" />
1018 <section name="r1">
1019 <repeat name="adapters">
1020 <conditional name="adapter_source">
1021 <param name="adapter_source_list" value="user"/>
1022 <param name="adapter" value="AGCGGCTTAGACG"/>
1023 </conditional>
1024 </repeat>
1025 </section>
1026 <section name="filter_options">
1027 <param name="max_expected_errors" value="10"/>
1028 </section>
1029 <output name="out1" file="cutadapt_shorten_expected_errors.out" ftype="fastq"/>
1030 </test>
1031 <!-- Test disallow internal adaptors option -->
1032 <test expect_num_outputs="1">
1033 <param name="type" value="single" />
1034 <param name="input_1" ftype="fastq" value="cutadapt_small.fastq" />
1035 <section name="r1">
1036 <repeat name="adapters">
1037 <conditional name="adapter_source">
1038 <param name="adapter_source_list" value="user"/>
1039 <param name="adapter" value="GAANTAGCTACCAC"/>
1040 </conditional>
1041 </repeat>
1042 </section>
1043 <section name="adapter_options">
1044 <param name="internal" value="X"/>
1045 </section>
1046 <output name="out1" file="cutadapt_shorten_internal_adapters.out" ftype="fastq"/>
1047 <assert_command>
1048 <has_text text="GAANTAGCTACCACX"/>
1049 </assert_command>
1050 </test>
1051 <test expect_num_outputs="2">
1052 <param name="type" value="paired" />
1053 <param name="input_1" ftype="fastq.gz" value="bwa-mem-fastq1_assimetric.fq.gz" />
1054 <param name="input_2" ftype="fastq.gz" value="bwa-mem-fastq2_assimetric.fq.gz" />
1055 <section name="r1">
1056 <repeat name="adapters">
1057 <conditional name="adapter_source">
1058 <param name="adapter_source_list" value="user"/>
1059 <param name="adapter" value="AGATCGGAAGAGC"/>
1060 </conditional>
1061 </repeat>
1062 </section>
1063 <section name="r2">
1064 <repeat name="adapters2">
1065 <conditional name="adapter_source2">
1066 <param name="adapter_source_list2" value="user"/>
1067 <param name="adapter2" value="AGATCGGAAGAGC"/>
1068 </conditional>
1069 </repeat>
1070 </section>
1071 <section name="adapter_options">
1072 <param name="internal" value="X"/>
1073 </section>
1074 <output name="out1" decompress="True" file="cutadapt_out1_internal_adapter.fq.gz" ftype="fastq.gz"/>
1075 <output name="out2" decompress="True" file="cutadapt_out2_internal_adapter.fq.gz" ftype="fastq.gz"/>
1076 <assert_command>
1077 <has_text text="AGATCGGAAGAGCX"/>
1078 </assert_command>
1079 </test>
1080 <test expect_num_outputs="1">
1081 <param name="type" value="single" />
1082 <param name="input_1" ftype="fastq" value="cutadapt_small.fastq" />
1083 <section name="r1">
1084 <repeat name="adapters">
1085 <conditional name="adapter_source">
1086 <param name="adapter_source_list" value="builtin"/>
1087 <param name="adapter" value="TGTAGGCC"/>
1088 </conditional>
1089 </repeat>
1090 </section>
1091 <section name="adapter_options">
1092 <param name="internal" value="X"/>
1093 </section>
1094 <output name="out1" file="cutadapt_builtin_internal_adapter.out" ftype="fastq"/>
1095 <assert_command>
1096 <has_text text="TGTAGGCCX"/>
1097 </assert_command>
1098 </test>
1099 <!-- Ensure individual per adapter noindels parameter works -->
1100 <test expect_num_outputs="3">
1101 <param name="type" value="paired_collection" />
1102 <param name="input_1">
1103 <collection type="paired">
1104 <element name="forward" ftype="fastq.gz" value="bwa-mem-fastq1.fq.gz" />
1105 <element name="reverse" ftype="fastq.gz" value="bwa-mem-fastq2.fq.gz" />
1106 </collection>
1107 </param>
1108 <section name="r1">
1109 <repeat name="adapters">
1110 <conditional name="adapter_source">
1111 <param name="adapter_source_list" value="user"/>
1112 <param name="adapter" value="AGATCGGAAGAGC"/>
1113 </conditional>
1114 <param name="single_noindels" value=";noindels" />
1115 </repeat>
1116 </section>
1117 <section name="r2">
1118 <repeat name="adapters2">
1119 <conditional name="adapter_source2">
1120 <param name="adapter_source_list2" value="user"/>
1121 <param name="adapter2" value="AGATCGGAAGAGC"/>
1122 </conditional>
1123 <param name="single_noindels" value=";noindels" />
1124 </repeat>
1125 </section>
1126 <output_collection name="out_pairs" type="paired">
1127 <element name="forward" decompress="True" file="cutadapt_out1.fq.gz" ftype="fastq.gz" />
1128 <element name="reverse" decompress="True" file="cutadapt_out2.fq.gz" ftype="fastq.gz" />
1129 </output_collection>
1130 </test>
1131 <!-- Ensure specifying quality cutoff (for both reads) works -->
1132 <test expect_num_outputs="3">
1133 <param name="type" value="paired_collection" />
1134 <param name="input_1">
1135 <collection type="paired">
1136 <element name="forward" ftype="fastq.gz" value="bwa-mem-fastq1.fq.gz" />
1137 <element name="reverse" ftype="fastq.gz" value="bwa-mem-fastq2.fq.gz" />
1138 </collection>
1139 </param>
1140 <param name="quality_cutoff" value="5" />
1141 <section name="r1">
1142 <repeat name="adapters">
1143 <conditional name="adapter_source">
1144 <param name="adapter_source_list" value="user"/>
1145 <param name="adapter" value="AGATCGGAAGAGC"/>
1146 </conditional>
1147 </repeat>
1148 </section>
1149 <section name="r2">
1150 <repeat name="adapters2">
1151 <conditional name="adapter_source2">
1152 <param name="adapter_source_list2" value="user"/>
1153 <param name="adapter2" value="AGATCGGAAGAGC"/>
1154 </conditional>
1155 </repeat>
1156 <param name="quality_cutoff2" value="15,20"/>
1157 </section>
1158 <output_collection name="out_pairs" type="paired">
1159 <element name="forward" decompress="True" file="cutadapt_out1.fq.gz" ftype="fastq.gz" />
1160 <element name="reverse" decompress="True" file="cutadapt_out2_cutoff.fq.gz" ftype="fastq.gz" />
1161 </output_collection>
1162 </test>
1163 </tests>
1164
1165 <help><![CDATA[
1166
1167 .. class:: infomark
1168
1169 **What it does**
1170
1171 -------------------
1172
1173 **Cutadapt** finds and removes adapter sequences, primers, poly-A tails and other types of unwanted sequence from your high-throughput sequencing reads.
1174
1175 Cleaning your data in this way is often required: reads from small-RNA sequencing (such as microRNA or CRISPR data) contain the 3’ sequencing adapter because the read is longer than the molecule that is sequenced, and poly-A tails are useful for pulling out RNA from your sample, but you often don’t want them to be in your reads.
1176
1177 Cutadapt_ helps with these trimming tasks by finding the adapter or primer sequences in an error-tolerant way. It can also modify and filter reads in various ways. Cutadapt searches for the adapter in all reads and removes it when it finds it. Unless you use a filtering option, all reads that were present in the input file will also be present in the output file, some of them trimmed, some of them not. Even reads that were trimmed entirely (because the adapter was found in the very beginning) are output. All of this can be changed with options in the tool form above.
1178
1179 The tool is based on the **Open Source** Cutadapt_ tool. See the complete `Cutadapt documentation`_ for additional details. If you use Cutadapt, please cite *Martin, 2011* under **Citations** below.
1180
1181 -------------------
1182
1183 **Inputs**
1184
1185 -------------------
1186
1187 Input files for Cutadapt need to be:
1188
1189 - FASTQ.GZ, FASTQ.BZ2, FASTQ or FASTA
1190
1191 To trim an adapter, input the ADAPTER sequence in plain text or in a FASTA file e.g. AACCGGTT (with the characters: **$**, **^**, **...**, if anchored or linked).
1192
1193 ============================================= ===================
1194 **Option** **Sequence**
1195 --------------------------------------------- -------------------
1196 3’ (End) Adapter ADAPTER
1197 Anchored 3’ Adapter ADAPTER$
1198
1199 5’ (Front) Adapter ADAPTER
1200 Anchored 5’ Adapter ^ADAPTER
1201
1202 5’ or 3’ (Both possible) ADAPTER
1203
1204 Linked Adapter - 3' (End) only ADAPTER1...ADAPTER2
1205 Non-anchored Linked Adapter - 5' (Front) only ADAPTER1...ADAPTER2
1206 ============================================= ===================
1207
1208 Below is an illustration of the allowed adapter locations relative to the read and depending on the adapter type:
1209
1210 .. image:: $PATH_TO_IMAGES/adapters.svg
1211
1212
1213 -------------------
1214
1215 *Example: Illumina TruSeq Adapters*
1216
1217 -------------------
1218
1219 If you have reads containing Illumina TruSeq adapters, for example, follow these steps.
1220
1221
1222 For Single-end reads as well as the first reads of Paired-end data:
1223
1224 **Read 1**
1225
1226 In the **3' (End) Adapters** option above, insert A + the “TruSeq Indexed Adapter” prefix that is common to all Indexed Adapter sequences, e.g. insert:
1227
1228 AGATCGGAAGAGCACACGTCTGAACTCCAGTCAC
1229
1230
1231 For the second reads of Paired-end data:
1232
1233 **Read 2**
1234
1235 In the **3' (End) Adapters** option above, insert the reverse complement of the “TruSeq Universal Adapter”:
1236
1237 AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTGGTCGCCGTATCATT
1238
1239 The adapter sequences can be found in the document `Illumina TruSeq Adapters De-Mystified`_.
1240
1241 -----------
1242
1243 **Outputs**
1244
1245 -----------
1246
1247 - Trimmed reads
1248
1249 Optionally, under **Output Options** you can choose to output
1250
1251 * Report
1252 * Info file
1253
1254
1255 **Report**
1256
1257 Cutadapt can output per-adapter statistics if you select to output the report above.
1258
1259 Example:
1260
1261 ::
1262
1263 This is cutadapt 3.4 with Python 3.9.2
1264
1265 Command line parameters: -j=1 -a AGATCGGAAGAGC -A AGATCGGAAGAGC --output=out1.fq.gz --paired-output=out2.fq.gz --error-rate=0.1 --times=1
1266 --overlap=3 --action=trim --minimum-length=30:40 --pair-filter=both --cut=0 bwa-mem-fastq1_assimetric_fq_gz.fq.gz bwa-mem-fastq2_assimetric_fq_gz.fq.gz
1267
1268 Processing reads on 1 core in paired-end mode ...
1269 Finished in 0.01 s (129 µs/read; 0.46 M reads/minute).
1270
1271 === Summary ===
1272
1273 Total read pairs processed: 99
1274 Read 1 with adapter: 2 (2.0%)
1275 Read 2 with adapter: 4 (4.0%)
1276 Pairs that were too short: 3 (3.0%)
1277 Pairs written (passing filters): 96 (97.0%)
1278
1279 Total basepairs processed: 48,291 bp
1280 Read 1: 24,147 bp
1281 Read 2: 24,144 bp
1282 Total written (filtered): 48,171 bp (99.8%)
1283 Read 1: 24,090 bp
1284 Read 2: 24,081 bp
1285
1286
1287 **Info file**
1288
1289 The info file contains information about the found adapters. The output is a tab-separated text file. Each line corresponds to one read of the input file.
1290
1291 Columns contain the following data:
1292
1293 * **1st**: Read name
1294 * **2nd**: Number of errors
1295 * **3rd**: 0-based start coordinate of the adapter match
1296 * **4th**: 0-based end coordinate of the adapter match
1297 * **5th**: Sequence of the read to the left of the adapter match (can be empty)
1298 * **6th**: Sequence of the read that was matched to the adapter
1299 * **7th**: Sequence of the read to the right of the adapter match (can be empty)
1300 * **8th**: Name of the found adapter
1301 * **9th**: Quality values corresponding to sequence left of the adapter match (can be empty)
1302 * **10th**: Quality values corresponding to sequence matched to the adapter (can be empty)
1303 * **11th**: Quality values corresponding to sequence to the right of the adapter (can be empty)
1304
1305 The concatenation of columns 5-7 yields the full read sequence. Column 8 identifies the found adapter. Adapters without a name are numbered starting from 1. Fields 9-11 are empty if quality values are not available. Concatenating them yields the full sequence of quality values.
1306
1307 If no adapter was found, the format is as follows:
1308
1309 #. Read name
1310 #. The value -1
1311 #. The read sequence
1312 #. Quality values
1313
1314 When parsing the file, be aware that additional columns may be added in the future. Note also that some fields can be empty, resulting in consecutive tabs within a line.
1315
1316 If the --times option is used and greater than 1, each read can appear more than once in the info file. There will be one line for each found adapter, all with identical read names. Only for the first of those lines will the concatenation of columns 5-7 be identical to the original read sequence (and accordingly for columns 9-11). For subsequent lines, the sequences shown are the ones that were used in subsequent rounds of adapter trimming, that is, they get successively shorter.
1317
1318
1319 --------------------
1320
1321 **Rename Reads**
1322
1323 --------------------
1324
1325
1326 The --rename option expects a template string such as {id} extra_info {adapter_name} as a parameter. It can contain regular text and placeholders that consist of a name enclosed in curly braces ({placeholdername}).
1327
1328 The read name will be set to the template string in which the placeholders are replaced with the actual values relevant for the current read.
1329
1330 The following placeholders are currently available for single-end reads:
1331
1332 * {header} – the full, unchanged header
1333 * {id} – the read ID, that is, the part of the header before the first whitespace
1334 * {comment} – the part of the header after the whitespace following the ID
1335 * {adapter_name} – the name of the adapter that was found in this read, or no_adapter if there was no adapter match. If you use --times to do multiple rounds of adapter matching, this is the name of the last found adapter.
1336 * {match_sequence} – the sequence of the read that matched the adapter (including errors). If there was no adapter match, this is set to an empty string. If you use a linked adapter, this is set to the two matching strings, separated by a comma.
1337 * {cut_prefix} – the prefix removed by the --cut (or -u) option (that is, when used with a positive length argument)
1338 * {cut_suffix} – the suffix removed by the --cut (or -u) option (that is, when used with a negative length argument)
1339 * {rc} – this is replaced with the string rc if the read was reverse complemented. This only applies when reverse complementing was requested.
1340
1341 If the --rename option is used with paired-end data, the template is applied separately to both R1 and R2. That is, for R1, the placeholders are replaced with values from R1, and for R2, the placeholders are replaced with values from R2. For example, {comment} becomes R1’s comment in R1 and it becomes R2’s comment in R2.
1342
1343 For paired-end data, the placeholder {rn} is available (“read number”), and it is replaced with 1 in R1 and with 2 in R2.
1344
1345 In addition, it is possible to write a placeholder as {r1.placeholdername} or {r2.placeholdername}, which always takes the replacement value from R1 or R2, respectively.
1346 The {r1.placeholder} and {r2.placeholder} notation is available for all placeholders except {rn} and {id} because the read ID needs to be identical for both reads.
1347
1348 --------------------
1349
1350 **More Information**
1351
1352 --------------------
1353
1354 See the excellent `Cutadapt documentation`_
1355
1356 .. _Cutadapt: https://cutadapt.readthedocs.io/en/stable/
1357 .. _`Cutadapt documentation`: https://cutadapt.readthedocs.io/en/latest/index.html
1358 .. _`Illumina TruSeq Adapters De-Mystified`: http://tucf-genomics.tufts.edu/documents/protocols/TUCF_Understanding_Illumina_TruSeq_Adapters.pdf
1359
1360
1361 --------------------
1362
1363 **Galaxy Wrapper Development**
1364
1365 --------------------
1366
1367 Author: Lance Parsons <lparsons@princeton.edu>
1368
1369 ]]></help>
1370 <expand macro="citations" />
1371 </tool>