Mercurial > repos > lparsons > cutadapt
annotate cutadapt.xml @ 9:93d58ffe39f1 draft
Updated to version 1.6
author | lparsons |
---|---|
date | Mon, 06 Oct 2014 14:01:06 -0400 |
parents | 2d6671b10919 |
children | 01d94df2e32a |
rev | line source |
---|---|
9 | 1 <tool id="cutadapt" name="Cutadapt" version="1.6"> |
2 <description>Remove adapter sequences from Fastq/Fasta</description> | |
3 <requirements> | |
4 <requirement type="package" version="1.6">cutadapt</requirement> | |
5 </requirements> | |
8
2d6671b10919
Updated to support cutadapt version 1.1 (also include automatic dependency installation)
lparsons
parents:
5
diff
changeset
|
6 <version_command>cutadapt --version</version_command> |
0
8b064ea16722
Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff
changeset
|
7 |
9 | 8 <command>cutadapt |
9 #if $input.extension.startswith( "fastq"): | |
10 --format=fastq | |
8
2d6671b10919
Updated to support cutadapt version 1.1 (also include automatic dependency installation)
lparsons
parents:
5
diff
changeset
|
11 #if $input.extension == "fastqillumina": |
2d6671b10919
Updated to support cutadapt version 1.1 (also include automatic dependency installation)
lparsons
parents:
5
diff
changeset
|
12 --quality-base=64 |
9 | 13 #end if |
8
2d6671b10919
Updated to support cutadapt version 1.1 (also include automatic dependency installation)
lparsons
parents:
5
diff
changeset
|
14 #if $input.extension == "fastqsolexa": |
2d6671b10919
Updated to support cutadapt version 1.1 (also include automatic dependency installation)
lparsons
parents:
5
diff
changeset
|
15 --quality-base=64 |
9 | 16 #end if |
17 #else | |
18 --format=$input.extension | |
19 #end if | |
20 #for $a in $adapters | |
21 #if $a.adapter_source.adapter_source_list == 'prebuilt': | |
22 --adapter="${a.adapter_source.adapter.fields.name}"='${a.adapter_source.adapter}' | |
23 #else if str($a.adapter_source.adapter_name) != "": | |
24 --adapter='${a.adapter_source.adapter_name}'='${a.adapter_source.adapter}' | |
25 #else | |
26 --adapter='${a.adapter_source.adapter}' | |
27 #end if | |
28 #end for | |
29 #for $aa in $anywhere_adapters | |
30 #if $aa.anywhere_adapter_source.anywhere_adapter_source_list == 'prebuilt': | |
31 --anywhere="${aa.anywhere_adapter_source.anywhere_adapter.fields.name}"='${aa.anywhere_adapter_source.anywhere_adapter}' | |
32 #else if str($aa.anywhere_adapter_source.anywhere_adapter_name) != "": | |
33 --anywhere='${aa.anywhere_adapter_source.anywhere_adapter_name}'='${aa.anywhere_adapter_source.anywhere_adapter}' | |
34 #else | |
35 --anywhere='${aa.anywhere_adapter_source.anywhere_adapter}' | |
36 #end if | |
37 #end for | |
38 #for $fa in $front_adapters | |
39 #if $fa.front_adapter_source.front_adapter_source_list == 'prebuilt': | |
40 --front="${fa.front_adapter_source.front_adapter.fields.name}"='${fa.front_adapter_source.front_adapter}' | |
41 #else if str($fa.front_adapter_source.front_adapter_name) != "": | |
42 --front='${fa.front_adapter_source.front_adapter_name}'='${fa.front_adapter_source.front_adapter}' | |
43 #else | |
44 --front='${fa.front_adapter_source.front_adapter}' | |
45 #end if | |
46 #end for | |
47 --error-rate=$error_rate | |
48 --times=$count | |
49 --overlap=$overlap | |
50 $no_indels | |
8
2d6671b10919
Updated to support cutadapt version 1.1 (also include automatic dependency installation)
lparsons
parents:
5
diff
changeset
|
51 $match_read_wildcards |
2d6671b10919
Updated to support cutadapt version 1.1 (also include automatic dependency installation)
lparsons
parents:
5
diff
changeset
|
52 |
2d6671b10919
Updated to support cutadapt version 1.1 (also include automatic dependency installation)
lparsons
parents:
5
diff
changeset
|
53 #if str( $output_filtering_options.output_filtering) == "filter": |
9 | 54 $output_filtering_options.discard |
55 $output_filtering_options.discard_untrimmed | |
56 $output_filtering_options.no_trim | |
57 $output_filtering_options.mask_adapter | |
58 #if str($output_filtering_options.min) != '0': | |
59 --minimum-length=$output_filtering_options.min | |
60 #end if | |
61 #if str($output_filtering_options.max) != '0': | |
62 --maximum-length=$output_filtering_options.max | |
63 #end if | |
64 #end if | |
65 | |
66 --output='$output' | |
67 | |
68 #if $paired_end.paired_end_boolean: | |
69 --paired-output='$paired_output' | |
8
2d6671b10919
Updated to support cutadapt version 1.1 (also include automatic dependency installation)
lparsons
parents:
5
diff
changeset
|
70 #end if |
2d6671b10919
Updated to support cutadapt version 1.1 (also include automatic dependency installation)
lparsons
parents:
5
diff
changeset
|
71 |
9 | 72 #if str( $output_params.output_type ) == "additional": |
73 #if $output_params.rest_file: | |
74 --rest-file=$rest_output | |
75 #end if | |
76 #if $output_params.wildcard_file: | |
77 --wildcard-file=$wild_output | |
78 #end if | |
79 #if $output_params.too_short_file: | |
80 --too-short-output=$too_short_output | |
81 #end if | |
82 #if $output_params.too_long_file: | |
83 --too-long-output=$too_long_output | |
84 #end if | |
85 #if $output_params.untrimmed_file: | |
86 --untrimmed-output=$untrimmed_output | |
87 #if $paired_end.paired_end_boolean: | |
88 --untrimmed-paired-output=$untrimmed_paired_output | |
89 #end if | |
90 #end if | |
91 #if $output_params.info_file: | |
92 --info-file=$info_file | |
93 #end if | |
8
2d6671b10919
Updated to support cutadapt version 1.1 (also include automatic dependency installation)
lparsons
parents:
5
diff
changeset
|
94 |
9 | 95 #end if |
96 | |
97 #if str( $read_modification_params.read_modification) == "modify": | |
98 #if str($read_modification_params.quality_cutoff) != '0': | |
99 --quality-cutoff=$read_modification_params.quality_cutoff | |
100 #end if | |
101 #if str($read_modification_params.cut) != '0': | |
102 --cut=$read_modification_params.cut | |
103 #end if | |
8
2d6671b10919
Updated to support cutadapt version 1.1 (also include automatic dependency installation)
lparsons
parents:
5
diff
changeset
|
104 #if $read_modification_params.prefix != '': |
2d6671b10919
Updated to support cutadapt version 1.1 (also include automatic dependency installation)
lparsons
parents:
5
diff
changeset
|
105 --prefix="$read_modification_params.prefix" |
2d6671b10919
Updated to support cutadapt version 1.1 (also include automatic dependency installation)
lparsons
parents:
5
diff
changeset
|
106 #end if |
2d6671b10919
Updated to support cutadapt version 1.1 (also include automatic dependency installation)
lparsons
parents:
5
diff
changeset
|
107 #if $read_modification_params.suffix != '': |
2d6671b10919
Updated to support cutadapt version 1.1 (also include automatic dependency installation)
lparsons
parents:
5
diff
changeset
|
108 --suffix="$read_modification_params.suffix" |
2d6671b10919
Updated to support cutadapt version 1.1 (also include automatic dependency installation)
lparsons
parents:
5
diff
changeset
|
109 #end if |
2d6671b10919
Updated to support cutadapt version 1.1 (also include automatic dependency installation)
lparsons
parents:
5
diff
changeset
|
110 #if $read_modification_params.length_tag != '': |
2d6671b10919
Updated to support cutadapt version 1.1 (also include automatic dependency installation)
lparsons
parents:
5
diff
changeset
|
111 --length-tag="$read_modification_params.length_tag" |
2d6671b10919
Updated to support cutadapt version 1.1 (also include automatic dependency installation)
lparsons
parents:
5
diff
changeset
|
112 #end if |
2d6671b10919
Updated to support cutadapt version 1.1 (also include automatic dependency installation)
lparsons
parents:
5
diff
changeset
|
113 $read_modification_params.zero_cap |
2d6671b10919
Updated to support cutadapt version 1.1 (also include automatic dependency installation)
lparsons
parents:
5
diff
changeset
|
114 #end if |
2d6671b10919
Updated to support cutadapt version 1.1 (also include automatic dependency installation)
lparsons
parents:
5
diff
changeset
|
115 |
9 | 116 '$input' |
117 | |
118 #if $paired_end.paired_end_boolean: | |
119 '$input2' | |
120 #end if | |
0
8b064ea16722
Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff
changeset
|
121 |
9 | 122 > $report |
123 </command> | |
124 <inputs> | |
125 <param format="fastq, fastqsanger, fastqillumina, fastqsolexa, fasta" name="input" type="data" optional="false" label="Fastq file to trim" length="100"/> | |
126 <conditional name="paired_end"> | |
127 <param name="paired_end_boolean" type="boolean" value="false" label="Track Paired Reads" help="This option will keep a second file synchronized if you use one of the filtering options that discards reads. It will NOT trim adapters off of the second read. You must run Cutadapt a second time on the output of the first run to trim adapters from both reads (see Cutadapt documentation for details)." /> | |
128 <when value="true"> | |
129 <param format="fastq, fastqsanger, fastqillumina, fastqsolexa, fasta" name="input2" type="data" optional="false" label="Paired fastq file (NOT trimmed)" length="100"/> | |
130 </when> | |
131 </conditional> | |
0
8b064ea16722
Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff
changeset
|
132 |
9 | 133 <repeat name="adapters" title="3' Adapters" help="Sequence of an adapter that was ligated to the 3' end. The adapter itself and anything that follows is trimmed."> |
134 <conditional name="adapter_source"> | |
135 <param name="adapter_source_list" type="select" label="Source" > | |
136 <option value="prebuilt" selected="true">Standard (select from the list below)</option> | |
137 <option value="user">Enter custom sequence</option> | |
138 </param> | |
0
8b064ea16722
Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff
changeset
|
139 |
9 | 140 <when value="user"> |
141 <param name="adapter_name" size="30" label="Enter custom 3' adapter name (Optional)" type="text" value="" /> | |
142 <param name="adapter" size="30" label="Enter custom 3' adapter sequence" type="text" value="AATTGGCC" /> | |
143 </when> | |
144 | |
145 <when value="prebuilt"> | |
146 <param name="adapter" type="select" label="Choose 3' adapter"> | |
147 <options from_file="cutadapt_adapters.txt"> | |
148 <column name="name" index="1"/> | |
149 <column name="value" index="0"/> | |
150 </options> | |
151 </param> | |
152 </when> | |
153 </conditional> | |
154 </repeat> | |
0
8b064ea16722
Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff
changeset
|
155 |
9 | 156 <repeat name="anywhere_adapters" title="5' or 3' (Anywhere) Adapters" help="Sequence of an adapter that was ligated to the 5' or 3' end. If the adapter is found within the read or overlapping the 3' end of the read, the behavior is the same as for the -a option. If the adapter overlaps the 5' end (beginning of the read), the initial portion of the read matching the adapter is trimmed, but anything that follows is kept. If multiple -a or -b options are given, only the best matching adapter is trimmed."> |
157 <conditional name="anywhere_adapter_source"> | |
158 <param name="anywhere_adapter_source_list" type="select" label="Source"> | |
159 <option value="prebuilt" selected="true">Standard (select from the list below)</option> | |
160 <option value="user">Enter custom sequence</option> | |
161 </param> | |
8
2d6671b10919
Updated to support cutadapt version 1.1 (also include automatic dependency installation)
lparsons
parents:
5
diff
changeset
|
162 |
9 | 163 <when value="user"> |
164 <param name="anywhere_adapter_name" size="30" label="Enter custom 5' or 3' adapter name (Optional)" type="text" value="" /> | |
165 <param name="anywhere_adapter" size="30" label="Enter custom 5' or 3' adapter sequence" type="text" value="AATTGGCC" /> | |
166 </when> | |
167 <when value="prebuilt"> | |
168 <param name="anywhere_adapter" type="select" label="Choose 5' or 3' adapter"> | |
169 <options from_file="cutadapt_adapters.txt"> | |
170 <column name="name" index="1"/> | |
171 <column name="value" index="0"/> | |
172 </options> | |
173 </param> | |
174 </when> | |
175 </conditional> | |
176 </repeat> | |
8
2d6671b10919
Updated to support cutadapt version 1.1 (also include automatic dependency installation)
lparsons
parents:
5
diff
changeset
|
177 |
9 | 178 <repeat name="front_adapters" title="5' (Front) Adapters" help="Sequence of an adapter that was ligated to the 5' end. If the adapter sequence starts with the character '^', the adapter is 'anchored'. An anchored adapter must appear in its entirety at the 5' end of the read (it is a prefix of the read). A non-anchored adapter may appear partially at the 5' end, or it may occur within the read. If it is found within a read, the sequence preceding the adapter is also trimmed. In all cases the adapter itself is trimmed."> |
179 <conditional name="front_adapter_source"> | |
180 <param name="front_adapter_source_list" type="select" label="Source"> | |
181 <option value="prebuilt" selected="true">Standard (select from the list below)</option> | |
182 <option value="user">Enter custom sequence</option> | |
183 </param> | |
0
8b064ea16722
Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff
changeset
|
184 |
9 | 185 <when value="user"> |
186 <param name="front_adapter_name" size="30" label="Enter custom 5' adapter name (Optional)" type="text" value="" /> | |
187 <param name="front_adapter" size="30" label="Enter custom 5' adapter sequence" type="text" value="AATTGGCC" /> | |
188 </when> | |
189 <when value="prebuilt"> | |
190 <param name="front_adapter" type="select" label="Choose 5' adapter"> | |
191 <options from_file="cutadapt_adapters.txt"> | |
192 <column name="name" index="1"/> | |
193 <column name="value" index="0"/> | |
194 </options> | |
195 </param> | |
196 </when> | |
197 </conditional> | |
198 </repeat> | |
8
2d6671b10919
Updated to support cutadapt version 1.1 (also include automatic dependency installation)
lparsons
parents:
5
diff
changeset
|
199 |
9 | 200 <param name="error_rate" type="float" min="0" max="1" value="0.1" label="Maximum error rate" help="Maximum allowed error rate (no. of errors divided by the length of the matching region)." /> |
201 <param name="no_indels" type="boolean" value="false" truevalue="--no-indels" falsevalue="" label="Do not allow indels (Use ONLY with anchored 5' (front) adapters)." help="Do not allow indels in the alignments. That is, allow only mismatches. This option is currently only supported for anchored 5' adapters ('^ADAPTER') (default: both mismatches and indels are allowed)." /> | |
202 <param name="count" type="integer" min="1" value="1" label="Match times" help="Try to remove adapters at most COUNT times. Useful when an adapter gets appended multiple times." /> | |
203 <param name="overlap" type="integer" min="1" value="3" label="Minimum overlap length" help="Minimum overlap length. If the overlap between the adapter and the sequence is shorter than LENGTH, the read is not modified. This reduces the number of bases trimmed purely due to short random adapter matches." /> | |
204 <param name="match_read_wildcards" type="boolean" value="false" truevalue="--match-read-wildcards" falsevalue="" label="Match Read Wildcards" help="Allow 'N's in the read as matches to the adapter." /> | |
8
2d6671b10919
Updated to support cutadapt version 1.1 (also include automatic dependency installation)
lparsons
parents:
5
diff
changeset
|
205 |
2d6671b10919
Updated to support cutadapt version 1.1 (also include automatic dependency installation)
lparsons
parents:
5
diff
changeset
|
206 <conditional name="output_filtering_options"> |
2d6671b10919
Updated to support cutadapt version 1.1 (also include automatic dependency installation)
lparsons
parents:
5
diff
changeset
|
207 <param name="output_filtering" type="select" label="Output filtering options" help="Options for filtering processed reads by those that contain the adapter or by minimum or maximum length"> |
2d6671b10919
Updated to support cutadapt version 1.1 (also include automatic dependency installation)
lparsons
parents:
5
diff
changeset
|
208 <option value="default">Default (no filtering)</option> |
2d6671b10919
Updated to support cutadapt version 1.1 (also include automatic dependency installation)
lparsons
parents:
5
diff
changeset
|
209 <option value="filter">Set Filters</option> |
2d6671b10919
Updated to support cutadapt version 1.1 (also include automatic dependency installation)
lparsons
parents:
5
diff
changeset
|
210 </param> |
2d6671b10919
Updated to support cutadapt version 1.1 (also include automatic dependency installation)
lparsons
parents:
5
diff
changeset
|
211 <when value="default" /> |
2d6671b10919
Updated to support cutadapt version 1.1 (also include automatic dependency installation)
lparsons
parents:
5
diff
changeset
|
212 <when value="filter"> |
9 | 213 <param name="discard" type="boolean" value="false" truevalue="--discard" falsevalue="" label="Discard Trimmed Reads" help="Discard reads that contain the adapter instead of trimming them. Use the 'Minimum overlap length' option in order to avoid throwing away too many randomly matching reads!" /> |
214 <param name="discard_untrimmed" type="boolean" value="false" truevalue="--discard-untrimmed" falsevalue="" label="Discard Untrimmed Reads" help="Discard reads that do not contain the adapter." /> | |
215 <param name="min" type="integer" min="0" optional="true" value="0" label="Minimum length" help="Discard trimmed reads that are shorter than LENGTH. Reads that are too short even before adapter removal are also discarded. In colorspace, an initial primer is not counted. Value of 0 means no minimum length." /> | |
216 <param name="max" type="integer" min="0" optional="true" value="0" label="Maximum length" help="Discard trimmed reads that are longer than LENGTH. Reads that are too long even before adapter removal are also discarded. In colorspace, an initial primer is not counted. Value of 0 means no maximum length." /> | |
217 <param name="no_trim" type="boolean" value="false" truevalue="--no-trim" falsevalue="" label="Do not trim adapters" help="Match and redirect reads to output/untrimmed-output as usual, but don't remove the adapters (default: trim the adapters)." /> | |
218 <param name="mask_adapter" type="boolean" value="false" truevalue="--mask-adapter" falsevalue="" label="Mask Adapters" help="Mask adapter bases with 'N' instead of trimming them (default: trim adapters)." /> | |
8
2d6671b10919
Updated to support cutadapt version 1.1 (also include automatic dependency installation)
lparsons
parents:
5
diff
changeset
|
219 </when> |
2d6671b10919
Updated to support cutadapt version 1.1 (also include automatic dependency installation)
lparsons
parents:
5
diff
changeset
|
220 </conditional> |
2d6671b10919
Updated to support cutadapt version 1.1 (also include automatic dependency installation)
lparsons
parents:
5
diff
changeset
|
221 |
9 | 222 <conditional name="output_params"> |
223 <param name="output_type" type="select" label="Additional output options" help="By default all reads will be put in the same file. However, reads with adapters matching in the middle, unmatched reads, and too-short reads can be saved in separate files."> | |
224 <option value="default">Default</option> | |
225 <option value="additional">Additional output files</option> | |
226 </param> | |
227 <when value="default" /> | |
228 <when value="additional"> | |
229 <param name="info_file" type="boolean" value="false" label="Info File" help="Write information about each read and its adapter matches to a file."/> | |
230 <param name="rest_file" type="boolean" value="false" label="Rest of Read" help="When the adapter matches in the middle of a read, write the rest (after the adapter) into a file."/> | |
231 <param name="wildcard_file" type="boolean" value="false" label="Wildcard File" help="When the adapter has wildcard bases ('N's) write adapter bases matching wildcard positions to file."/> | |
232 <param name="too_short_file" type="boolean" value="false" label="Too Short Reads" help="Write reads that are too short (according to minimum length specified) to a file. (default: discard reads)"/> | |
233 <param name="too_long_file" type="boolean" value="false" label="Too Long Reads" help="Write reads that are too long (according to maximum length specified) to a file. (default: discard reads)"/> | |
234 <param name="untrimmed_file" type="boolean" value="false" label="Untrimmed Reads" help="Write reads that do not contain the adapter to a separate file, instead of writing them to the regular output file. (default: output to same file as trimmed)"/> | |
235 </when> | |
236 </conditional> | |
8
2d6671b10919
Updated to support cutadapt version 1.1 (also include automatic dependency installation)
lparsons
parents:
5
diff
changeset
|
237 |
2d6671b10919
Updated to support cutadapt version 1.1 (also include automatic dependency installation)
lparsons
parents:
5
diff
changeset
|
238 <conditional name="read_modification_params"> |
2d6671b10919
Updated to support cutadapt version 1.1 (also include automatic dependency installation)
lparsons
parents:
5
diff
changeset
|
239 <param name="read_modification" type="select" label="Additional modifications to reads" help="Various options to trim reads based on quality, modify read names and quality scores"> |
2d6671b10919
Updated to support cutadapt version 1.1 (also include automatic dependency installation)
lparsons
parents:
5
diff
changeset
|
240 <option value="none">No Read Modifications</option> |
2d6671b10919
Updated to support cutadapt version 1.1 (also include automatic dependency installation)
lparsons
parents:
5
diff
changeset
|
241 <option value="modify">Set Modification Options</option> |
2d6671b10919
Updated to support cutadapt version 1.1 (also include automatic dependency installation)
lparsons
parents:
5
diff
changeset
|
242 </param> |
2d6671b10919
Updated to support cutadapt version 1.1 (also include automatic dependency installation)
lparsons
parents:
5
diff
changeset
|
243 <when value="none" /> |
2d6671b10919
Updated to support cutadapt version 1.1 (also include automatic dependency installation)
lparsons
parents:
5
diff
changeset
|
244 <when value="modify"> |
9 | 245 <param name="cut" type="integer" optional="true" value="0" label="Cut bases from reads before adapter trimming" help="Remove bases from the beginning or end of each read before trimming adapters. If positive, the bases are removed from the beginning of each read. If negative, the bases are removed from the end of each read." /> |
8
2d6671b10919
Updated to support cutadapt version 1.1 (also include automatic dependency installation)
lparsons
parents:
5
diff
changeset
|
246 <param name="quality_cutoff" type="integer" min="0" optional="true" value="0" label="Quality cutoff" help="Trim low-quality ends from reads before adapter removal. The algorithm is the same as the one used by BWA (Subtract CUTOFF from all qualities; compute partial sums from all indices to the end of the sequence; cut sequence at the index at which the sum is minimal). Value of 0 means no quality trimming." /> |
2d6671b10919
Updated to support cutadapt version 1.1 (also include automatic dependency installation)
lparsons
parents:
5
diff
changeset
|
247 <param name="prefix" label="Prefix" type="text" help="Add this prefix to read names" /> |
2d6671b10919
Updated to support cutadapt version 1.1 (also include automatic dependency installation)
lparsons
parents:
5
diff
changeset
|
248 <param name="suffix" label="Suffix" type="text" help="Add this suffix to read names" /> |
9 | 249 <param name="strip_suffix" label="Strip suffix" type="text" help="Remove this suffix from read names if present." /> |
8
2d6671b10919
Updated to support cutadapt version 1.1 (also include automatic dependency installation)
lparsons
parents:
5
diff
changeset
|
250 <param name="length_tag" label="Length Tag" type="text" help="Search for TAG followed by a decimal number in the name of the read (description/comment field of the FASTA or FASTQ file). Replace the decimal number with the correct length of the trimmed read. For example, use --length-tag 'length=' to search for fields like 'length=123'." /> |
2d6671b10919
Updated to support cutadapt version 1.1 (also include automatic dependency installation)
lparsons
parents:
5
diff
changeset
|
251 <param name="zero_cap" type="boolean" value="false" label="Change negative quality values to zero (0)" truevalue="--zero-cap" falsevalue="" help="Workaround to avoid segmentation faults in BWA" /> |
2d6671b10919
Updated to support cutadapt version 1.1 (also include automatic dependency installation)
lparsons
parents:
5
diff
changeset
|
252 </when> |
2d6671b10919
Updated to support cutadapt version 1.1 (also include automatic dependency installation)
lparsons
parents:
5
diff
changeset
|
253 </conditional> |
9 | 254 </inputs> |
8
2d6671b10919
Updated to support cutadapt version 1.1 (also include automatic dependency installation)
lparsons
parents:
5
diff
changeset
|
255 |
9 | 256 <outputs> |
257 <data format="txt" name="report" label="${tool.name} on ${on_string} (Report)" /> | |
258 <data format="input" name="output" metadata_source="input" label="${tool.name} on ${on_string} (Reads)"/> | |
259 <data format="input" name="paired_output" metadata_source="input" label="${tool.name} on ${on_string} (Paired Reads)" > | |
260 <filter>(paired_end['paired_end_boolean'] is True)</filter> | |
261 </data> | |
262 <data format="input" name="rest_output" metadata_source="input" label="${tool.name} on ${on_string} (Rest of Reads)" > | |
263 <filter>(output_params['output_type'] == "additional")</filter> | |
264 <filter>(output_params['rest_file'] is True)</filter> | |
265 </data> | |
266 <data format="txt" name="wild_output" metadata_source="input" label="${tool.name} on ${on_string} (Wildcard File)" > | |
267 <filter>(output_params['output_type'] == "additional")</filter> | |
268 <filter>(output_params['wildcard_file'] is True)</filter> | |
269 </data> | |
270 <data format="input" name="too_short_output" metadata_source="input" label="${tool.name} on ${on_string} (Too Short Reads)" > | |
271 <filter>(output_params['output_type'] == "additional")</filter> | |
272 <filter>(output_params['too_short_file'] is True)</filter> | |
273 </data> | |
274 <data format="input" name="too_long_output" metadata_source="input" label="${tool.name} on ${on_string} (Too Long Reads)" > | |
275 <filter>(output_params['output_type'] == "additional")</filter> | |
276 <filter>(output_params['too_long_file'] is True)</filter> | |
277 </data> | |
278 <data format="input" name="untrimmed_output" metadata_source="input" label="${tool.name} on ${on_string} (Untrimmed Reads)" > | |
279 <filter>(output_params['output_type'] == "additional")</filter> | |
280 <filter>(output_params['untrimmed_file'] is True)</filter> | |
281 </data> | |
282 <data format="input" name="untrimmed_paired_output" metadata_source="input" label="${tool.name} on ${on_string} (Untrimmed Paired Reads)" > | |
283 <filter>(paired_end['paired_end_boolean'] is True)</filter> | |
284 <filter>(output_params['output_type'] == "additional")</filter> | |
285 <filter>(output_params['untrimmed_file'] is True)</filter> | |
286 </data> | |
287 <data format="txt" name="info_file" metadata_source="input" label="${tool.name} on ${on_string} (Info File)" > | |
288 <filter>(output_params['output_type'] == "additional")</filter> | |
289 <filter>(output_params['info_file'] is True)</filter> | |
290 </data> | |
291 </outputs> | |
292 | |
293 <stdio> | |
294 <exit_code range="1" level="fatal" description="IOError, FormatError, or Interrupt" /> | |
295 <exit_code range="2" level="fatal" description="Invalid options specified" /> | |
296 <exit_code range="3:" level="fatal" description="Unknown error" /> | |
297 </stdio> | |
0
8b064ea16722
Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff
changeset
|
298 |
9 | 299 <tests> |
300 <test> | |
301 <param name="input" value="cutadapt_small.fastq" ftype="fastqsanger"/> | |
302 <param name="adapter_source_list" value="user"/> | |
303 <param name="adapter" value=""/> | |
304 <param name="anywhere_adapter_source_list" value="user"/> | |
305 <param name="anywhere_adapter" value="TTAGACATATCTCCGTCG"/> | |
306 <param name="front_adapter_source_list" value="user"/> | |
307 <param name="front_adapter" value=""/> | |
308 <param name="output_filtering" value="default"/> | |
309 <param name="read_modification" value="none"/> | |
310 <param name="output_type" value="default"/> | |
311 <output name="output" file="cutadapt_small.out"/> | |
312 </test> | |
8
2d6671b10919
Updated to support cutadapt version 1.1 (also include automatic dependency installation)
lparsons
parents:
5
diff
changeset
|
313 <!-- Unable to get tests to function with conditional parameters |
9 | 314 <test> |
315 <param name="input" value="cutadapt_small.fastq" ftype="fastqsanger"/> | |
316 <param name="adapter_source_list" value="user"/> | |
317 <param name="adapter" value="TTAGACATATCTCCGTCG"/> | |
318 <param name="anywhere_adapter_source_list" value="user"/> | |
319 <param name="anywhere_adapter" value=""/> | |
320 <param name="front_adapter_source_list" value="user"/> | |
321 <param name="front_adapter" value=""/> | |
322 <param name="output_filtering" value="filter"/> | |
323 <param name="discard" value="true"/> | |
324 <param name="read_modification" value="none"/> | |
325 <param name="output_type" value="default"/> | |
326 <output name="output" file="cutadapt_discard.out"/> | |
327 </test> | |
328 <test> | |
329 <param name="input" value="cutadapt_rest.fa" ftype="fasta"/> | |
330 <param name="adapter_source_list" value="user"/> | |
331 <param name="adapter" value="ADAPTER"/> | |
332 <param name="anywhere_adapter_source_list" value="user"/> | |
333 <param name="anywhere_adapter" value=""/> | |
334 <param name="front_adapter_source_list" value="user"/> | |
335 <param name="front_adapter" value=""/> | |
336 <param name="output_filtering" value="default"/> | |
337 <param name="read_modification" value="none"/> | |
338 <param name="output_type" value="additional"/> | |
339 <param name="rest_file" value="true"/> | |
340 <output name="output" file="cutadapt_rest.out"/> | |
341 <output name="rest_output" file="cutadapt_rest2.out"/> | |
342 </test> | |
8
2d6671b10919
Updated to support cutadapt version 1.1 (also include automatic dependency installation)
lparsons
parents:
5
diff
changeset
|
343 --> |
9 | 344 </tests> |
0
8b064ea16722
Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff
changeset
|
345 |
9 | 346 <help> |
8
2d6671b10919
Updated to support cutadapt version 1.1 (also include automatic dependency installation)
lparsons
parents:
5
diff
changeset
|
347 Summary |
2d6671b10919
Updated to support cutadapt version 1.1 (also include automatic dependency installation)
lparsons
parents:
5
diff
changeset
|
348 ------- |
0
8b064ea16722
Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff
changeset
|
349 This tool removes adapter sequences from DNA high-throughput |
8b064ea16722
Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff
changeset
|
350 sequencing data. This is usually necessary when the read length of the |
8b064ea16722
Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff
changeset
|
351 machine is longer than the molecule that is sequenced, such as in |
8b064ea16722
Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff
changeset
|
352 microRNA data. |
8b064ea16722
Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff
changeset
|
353 |
9 | 354 The tool is based on the open-source `cutadapt |
355 <http://code.google.com/p/cutadapt/>`_ tool. See the `complete cutadapt | |
356 documentation <https://cutadapt.readthedocs.org/en/latest/index.html>`_ for additional details. | |
0
8b064ea16722
Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff
changeset
|
357 |
8b064ea16722
Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff
changeset
|
358 ----- |
8b064ea16722
Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff
changeset
|
359 |
8
2d6671b10919
Updated to support cutadapt version 1.1 (also include automatic dependency installation)
lparsons
parents:
5
diff
changeset
|
360 Algorithm |
2d6671b10919
Updated to support cutadapt version 1.1 (also include automatic dependency installation)
lparsons
parents:
5
diff
changeset
|
361 --------- |
0
8b064ea16722
Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff
changeset
|
362 |
8b064ea16722
Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff
changeset
|
363 cutadapt uses a simple semi-global alignment algorithm, without any special optimizations. |
8
2d6671b10919
Updated to support cutadapt version 1.1 (also include automatic dependency installation)
lparsons
parents:
5
diff
changeset
|
364 For speed, the algorithm is implemented as a Python extension module in ``calignmodule.c``. |
0
8b064ea16722
Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff
changeset
|
365 |
8b064ea16722
Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff
changeset
|
366 |
8
2d6671b10919
Updated to support cutadapt version 1.1 (also include automatic dependency installation)
lparsons
parents:
5
diff
changeset
|
367 Partial adapter matches |
2d6671b10919
Updated to support cutadapt version 1.1 (also include automatic dependency installation)
lparsons
parents:
5
diff
changeset
|
368 ----------------------- |
0
8b064ea16722
Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff
changeset
|
369 |
8b064ea16722
Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff
changeset
|
370 Cutadapt correctly deals with partial adapter matches. As an example, suppose |
8
2d6671b10919
Updated to support cutadapt version 1.1 (also include automatic dependency installation)
lparsons
parents:
5
diff
changeset
|
371 your adapter sequence is ``ADAPTER`` (specified via 3' Adapters parameter). |
2d6671b10919
Updated to support cutadapt version 1.1 (also include automatic dependency installation)
lparsons
parents:
5
diff
changeset
|
372 If you have these input sequences:: |
0
8b064ea16722
Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff
changeset
|
373 |
9 | 374 MYSEQUENCEADAPTER |
375 MYSEQUENCEADAP | |
376 MYSEQUENCEADAPTERSOMETHINGELSE | |
0
8b064ea16722
Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff
changeset
|
377 |
8
2d6671b10919
Updated to support cutadapt version 1.1 (also include automatic dependency installation)
lparsons
parents:
5
diff
changeset
|
378 All of them will be trimmed to ``MYSEQUENCE``. If the sequence starts with an |
2d6671b10919
Updated to support cutadapt version 1.1 (also include automatic dependency installation)
lparsons
parents:
5
diff
changeset
|
379 adapter, like this:: |
0
8b064ea16722
Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff
changeset
|
380 |
9 | 381 ADAPTERSOMETHING |
0
8b064ea16722
Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff
changeset
|
382 |
8b064ea16722
Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff
changeset
|
383 It will be empty after trimming. |
8b064ea16722
Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff
changeset
|
384 |
8b064ea16722
Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff
changeset
|
385 When the allowed error rate is sufficiently high, errors in |
8
2d6671b10919
Updated to support cutadapt version 1.1 (also include automatic dependency installation)
lparsons
parents:
5
diff
changeset
|
386 the adapter sequence are allowed. For example, ``ADABTER`` (1 mismatch), ``ADAPTR`` (1 deletion), |
2d6671b10919
Updated to support cutadapt version 1.1 (also include automatic dependency installation)
lparsons
parents:
5
diff
changeset
|
387 and ``ADAPPTER`` (1 insertion) will all be recognized if the error rate is set to 0.15. |
0
8b064ea16722
Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff
changeset
|
388 |
8b064ea16722
Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff
changeset
|
389 |
8
2d6671b10919
Updated to support cutadapt version 1.1 (also include automatic dependency installation)
lparsons
parents:
5
diff
changeset
|
390 Anchoring 5' adapters |
2d6671b10919
Updated to support cutadapt version 1.1 (also include automatic dependency installation)
lparsons
parents:
5
diff
changeset
|
391 --------------------- |
2d6671b10919
Updated to support cutadapt version 1.1 (also include automatic dependency installation)
lparsons
parents:
5
diff
changeset
|
392 |
2d6671b10919
Updated to support cutadapt version 1.1 (also include automatic dependency installation)
lparsons
parents:
5
diff
changeset
|
393 If you specify a 5' (Front) adapter, the adapter may overlap the beginning of the read or |
9 | 394 occur anywhere within it. If it appears within the read, the sequence that precedes it |
8
2d6671b10919
Updated to support cutadapt version 1.1 (also include automatic dependency installation)
lparsons
parents:
5
diff
changeset
|
395 will also be trimmed in addition to the adapter. For example when the adapter sequence is |
2d6671b10919
Updated to support cutadapt version 1.1 (also include automatic dependency installation)
lparsons
parents:
5
diff
changeset
|
396 ``ADAPTER``:: |
2d6671b10919
Updated to support cutadapt version 1.1 (also include automatic dependency installation)
lparsons
parents:
5
diff
changeset
|
397 |
2d6671b10919
Updated to support cutadapt version 1.1 (also include automatic dependency installation)
lparsons
parents:
5
diff
changeset
|
398 HELLOADAPTERTHERE |
2d6671b10919
Updated to support cutadapt version 1.1 (also include automatic dependency installation)
lparsons
parents:
5
diff
changeset
|
399 APTERTHERE |
0
8b064ea16722
Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff
changeset
|
400 |
8
2d6671b10919
Updated to support cutadapt version 1.1 (also include automatic dependency installation)
lparsons
parents:
5
diff
changeset
|
401 will both be trimmed to ``THERE``. To avoid this, you can prefix the adapter with the character |
2d6671b10919
Updated to support cutadapt version 1.1 (also include automatic dependency installation)
lparsons
parents:
5
diff
changeset
|
402 ``^``. This will restrict the search, forcing the adapter to be a prefix of the read. With |
2d6671b10919
Updated to support cutadapt version 1.1 (also include automatic dependency installation)
lparsons
parents:
5
diff
changeset
|
403 the adapter sequence set to ``^ADAPTER``, only reads like this will be trimmed:: |
2d6671b10919
Updated to support cutadapt version 1.1 (also include automatic dependency installation)
lparsons
parents:
5
diff
changeset
|
404 |
2d6671b10919
Updated to support cutadapt version 1.1 (also include automatic dependency installation)
lparsons
parents:
5
diff
changeset
|
405 ADAPTERHELLO |
2d6671b10919
Updated to support cutadapt version 1.1 (also include automatic dependency installation)
lparsons
parents:
5
diff
changeset
|
406 |
2d6671b10919
Updated to support cutadapt version 1.1 (also include automatic dependency installation)
lparsons
parents:
5
diff
changeset
|
407 |
2d6671b10919
Updated to support cutadapt version 1.1 (also include automatic dependency installation)
lparsons
parents:
5
diff
changeset
|
408 Allowing adapters anywhere |
2d6671b10919
Updated to support cutadapt version 1.1 (also include automatic dependency installation)
lparsons
parents:
5
diff
changeset
|
409 -------------------------- |
2d6671b10919
Updated to support cutadapt version 1.1 (also include automatic dependency installation)
lparsons
parents:
5
diff
changeset
|
410 |
2d6671b10919
Updated to support cutadapt version 1.1 (also include automatic dependency installation)
lparsons
parents:
5
diff
changeset
|
411 Cutadapt assumes that any adapter specified via the 3' Adapter parameter |
2d6671b10919
Updated to support cutadapt version 1.1 (also include automatic dependency installation)
lparsons
parents:
5
diff
changeset
|
412 was ligated to the 3' end of the sequence. This is the correct assumption for |
0
8b064ea16722
Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff
changeset
|
413 at least the SOLiD and Illumina small RNA protocols and probably others. |
8
2d6671b10919
Updated to support cutadapt version 1.1 (also include automatic dependency installation)
lparsons
parents:
5
diff
changeset
|
414 The assumption is enforced by the alignment algorithm, which only finds the adapter |
2d6671b10919
Updated to support cutadapt version 1.1 (also include automatic dependency installation)
lparsons
parents:
5
diff
changeset
|
415 when its starting position is within the read. In other words, the 5' base of |
2d6671b10919
Updated to support cutadapt version 1.1 (also include automatic dependency installation)
lparsons
parents:
5
diff
changeset
|
416 the adapter must appear within the read. The adapter and all bases following |
9 | 417 it are removed. |
0
8b064ea16722
Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff
changeset
|
418 |
8b064ea16722
Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff
changeset
|
419 If, on the other hand, your adapter can also be ligated to the 5' end (on |
8
2d6671b10919
Updated to support cutadapt version 1.1 (also include automatic dependency installation)
lparsons
parents:
5
diff
changeset
|
420 purpose or by accident), you should tell cutadapt so by using the Anywhere Adapter |
2d6671b10919
Updated to support cutadapt version 1.1 (also include automatic dependency installation)
lparsons
parents:
5
diff
changeset
|
421 parameter. It will then use a slightly different alignment algorithm |
2d6671b10919
Updated to support cutadapt version 1.1 (also include automatic dependency installation)
lparsons
parents:
5
diff
changeset
|
422 (so-called semi-global alignment), which allows any type of overlap between the |
2d6671b10919
Updated to support cutadapt version 1.1 (also include automatic dependency installation)
lparsons
parents:
5
diff
changeset
|
423 adapter and the sequence. In particular, the adapter may appear only partially |
2d6671b10919
Updated to support cutadapt version 1.1 (also include automatic dependency installation)
lparsons
parents:
5
diff
changeset
|
424 in the beginning of the read, like this:: |
0
8b064ea16722
Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff
changeset
|
425 |
8
2d6671b10919
Updated to support cutadapt version 1.1 (also include automatic dependency installation)
lparsons
parents:
5
diff
changeset
|
426 PTERMYSEQUENCE |
0
8b064ea16722
Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff
changeset
|
427 |
8
2d6671b10919
Updated to support cutadapt version 1.1 (also include automatic dependency installation)
lparsons
parents:
5
diff
changeset
|
428 The decision about which part of the read to remove is made as follows: If there is at |
2d6671b10919
Updated to support cutadapt version 1.1 (also include automatic dependency installation)
lparsons
parents:
5
diff
changeset
|
429 least one base before the found adapter, then the adapter is considered to be |
2d6671b10919
Updated to support cutadapt version 1.1 (also include automatic dependency installation)
lparsons
parents:
5
diff
changeset
|
430 a 3' adapter and the adapter itself and everything following it is removed. |
2d6671b10919
Updated to support cutadapt version 1.1 (also include automatic dependency installation)
lparsons
parents:
5
diff
changeset
|
431 Otherwise, the adapter is considered to be a 5' adapter and it is removed from |
2d6671b10919
Updated to support cutadapt version 1.1 (also include automatic dependency installation)
lparsons
parents:
5
diff
changeset
|
432 the read. |
0
8b064ea16722
Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff
changeset
|
433 |
8
2d6671b10919
Updated to support cutadapt version 1.1 (also include automatic dependency installation)
lparsons
parents:
5
diff
changeset
|
434 Here are some examples, which may make this clearer (left: read, right: trimmed |
2d6671b10919
Updated to support cutadapt version 1.1 (also include automatic dependency installation)
lparsons
parents:
5
diff
changeset
|
435 read):: |
0
8b064ea16722
Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff
changeset
|
436 |
8
2d6671b10919
Updated to support cutadapt version 1.1 (also include automatic dependency installation)
lparsons
parents:
5
diff
changeset
|
437 MYSEQUENCEADAPTER -> MYSEQUENCE (3' adapter) |
2d6671b10919
Updated to support cutadapt version 1.1 (also include automatic dependency installation)
lparsons
parents:
5
diff
changeset
|
438 MADAPTER -> M (3' adapter) |
2d6671b10919
Updated to support cutadapt version 1.1 (also include automatic dependency installation)
lparsons
parents:
5
diff
changeset
|
439 ADAPTERMYSEQUENCE -> MYSEQUENCE (5' adapter) |
2d6671b10919
Updated to support cutadapt version 1.1 (also include automatic dependency installation)
lparsons
parents:
5
diff
changeset
|
440 PTERMYSEQUENCE -> MYSEQUENCE (5' adapter) |
2d6671b10919
Updated to support cutadapt version 1.1 (also include automatic dependency installation)
lparsons
parents:
5
diff
changeset
|
441 |
2d6671b10919
Updated to support cutadapt version 1.1 (also include automatic dependency installation)
lparsons
parents:
5
diff
changeset
|
442 The regular algorithm (3' Adapter) would trim the first two examples in the same way, |
2d6671b10919
Updated to support cutadapt version 1.1 (also include automatic dependency installation)
lparsons
parents:
5
diff
changeset
|
443 but trim the third to an empty sequence and trim the fourth not at all. |
0
8b064ea16722
Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff
changeset
|
444 |
8b064ea16722
Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff
changeset
|
445 |
9 | 446 Format of the info file |
447 ----------------------- | |
448 The info file contains information about the found adapters. The output is a tab-separated text file. Each line corresponds to one read of the input file. The fields are: | |
449 | |
450 1. Read name | |
451 2. Number of errors | |
452 3. 0-based start coordinate of the adapter match | |
453 4. 0-based end coordinate of the adapter match | |
454 5. Sequence of the read to the left of the adapter match (can be empty) | |
455 6. Sequence of the read that was matched to the adapter | |
456 7. Sequence of the read to the right of the adapter match (can be empty) | |
457 8. Name of the found adapter. | |
458 | |
459 The concatenation of the fields 5-7 yields the full read sequence. In column 8, adapters without a name are numbered starting from 1. | |
460 | |
461 If no adapter was found, the format is as follows: | |
462 | |
463 1. Read name | |
464 2. The value -1 | |
465 3. The read sequence | |
466 | |
467 When parsing that file, be aware that additional columns may be added in the future. Note also that some fields can be empty, resulting in consecutive tabs within a line. Also, in the current version, when the *Match times* option is set to a value other than 1 (the default value), multiple lines are written to the info file for each read. | |
468 | |
0
8b064ea16722
Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff
changeset
|
469 .. _cutadapt: http://code.google.com/p/cutadapt/ |
9 | 470 </help> |
0
8b064ea16722
Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff
changeset
|
471 |
8b064ea16722
Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff
changeset
|
472 </tool> |