comparison cutadapt.xml @ 8:2d6671b10919 draft

Updated to support cutadapt version 1.1 (also include automatic dependency installation)
author lparsons
date Mon, 26 Nov 2012 17:37:26 -0500
parents 1dada50cca8a
children 93d58ffe39f1
comparison
equal deleted inserted replaced
7:1dda185ea2d0 8:2d6671b10919
1 <tool id="cutadapt" name="Cutadapt" version="0.9.5.a"> 1 <tool id="cutadapt" name="Cutadapt" version="1.1.a">
2 <description>Remove adapter sequences from Fastq/Fasta</description> 2 <description>Remove adapter sequences from Fastq/Fasta</description>
3 <requirements> 3 <requirements>
4 <requirement type="python-module">cutadapt</requirement> 4 <requirement type="package" version="1.1">cutadapt</requirement>
5 </requirements> 5 </requirements>
6 <version_command>cutadapt --version</version_command>
6 7
7 <command>cutadapt 8 <command>cutadapt
8 #if $input.extension.startswith( "fastq"): 9 #if $input.extension.startswith( "fastq"):
9 --format=fastq 10 --format=fastq
11 #if $input.extension == "fastqillumina":
12 --quality-base=64
13 #end if
14 #if $input.extension == "fastqsolexa":
15 --quality-base=64
16 #end if
10 #else 17 #else
11 --format=$input.extension 18 --format=$input.extension
12 #end if 19 #end if
13 #for $a in $adapters 20 #for $a in $adapters
14 --adapter='${a.adapter_source.adapter}' 21 --adapter='${a.adapter_source.adapter}'
15 #end for 22 #end for
16 #for $aa in $anywhere_adapters 23 #for $aa in $anywhere_adapters
17 --anywhere='${aa.anywhere_adapter_source.anywhere_adapter}' 24 --anywhere='${aa.anywhere_adapter_source.anywhere_adapter}'
18 #end for 25 #end for
26 #for $fa in $front_adapters
27 --front='${fa.front_adapter_source.front_adapter}'
28 #end for
19 --error-rate=$error_rate 29 --error-rate=$error_rate
20 --times=$count 30 --times=$count
21 --overlap=$overlap 31 --overlap=$overlap
22 #if str($min) != '0': 32 $match_read_wildcards
23 --minimum-length=$min 33 $no_match_adapters_wildcards
24 #end if 34
25 #if str($max) != '0': 35 #if str( $output_filtering_options.output_filtering) == "filter":
26 --maximum-length=$max 36 $output_filtering_options.discard
27 #end if 37 #if str($output_filtering_options.min) != '0':
28 #if str($quality_cutoff) != '0': 38 --minimum-length=$output_filtering_options.min
29 --quality-cutoff=$quality_cutoff 39 #end if
30 #end if 40 #if str($output_filtering_options.max) != '0':
31 $discard 41 --maximum-length=$output_filtering_options.max
42 #end if
43 #end if
44
32 --output='$output' 45 --output='$output'
33 #if str( $output_params.output_type ) == "additional": 46 #if str( $output_params.output_type ) == "additional":
34 #if $output_params.rest_file: 47 #if $output_params.rest_file:
35 --rest-file=$rest_output 48 --rest-file=$rest_output
49 #end if
50 #if $output_params.wildcard_file:
51 --wildcard-file=$wild_output
36 #end if 52 #end if
37 #if $output_params.too_short_file: 53 #if $output_params.too_short_file:
38 --too-short-output=$too_short_output 54 --too-short-output=$too_short_output
39 #end if 55 #end if
40 #if $output_params.untrimmed_file: 56 #if $output_params.untrimmed_file:
41 --untrimmed-output=$untrimmed_output 57 --untrimmed-output=$untrimmed_output
42 #end if 58 #end if
43 #end if 59 #end if
60
61 #if str( $read_modification_params.read_modification) == "modify":
62 #if str($read_modification_params.quality_cutoff) != '0':
63 --quality-cutoff=$read_modification_params.quality_cutoff
64 #end if
65 #if $read_modification_params.prefix != '':
66 --prefix="$read_modification_params.prefix"
67 #end if
68 #if $read_modification_params.suffix != '':
69 --suffix="$read_modification_params.suffix"
70 #end if
71 #if $read_modification_params.length_tag != '':
72 --length-tag="$read_modification_params.length_tag"
73 #end if
74 $read_modification_params.zero_cap
75 #end if
76
44 '$input' 77 '$input'
45 > $report 78 > $report
46 </command> 79 </command>
47 <inputs> 80 <inputs>
48 <param format="fastqsanger, fasta" name="input" type="data" optional="false" label="Fastq file to trim" length="100"/> 81 <param format="fastqsanger, fastqillumina, fastqsolexa, fasta" name="input" type="data" optional="false" label="Fastq file to trim" length="100"/>
49 82
50 <repeat name="adapters" title="3' Adapters"> 83 <repeat name="adapters" title="3' Adapters" help="Sequence of an adapter that was ligated to the 3' end. The adapter itself and anything that follows is trimmed.">
51 <conditional name="adapter_source"> 84 <conditional name="adapter_source">
52 <param name="adapter_source_list" type="select" label="Source" > 85 <param name="adapter_source_list" type="select" label="Source" >
53 <option value="prebuilt" selected="true">Standard (select from the list below)</option> 86 <option value="prebuilt" selected="true">Standard (select from the list below)</option>
54 <option value="user">Enter custom sequence</option> 87 <option value="user">Enter custom sequence</option>
55 </param> 88 </param>
56 89
57 <when value="user"> 90 <when value="user">
58 <param name="adapter" size="30" label="Enter custom 3' adapter sequence" type="text" value="AATTGGCC" help="Sequence of an adapter that was ligated to the 3' end. The adapter itself and anything that follows is trimmed. If multiple adapters are specified, only the best matching adapter is trimmed."/> 91 <param name="adapter" size="30" label="Enter custom 3' adapter sequence" type="text" value="AATTGGCC" />
59 </when> 92 </when>
60 93
61 <when value="prebuilt"> 94 <when value="prebuilt">
62 <param name="adapter" type="select" label="Choose 3' adapter" help="Sequence of an adapter that was ligated to the 3' end. The adapter itself and anything that follows is trimmed. If multiple adapters are specified, only the best matching adapter is trimmed."> 95 <param name="adapter" type="select" label="Choose 3' adapter">
63 <options from_file="fastx_clipper_sequences.txt"> 96 <options from_file="fastx_clipper_sequences.txt">
64 <column name="name" index="1"/> 97 <column name="name" index="1"/>
65 <column name="value" index="0"/> 98 <column name="value" index="0"/>
66 </options> 99 </options>
67 </param> 100 </param>
75 <option value="prebuilt" selected="true">Standard (select from the list below)</option> 108 <option value="prebuilt" selected="true">Standard (select from the list below)</option>
76 <option value="user">Enter custom sequence</option> 109 <option value="user">Enter custom sequence</option>
77 </param> 110 </param>
78 111
79 <when value="user"> 112 <when value="user">
80 <param name="anywhere_adapter" size="30" label="Enter custom 5' or 3' adapter sequence" type="text" value="AATTGGCC" help="Sequence of an adapter that was ligated to the 5' or 3' end. If the adapter is found within the read or overlapping the 3' end of the read, the behavior is the same as for the -a option. If the adapter overlaps the 5' end (beginning of the read), the initial portion of the read matching the adapter is trimmed, but anything that follows is kept. If multiple -a or -b options are given, only the best matching adapter is trimmed."/> 113 <param name="anywhere_adapter" size="30" label="Enter custom 5' or 3' adapter sequence" type="text" value="AATTGGCC" />
81 </when> 114 </when>
82 <when value="prebuilt"> 115 <when value="prebuilt">
83 <param name="anywhere_adapter" type="select" label="Choose 5' or 3' adapter" help="Sequence of an adapter that was ligated to the 5' or 3' end. If the adapter is found within the read or overlapping the 3' end of the read, the behavior is the same as for the -a option. If the adapter overlaps the 5' end (beginning of the read), the initial portion of the read matching the adapter is trimmed, but anything that follows is kept. If multiple -a or -b options are given, only the best matching adapter is trimmed."> 116 <param name="anywhere_adapter" type="select" label="Choose 5' or 3' adapter">
84 <options from_file="fastx_clipper_sequences.txt"> 117 <options from_file="fastx_clipper_sequences.txt">
85 <column name="name" index="1"/> 118 <column name="name" index="1"/>
86 <column name="value" index="0"/> 119 <column name="value" index="0"/>
87 </options> 120 </options>
88 </param> 121 </param>
89 </when> 122 </when>
90 </conditional> 123 </conditional>
91 </repeat> 124 </repeat>
92 125
126 <repeat name="front_adapters" title="5' (Front) Adapters" help="Sequence of an adapter that was ligated to the 5' end. If the adapter sequence starts with the character '^', the adapter is 'anchored'. An anchored adapter must appear in its entirety at the 5' end of the read (it is a prefix of the read). A non-anchored adapter may appear partially at the 5' end, or it may occur within the read. If it is found within a read, the sequence preceding the adapter is also trimmed. In all cases the adapter itself is trimmed.">
127 <conditional name="front_adapter_source">
128 <param name="front_adapter_source_list" type="select" label="Source">
129 <option value="prebuilt" selected="true">Standard (select from the list below)</option>
130 <option value="user">Enter custom sequence</option>
131 </param>
132
133 <when value="user">
134 <param name="front_adapter" size="30" label="Enter custom 5' adapter sequence" type="text" value="AATTGGCC" />
135 </when>
136 <when value="prebuilt">
137 <param name="front_adapter" type="select" label="Choose 5' adapter">
138 <options from_file="fastx_clipper_sequences.txt">
139 <column name="name" index="1"/>
140 <column name="value" index="0"/>
141 </options>
142 </param>
143 </when>
144 </conditional>
145 </repeat>
146
93 <param name="error_rate" type="float" min="0" max="1" value="0.1" label="Maximum error rate" help="Maximum allowed error rate (no. of errors divided by the length of the matching region)." /> 147 <param name="error_rate" type="float" min="0" max="1" value="0.1" label="Maximum error rate" help="Maximum allowed error rate (no. of errors divided by the length of the matching region)." />
94 <param name="count" type="integer" min="1" value="1" label="Match times" help="Try to remove adapters at most COUNT times. Useful when an adapter gets appended multiple times." /> 148 <param name="count" type="integer" min="1" value="1" label="Match times" help="Try to remove adapters at most COUNT times. Useful when an adapter gets appended multiple times." />
95 <param name="overlap" type="integer" min="1" value="3" label="Minimum overlap length" help="Minimum overlap length. If the overlap between the adapter and the sequence is shorter than LENGTH, the read is not modified." /> 149 <param name="overlap" type="integer" min="1" value="3" label="Minimum overlap length" help="Minimum overlap length. If the overlap between the adapter and the sequence is shorter than LENGTH, the read is not modified. This reduces the number of bases trimmed purely due to short random adapter matches." />
96 <param name="discard" type="boolean" value="false" truevalue="--discard" falsevalue="" label="Discard Trimmed Reads" help="Discard reads that contain the adapter instead of trimming them. Use the 'Minimum overlap length' option in order to avoid throwing away too many randomly matching reads!" /> 150
97 <param name="min" type="integer" min="0" optional="true" value="0" label="Minimum length" help="Discard trimmed reads that are shorter than LENGTH. Reads that are too short even before adapter removal are also discarded. In colorspace, an initial primer is not counted. Value of 0 means no minimum length." /> 151 <param name="match_read_wildcards" type="boolean" value="false" truevalue="--match-read-wildcards" falsevalue="" label="Match Read Wildcards" help="Allow 'N's in the read as matches to the adapter." />
98 <param name="max" type="integer" min="0" optional="true" value="0" label="Maximum length" help="Discard trimmed reads that are longer than LENGTH. Reads that are too long even before adapter removal are also discarded. In colorspace, an initial primer is not counted. Value of 0 means no maximum length." /> 152 <param name="no_match_adapters_wildcards" type="boolean" value="false" truevalue="--no-match-adapter-wildcards" falsevalue="" label="Do Not Match Adapter Wildcards" help="Do not treat 'N' in the adapter sequence as wildcards. This is needed when you want to search for literal 'N' characters." />
99 <param name="quality_cutoff" type="integer" min="0" optional="true" value="0" label="Quality cutoff" help="Trim low-quality ends from reads before adapter removal. The algorithm is the same as the one used by BWA (Subtract CUTOFF from all qualities; compute partial sums from all indices to the end of the sequence; cut sequence at the index at which the sum is minimal). Value of 0 means no quality trimming." /> 153
100 <conditional name="output_params"> 154 <conditional name="output_filtering_options">
155 <param name="output_filtering" type="select" label="Output filtering options" help="Options for filtering processed reads by those that contain the adapter or by minimum or maximum length">
156 <option value="default">Default (no filtering)</option>
157 <option value="filter">Set Filters</option>
158 </param>
159 <when value="default" />
160 <when value="filter">
161 <param name="discard" type="boolean" value="false" truevalue="--discard" falsevalue="" label="Discard Trimmed Reads" help="Discard reads that contain the adapter instead of trimming them. Use the 'Minimum overlap length' option in order to avoid throwing away too many randomly matching reads!" />
162 <param name="min" type="integer" min="0" optional="true" value="0" label="Minimum length" help="Discard trimmed reads that are shorter than LENGTH. Reads that are too short even before adapter removal are also discarded. In colorspace, an initial primer is not counted. Value of 0 means no minimum length." />
163 <param name="max" type="integer" min="0" optional="true" value="0" label="Maximum length" help="Discard trimmed reads that are longer than LENGTH. Reads that are too long even before adapter removal are also discarded. In colorspace, an initial primer is not counted. Value of 0 means no maximum length." />
164 </when>
165 </conditional>
166
167 <conditional name="output_params">
101 <param name="output_type" type="select" label="Additional output options" help="By default all reads will be put in the same file. However, reads with adapters matching in the middle, unmatched reads, and too-short reads can be saved in separate files."> 168 <param name="output_type" type="select" label="Additional output options" help="By default all reads will be put in the same file. However, reads with adapters matching in the middle, unmatched reads, and too-short reads can be saved in separate files.">
102 <option value="default">Default</option> 169 <option value="default">Default</option>
103 <option value="additional">Additional output files</option> 170 <option value="additional">Additional output files</option>
104 </param> 171 </param>
105 <when value="default" /> 172 <when value="default" />
106 <when value="additional"> 173 <when value="additional">
107 <param name="rest_file" type="boolean" value="false" label="Rest of Read" help="When the adapter matches in the middle of a read, write the rest (after the adapter) into a file."/> 174 <param name="rest_file" type="boolean" value="false" label="Rest of Read" help="When the adapter matches in the middle of a read, write the rest (after the adapter) into a file."/>
175 <param name="wildcard_file" type="boolean" value="false" label="Wildcard File" help="When the adapter has wildcard bases ('N's) write adapter bases matching wildcard positions to file."/>
108 <param name="too_short_file" type="boolean" value="false" label="Too Short Reads" help="Write reads that are too short (according to minimum length specified) to a file. (default: discard reads)"/> 176 <param name="too_short_file" type="boolean" value="false" label="Too Short Reads" help="Write reads that are too short (according to minimum length specified) to a file. (default: discard reads)"/>
109 <param name="untrimmed_file" type="boolean" value="false" label="Untrimmed Reads" help="Write reads that do not contain the adapter to a separate file, instead of writing them to the regular output file. (default: output to same file as trimmed)"/> 177 <param name="untrimmed_file" type="boolean" value="false" label="Untrimmed Reads" help="Write reads that do not contain the adapter to a separate file, instead of writing them to the regular output file. (default: output to same file as trimmed)"/>
110 </when> 178 </when>
111 </conditional> 179 </conditional>
180
181 <conditional name="read_modification_params">
182 <param name="read_modification" type="select" label="Additional modifications to reads" help="Various options to trim reads based on quality, modify read names and quality scores">
183 <option value="none">No Read Modifications</option>
184 <option value="modify">Set Modification Options</option>
185 </param>
186 <when value="none" />
187 <when value="modify">
188 <param name="quality_cutoff" type="integer" min="0" optional="true" value="0" label="Quality cutoff" help="Trim low-quality ends from reads before adapter removal. The algorithm is the same as the one used by BWA (Subtract CUTOFF from all qualities; compute partial sums from all indices to the end of the sequence; cut sequence at the index at which the sum is minimal). Value of 0 means no quality trimming." />
189 <param name="prefix" label="Prefix" type="text" help="Add this prefix to read names" />
190 <param name="suffix" label="Suffix" type="text" help="Add this suffix to read names" />
191 <param name="length_tag" label="Length Tag" type="text" help="Search for TAG followed by a decimal number in the name of the read (description/comment field of the FASTA or FASTQ file). Replace the decimal number with the correct length of the trimmed read. For example, use --length-tag 'length=' to search for fields like 'length=123'." />
192 <param name="zero_cap" type="boolean" value="false" label="Change negative quality values to zero (0)" truevalue="--zero-cap" falsevalue="" help="Workaround to avoid segmentation faults in BWA" />
193 </when>
194 </conditional>
112 </inputs> 195 </inputs>
196
113 <outputs> 197 <outputs>
114 <data format="txt" name="report" label="${tool.name} on ${on_string} (Report)" /> 198 <data format="txt" name="report" label="${tool.name} on ${on_string} (Report)" />
115 <data format="input" name="output" metadata_source="input"/> 199 <data format="input" name="output" metadata_source="input"/>
116 <data format="input" name="rest_output" metadata_source="input" label="${tool.name} on ${on_string} (Rest of Reads)" > 200 <data format="input" name="rest_output" metadata_source="input" label="${tool.name} on ${on_string} (Rest of Reads)" >
117 <filter>(output_params['output_type'] == "additional")</filter> 201 <filter>(output_params['output_type'] == "additional")</filter>
118 <filter>(output_params['rest_file'] is True)</filter> 202 <filter>(output_params['rest_file'] is True)</filter>
203 </data>
204 <data format="txt" name="wild_output" metadata_source="input" label="${tool.name} on ${on_string} (Wildcard File)" >
205 <filter>(output_params['output_type'] == "additional")</filter>
206 <filter>(output_params['wildcard_file'] is True)</filter>
119 </data> 207 </data>
120 <data format="input" name="too_short_output" metadata_source="input" label="${tool.name} on ${on_string} (Too Short Reads)" > 208 <data format="input" name="too_short_output" metadata_source="input" label="${tool.name} on ${on_string} (Too Short Reads)" >
121 <filter>(output_params['output_type'] == "additional")</filter> 209 <filter>(output_params['output_type'] == "additional")</filter>
122 <filter>(output_params['too_short_file'] is True)</filter> 210 <filter>(output_params['too_short_file'] is True)</filter>
123 </data> 211 </data>
132 <param name="input" value="cutadapt_small.fastq" ftype="fastqsanger"/> 220 <param name="input" value="cutadapt_small.fastq" ftype="fastqsanger"/>
133 <param name="adapter_source_list" value="user"/> 221 <param name="adapter_source_list" value="user"/>
134 <param name="adapter" value=""/> 222 <param name="adapter" value=""/>
135 <param name="anywhere_adapter_source_list" value="user"/> 223 <param name="anywhere_adapter_source_list" value="user"/>
136 <param name="anywhere_adapter" value="TTAGACATATCTCCGTCG"/> 224 <param name="anywhere_adapter" value="TTAGACATATCTCCGTCG"/>
225 <param name="front_adapter_source_list" value="user"/>
226 <param name="front_adapter" value=""/>
227 <param name="output_filtering" value="default"/>
228 <param name="read_modification" value="none"/>
137 <param name="output_type" value="default"/> 229 <param name="output_type" value="default"/>
138 <output name="output" file="cutadapt_small.out"/> 230 <output name="output" file="cutadapt_small.out"/>
139 </test> 231 </test>
232 <!-- Unable to get tests to function with conditional parameters
140 <test> 233 <test>
141 <param name="input" value="cutadapt_small.fastq" ftype="fastqsanger"/> 234 <param name="input" value="cutadapt_small.fastq" ftype="fastqsanger"/>
142 <param name="adapter_source_list" value="user"/> 235 <param name="adapter_source_list" value="user"/>
143 <param name="adapter" value="TTAGACATATCTCCGTCG"/> 236 <param name="adapter" value="TTAGACATATCTCCGTCG"/>
144 <param name="anywhere_adapter_source_list" value="user"/> 237 <param name="anywhere_adapter_source_list" value="user"/>
145 <param name="anywhere_adapter" value=""/> 238 <param name="anywhere_adapter" value=""/>
239 <param name="front_adapter_source_list" value="user"/>
240 <param name="front_adapter" value=""/>
241 <param name="output_filtering" value="filter"/>
146 <param name="discard" value="true"/> 242 <param name="discard" value="true"/>
243 <param name="read_modification" value="none"/>
147 <param name="output_type" value="default"/> 244 <param name="output_type" value="default"/>
148 <output name="output" file="cutadapt_discard.out"/> 245 <output name="output" file="cutadapt_discard.out"/>
149 </test> 246 </test>
150 <test> 247 <test>
151 <param name="input" value="cutadapt_rest.fa" ftype="fasta"/> 248 <param name="input" value="cutadapt_rest.fa" ftype="fasta"/>
152 <param name="adapter_source_list" value="user"/> 249 <param name="adapter_source_list" value="user"/>
153 <param name="adapter" value="ADAPTER"/> 250 <param name="adapter" value="ADAPTER"/>
154 <param name="anywhere_adapter_source_list" value="user"/> 251 <param name="anywhere_adapter_source_list" value="user"/>
155 <param name="anywhere_adapter" value=""/> 252 <param name="anywhere_adapter" value=""/>
253 <param name="front_adapter_source_list" value="user"/>
254 <param name="front_adapter" value=""/>
255 <param name="output_filtering" value="default"/>
256 <param name="read_modification" value="none"/>
156 <param name="output_type" value="additional"/> 257 <param name="output_type" value="additional"/>
157 <param name="rest_file" value="true"/> 258 <param name="rest_file" value="true"/>
158 <output name="output" file="cutadapt_rest.out"/> 259 <output name="output" file="cutadapt_rest.out"/>
159 <output name="rest_output" file="cutadapt_rest2.out"/> 260 <output name="rest_output" file="cutadapt_rest2.out"/>
160 </test> 261 </test>
262 -->
161 </tests> 263 </tests>
162 264
163 <help> 265 <help>
266 Summary
267 -------
164 This tool removes adapter sequences from DNA high-throughput 268 This tool removes adapter sequences from DNA high-throughput
165 sequencing data. This is usually necessary when the read length of the 269 sequencing data. This is usually necessary when the read length of the
166 machine is longer than the molecule that is sequenced, such as in 270 machine is longer than the molecule that is sequenced, such as in
167 microRNA data. 271 microRNA data.
168 272
169 The tool is based on the opensource cutadapt_ tool. 273 The tool is based on the opensource cutadapt_ tool.
170 274
171 ----- 275 -----
172 276
173 **Algorithm** 277 Algorithm
278 ---------
174 279
175 cutadapt uses a simple semi-global alignment algorithm, without any special optimizations. 280 cutadapt uses a simple semi-global alignment algorithm, without any special optimizations.
176 For speed, the algorithm is implemented as a Python extension module in calignmodule.c. 281 For speed, the algorithm is implemented as a Python extension module in ``calignmodule.c``.
177 282
178 283
179 **Partial adapter matches** 284 Partial adapter matches
285 -----------------------
180 286
181 Cutadapt correctly deals with partial adapter matches. As an example, suppose 287 Cutadapt correctly deals with partial adapter matches. As an example, suppose
182 your adapter sequence is "ADAPTER" (specified via 3' Adapters parameter). 288 your adapter sequence is ``ADAPTER`` (specified via 3' Adapters parameter).
183 If you have these input sequences: 289 If you have these input sequences::
184
185 ::
186 290
187 MYSEQUENCEADAPTER 291 MYSEQUENCEADAPTER
188 MYSEQUENCEADAP 292 MYSEQUENCEADAP
189 MYSEQUENCEADAPTERSOMETHINGELSE 293 MYSEQUENCEADAPTERSOMETHINGELSE
190 294
191 All of them will be trimmed to "MYSEQUENCE". If the sequence starts with an 295 All of them will be trimmed to ``MYSEQUENCE``. If the sequence starts with an
192 adapter, like this: 296 adapter, like this::
193
194 ::
195 297
196 ADAPTERSOMETHING 298 ADAPTERSOMETHING
197 299
198 It will be empty after trimming. 300 It will be empty after trimming.
199 301
200 When the allowed error rate is sufficiently high, errors in 302 When the allowed error rate is sufficiently high, errors in
201 the adapter sequence are allowed. For example, ADABTER (1 mismatch), ADAPTR (1 deletion), 303 the adapter sequence are allowed. For example, ``ADABTER`` (1 mismatch), ``ADAPTR`` (1 deletion),
202 and ADAPPTER (1 insertion) will all be recognized if the error rate is set to 0.15. 304 and ``ADAPPTER`` (1 insertion) will all be recognized if the error rate is set to 0.15.
203 305
204 306
205 **Allowing adapters anywhere** 307 Anchoring 5' adapters
206 308 ---------------------
207 Cutadapt assumes that any adapter specified via the *3` Adapters* parameter 309
208 was ligated to the 3' end of the sequence. This is the correct assumption for 310 If you specify a 5' (Front) adapter, the adapter may overlap the beginning of the read or
311 occur anywhere within it. If it appears within the read, the sequence that precedes it
312 will also be trimmed in addition to the adapter. For example when the adapter sequence is
313 ``ADAPTER``::
314
315 HELLOADAPTERTHERE
316 APTERTHERE
317
318 will both be trimmed to ``THERE``. To avoid this, you can prefix the adapter with the character
319 ``^``. This will restrict the search, forcing the adapter to be a prefix of the read. With
320 the adapter sequence set to ``^ADAPTER``, only reads like this will be trimmed::
321
322 ADAPTERHELLO
323
324
325 Allowing adapters anywhere
326 --------------------------
327
328 Cutadapt assumes that any adapter specified via the 3' Adapter parameter
329 was ligated to the 3\' end of the sequence. This is the correct assumption for
209 at least the SOLiD and Illumina small RNA protocols and probably others. 330 at least the SOLiD and Illumina small RNA protocols and probably others.
331 The assumption is enforced by the alignment algorithm, which only finds the adapter
332 when its starting position is within the read. In other words, the 5' base of
333 the adapter must appear within the read. The adapter and all bases following
334 it are removed.
210 335
211 If, on the other hand, your adapter can also be ligated to the 5' end (on 336 If, on the other hand, your adapter can also be ligated to the 5' end (on
212 purpose or by accident), you should tell cutadapt so by using the *5' or 3' (Anywhere) 337 purpose or by accident), you should tell cutadapt so by using the Anywhere Adapter
213 Adapters* parameter. It will then use a different alignment algorithm and 338 parameter. It will then use a slightly different alignment algorithm
214 correctly trim adapters that appear in the beginning of a read. An adapter 339 (so-called semiglobal alignment), which allows any type of overlap between the
215 specified this way will also be found if it appears only partially in the 340 adapter and the sequence. In particular, the adapter may appear only partially
216 beginning of a read. For example, these sequences 341 in the beginning of the read, like this::
217 342
218 :: 343 PTERMYSEQUENCE
219 344
220 ADAPTERMYSEQUENCE 345 The decision which part of the read to remove is made as follows: If there is at
221 PTERMYSEQUENCE 346 least one base before the found adapter, then the adapter is considered to be
222 347 a 3' adapter and the adapter itself and everything following it is removed.
223 will be trimmed to "MYSEQUENCE". Note that the regular algorithm would trim 348 Otherwise, the adapter is considered to be a 5' adapter and it is removed from
224 the first read to an empty sequence. 349 the read.
225 350
226 This parameter currently does not work with color space data. 351 Here are some examples, which may make this clearer (left: read, right: trimmed
352 read)::
353
354 MYSEQUENCEADAPTER -> MYSEQUENCE (3' adapter)
355 MADAPTER -> M (3' adapter)
356 ADAPTERMYSEQUENCE -> MYSEQUENCE (5' adapter)
357 PTERMYSEQUENCE -> MYSEQUENCE (5' adapter)
358
359 The regular algorithm (3' Adapter) would trim the first two examples in the same way,
360 but trim the third to an empty sequence and trim the fourth not at all.
227 361
228 362
229 .. _cutadapt: http://code.google.com/p/cutadapt/ 363 .. _cutadapt: http://code.google.com/p/cutadapt/
230 </help> 364 </help>
231 365