0
|
1 <tool id="fastq_manipulation" name="Manipulate FASTQ" version="1.0.1">
|
|
2 <options sanitize="False" /> <!-- This tool uses a file to relay all parameter information (actually a dynamically generated Python module), so we can safely skip sanitizing the parameters -->
|
|
3 <description>reads on various attributes</description>
|
|
4 <command interpreter="python">fastq_manipulation.py $input_file $fastq_manipulation_file $output_file $output_file.files_path '${input_file.extension[len( 'fastq' ):]}'</command>
|
|
5 <inputs>
|
|
6 <!-- This tool is purposely over-engineered (e.g. Single option conditionals) to allow easy enhancement with workflow/rerun compatibility -->
|
|
7 <page>
|
|
8 <param name="input_file" type="data" format="fastqsanger,fastqcssanger" label="FASTQ File" help="Requires groomed data: if your data does not appear here try using the FASTQ groomer."/>
|
|
9 <!-- Match Reads -->
|
|
10 <repeat name="match_blocks" title="Match Reads">
|
|
11 <conditional name="match_type">
|
|
12 <param name="match_type_selector" type="select" label="Match Reads by">
|
|
13 <option value="identifier">Name/Identifier</option>
|
|
14 <option value="sequence">Sequence Content</option>
|
|
15 <option value="quality">Quality Score Content</option>
|
|
16 </param>
|
|
17 <when value="identifier">
|
|
18 <conditional name="match">
|
|
19 <param name="match_selector" type="select" label="Identifier Match Type">
|
|
20 <option value="regex">Regular Expression</option>
|
|
21 </param>
|
|
22 <when value="regex">
|
|
23 <param type="text" name="match_by" label="Match by" value=".*" />
|
|
24 </when>
|
|
25 </conditional>
|
|
26 </when>
|
|
27 <when value="sequence">
|
|
28 <conditional name="match">
|
|
29 <param name="match_selector" type="select" label="Sequence Match Type">
|
|
30 <option value="regex">Regular Expression</option>
|
|
31 </param>
|
|
32 <when value="regex">
|
|
33 <param type="text" name="match_by" label="Match by" value=".*" />
|
|
34 </when>
|
|
35 </conditional>
|
|
36 </when>
|
|
37 <when value="quality">
|
|
38 <conditional name="match">
|
|
39 <param name="match_selector" type="select" label="Quality Match Type">
|
|
40 <option value="regex">Regular Expression</option>
|
|
41 </param>
|
|
42 <when value="regex">
|
|
43 <param type="text" name="match_by" label="Match by" value=".*" />
|
|
44 </when>
|
|
45 </conditional>
|
|
46 </when>
|
|
47 </conditional>
|
|
48 </repeat>
|
|
49 <!-- Manipulate Matched Reads -->
|
|
50 <repeat name="manipulate_blocks" title="Manipulate Reads">
|
|
51 <conditional name="manipulation_type">
|
|
52 <param name="manipulation_type_selector" type="select" label="Manipulate Reads on">
|
|
53 <option value="identifier">Name/Identifier</option>
|
|
54 <option value="sequence">Sequence Content</option>
|
|
55 <option value="quality">Quality Score Content</option>
|
|
56 <option value="miscellaneous">Miscellaneous Actions</option>
|
|
57 </param>
|
|
58 <when value="identifier">
|
|
59 <conditional name="manipulation">
|
|
60 <param name="manipulation_selector" type="select" label="Identifier Manipulation Type">
|
|
61 <option value="translate">String Translate</option>
|
|
62 </param>
|
|
63 <when value="translate">
|
|
64 <param name="from" type="text" label="From" value="" />
|
|
65 <param name="to" type="text" label="To" value="" />
|
|
66 </when>
|
|
67 </conditional>
|
|
68 </when>
|
|
69 <when value="sequence">
|
|
70 <conditional name="manipulation">
|
|
71 <param name="manipulation_selector" type="select" label="Sequence Manipulation Type">
|
|
72 <option value="rev_comp">Reverse Complement</option>
|
|
73 <option value="rev_no_comp">Reverse, No Complement</option>
|
|
74 <option value="no_rev_comp">Complement, No Reverse</option>
|
|
75 <option value="trim">Trim</option>
|
|
76 <option value="dna_to_rna">DNA to RNA</option>
|
|
77 <option value="rna_to_dna">RNA to DNA</option>
|
|
78 <option value="translate">String Translate</option>
|
|
79 <option value="change_adapter">Change Adapter Base</option>
|
|
80 </param>
|
|
81 <when value="rev_comp">
|
|
82 <!-- no extra settings -->
|
|
83 </when>
|
|
84 <when value="rev_no_comp">
|
|
85 <!-- no extra settings -->
|
|
86 </when>
|
|
87 <when value="no_rev_comp">
|
|
88 <!-- no extra settings -->
|
|
89 </when>
|
|
90 <when value="trim">
|
|
91 <conditional name="offset_type">
|
|
92 <param name="base_offset_type" type="select" label="Define Base Offsets as" help="Use Absolute for fixed length reads (Illumina, SOLiD)&lt;br&gt;Use Percentage for variable length reads (Roche/454)"> <!-- selects how the trim offsets below are interpreted; markup inside the help attribute must stay entity-escaped to keep the XML well-formed -->
|
|
93 <option value="offsets_absolute" selected="true">Absolute Values</option>
|
|
94 <option value="offsets_percent">Percentage of Read Length</option>
|
|
95 </param>
|
|
96 <when value="offsets_absolute"> <!-- absolute offsets: whole-number base counts, zero (no trimming on that end) is allowed -->
|
|
97 <param name="left_column_offset" label="Offset from 5' end" value="0" type="integer" help="Values start at 0, increasing from the left">
|
|
98 <validator type="in_range" message="Base Offsets must not be negative" min="0" max="inf"/>
|
|
99 <validator type="expression" message="An integer is required.">int( float( value ) ) == float( value )</validator>
|
|
100 </param>
|
|
101 <param name="right_column_offset" label="Offset from 3' end" value="0" type="integer" help="Values start at 0, increasing from the right">
|
|
102 <validator type="in_range" message="Base Offsets must not be negative" min="0" max="inf"/>
|
|
103 <validator type="expression" message="An integer is required.">int( float( value ) ) == float( value )</validator>
|
|
104 </param>
|
|
105 </when>
|
|
106 <when value="offsets_percent">
|
|
107 <param name="left_column_offset" label="Offset from 5' end" value="0" type="float">
|
|
108 <validator type="in_range" message="Base Offsets must be between 0 and 100" min="0" max="100"/>
|
|
109 </param>
|
|
110 <param name="right_column_offset" label="Offset from 3' end" value="0" type="float">
|
|
111 <validator type="in_range" message="Base Offsets must be between 0 and 100" min="0" max="100"/>
|
|
112 </param>
|
|
113 </when>
|
|
114 </conditional>
|
|
115 <param name="keep_zero_length" label="Keep reads with zero length" type="boolean" truevalue="keep_zero_length" falsevalue="exclude_zero_length" checked="false"/> <!-- unchecked by default, in which case the generated manipulate_read returns None for reads trimmed to zero length -->
|
|
116 </when>
|
|
117 <when value="dna_to_rna">
|
|
118 <!-- no extra settings -->
|
|
119 </when>
|
|
120 <when value="rna_to_dna">
|
|
121 <!-- no extra settings -->
|
|
122 </when>
|
|
123 <when value="translate">
|
|
124 <param name="from" type="text" label="From" value="" />
|
|
125 <param name="to" type="text" label="To" value="" />
|
|
126 </when>
|
|
127 <when value="change_adapter">
|
|
128 <param name="new_adapter" label="New Adapter" type="text" value="G" help="An empty string will remove the adapter base" />
|
|
129 </when>
|
|
130 </conditional>
|
|
131 </when>
|
|
132 <when value="quality">
|
|
133 <conditional name="manipulation">
|
|
134 <param name="manipulation_selector" type="select" label="Quality Manipulation Type">
|
|
135 <option value="translate">String Translate</option>
|
|
136 <!-- <option value="modify_each_score">Apply Transformation to each Score</option> Not enabled yet-->
|
|
137 </param>
|
|
138 <when value="translate">
|
|
139 <param name="from" type="text" label="From" value="" />
|
|
140 <param name="to" type="text" label="To" value="" />
|
|
141 </when>
|
|
142 <when value="modify_each_score">
|
|
143 <param name="map_score" type="text" label="Modify Score by" value="$score + 1" />
|
|
144 </when>
|
|
145 </conditional>
|
|
146 </when>
|
|
147 <when value="miscellaneous">
|
|
148 <conditional name="manipulation">
|
|
149 <param name="manipulation_selector" type="select" label="Miscellaneous Manipulation Type">
|
|
150 <option value="remove">Remove Read</option>
|
|
151 </param>
|
|
152 <when value="remove">
|
|
153 <!-- no extra settings -->
|
|
154 </when>
|
|
155 </conditional>
|
|
156 </when>
|
|
157 </conditional>
|
|
158 </repeat>
|
|
159 </page>
|
|
160 </inputs>
|
|
161 <configfiles>
|
|
162 <configfile name="fastq_manipulation_file">##create an importable module
|
|
163 #import binascii
|
|
164 import re
|
|
165 import binascii
|
|
166 from string import maketrans
|
|
167 ##does read match
|
|
168 def match_read( fastq_read ): ##returns True only if the read satisfies every configured match block (True when there are none)
|
|
169 #for $match_block in $match_blocks:
|
|
170 #if $match_block['match_type']['match_type_selector'] == 'identifier':
|
|
171 search_target = fastq_read.identifier[1:] ##don't include @
|
|
172 #elif $match_block['match_type']['match_type_selector'] == 'sequence':
|
|
173 search_target = fastq_read.sequence
|
|
174 #elif $match_block['match_type']['match_type_selector'] == 'quality':
|
|
175 search_target = fastq_read.quality
|
|
176 #else:
|
|
177 #continue
|
|
178 #end if
|
|
179 if not re.search( binascii.unhexlify( "${ binascii.hexlify( str( match_block['match_type']['match']['match_by'] ) ) }" ), search_target ): ##the user pattern is hex-encoded by the template and decoded here; presumably to keep arbitrary text from breaking the string literal
|
|
180 return False
|
|
181 #end for
|
|
182 return True
|
|
183 ##modify matched reads
|
|
184 def manipulate_read( fastq_read ):
|
|
185 new_read = fastq_read.clone()
|
|
186 #for $manipulate_block in $manipulate_blocks:
|
|
187 #if $manipulate_block['manipulation_type']['manipulation_type_selector'] == 'identifier':
|
|
188 #if $manipulate_block['manipulation_type']['manipulation']['manipulation_selector'] == 'translate':
|
|
189 new_read.identifier = "@%s" % new_read.identifier[1:].translate( maketrans( binascii.unhexlify( "${ binascii.hexlify( str( manipulate_block['manipulation_type']['manipulation']['from'] ) ) }" ), binascii.unhexlify( "${ binascii.hexlify( str( manipulate_block['manipulation_type']['manipulation']['to'] ) ) }" ) ) )
|
|
190 #end if
|
|
191 #elif $manipulate_block['manipulation_type']['manipulation_type_selector'] == 'sequence':
|
|
192 #if $manipulate_block['manipulation_type']['manipulation']['manipulation_selector'] == 'translate':
|
|
193 new_read.sequence = new_read.sequence.translate( maketrans( binascii.unhexlify( "${ binascii.hexlify( str( manipulate_block['manipulation_type']['manipulation']['from'] ) ) }" ), binascii.unhexlify( "${ binascii.hexlify( str( manipulate_block['manipulation_type']['manipulation']['to'] ) ) }" ) ) )
|
|
194 #elif $manipulate_block['manipulation_type']['manipulation']['manipulation_selector'] == 'rev_comp':
|
|
195 new_read = new_read.reverse_complement()
|
|
196 #elif $manipulate_block['manipulation_type']['manipulation']['manipulation_selector'] == 'rev_no_comp':
|
|
197 new_read = new_read.reverse()
|
|
198 #elif $manipulate_block['manipulation_type']['manipulation']['manipulation_selector'] == 'no_rev_comp':
|
|
199 new_read = new_read.complement()
|
|
200 #elif $manipulate_block['manipulation_type']['manipulation']['manipulation_selector'] == 'trim':
|
|
201 #if $manipulate_block['manipulation_type']['manipulation']['offset_type']['base_offset_type'] == 'offsets_percent':
|
|
202 left_column_offset = int( round( float( ${ manipulate_block['manipulation_type']['manipulation']['offset_type']['left_column_offset'] } ) / 100.0 * float( len( new_read ) ) ) )
|
|
203 right_column_offset = int( round( float( ${ manipulate_block['manipulation_type']['manipulation']['offset_type']['right_column_offset'] } ) / 100.0 * float( len( new_read ) ) ) )
|
|
204 #else
|
|
205 left_column_offset = ${ manipulate_block['manipulation_type']['manipulation']['offset_type']['left_column_offset'] }
|
|
206 right_column_offset = ${ manipulate_block['manipulation_type']['manipulation']['offset_type']['right_column_offset'] }
|
|
207 #end if
|
|
208 if right_column_offset > 0:
|
|
209 right_column_offset = -right_column_offset
|
|
210 else:
|
|
211 right_column_offset = None
|
|
212 new_read = new_read.slice( left_column_offset, right_column_offset )
|
|
213 if not ( ${str( manipulate_block['manipulation_type']['manipulation']['keep_zero_length'] ) == 'keep_zero_length'} or len( new_read ) ):
|
|
214 return None
|
|
215 #elif $manipulate_block['manipulation_type']['manipulation']['manipulation_selector'] == 'dna_to_rna':
|
|
216 new_read = new_read.sequence_as_DNA()
|
|
217 #elif $manipulate_block['manipulation_type']['manipulation']['manipulation_selector'] == 'rna_to_dna':
|
|
218 new_read = new_read.sequence_as_RNA()
|
|
219 #elif $manipulate_block['manipulation_type']['manipulation']['manipulation_selector'] == 'change_adapter':
|
|
220 if new_read.sequence_space == 'color':
|
|
221 new_read = new_read.change_adapter( binascii.unhexlify( "${ binascii.hexlify( str( manipulate_block['manipulation_type']['manipulation']['new_adapter'] ) ) }" ) )
|
|
222 #end if
|
|
223 #elif $manipulate_block['manipulation_type']['manipulation_type_selector'] == 'quality':
|
|
224 #if $manipulate_block['manipulation_type']['manipulation']['manipulation_selector'] == 'translate':
|
|
225 new_read.quality = new_read.quality.translate( maketrans( binascii.unhexlify( "${ binascii.hexlify( str( manipulate_block['manipulation_type']['manipulation']['from'] ) ) }" ), binascii.unhexlify( "${ binascii.hexlify( str( manipulate_block['manipulation_type']['manipulation']['to'] ) ) }" ) ) )
|
|
226 #elif $manipulate_block['manipulation_type']['manipulation']['manipulation_selector'] == 'map_score':
|
|
227 def score_method( score ):
|
|
228 raise Exception, "Unimplemented" ##This option is not yet available, need to abstract out e.g. column adding tool action: preventing users from using 'harmful' actions
|
|
229 new_read.quality_map( score_method )
|
|
230 #end if
|
|
231 #elif $manipulate_block['manipulation_type']['manipulation_type_selector'] == 'miscellaneous':
|
|
232 #if $manipulate_block['manipulation_type']['manipulation']['manipulation_selector'] == 'remove':
|
|
233 return None
|
|
234 #end if
|
|
235 #else:
|
|
236 #continue
|
|
237 #end if
|
|
238 #end for
|
|
239 if new_read.description != "+":
|
|
240 new_read.description = "+%s" % new_read.identifier[1:] ##ensure description is still valid
|
|
241 return new_read
|
|
242 def match_and_manipulate_read( fastq_read ): ##manipulates reads that match; non-matching reads are returned unchanged
|
|
243 new_read = fastq_read
|
|
244 if match_read( fastq_read ):
|
|
245 new_read = manipulate_read( fastq_read ) ##may be None, e.g. for the remove action or a discarded zero-length trim
|
|
246 return new_read
|
|
247 </configfile>
|
|
248 </configfiles>
|
|
249 <outputs>
|
|
250 <data format="input" name="output_file" />
|
|
251 </outputs>
|
|
252 <tests>
|
|
253 <!-- match all and do nothing -->
|
|
254 <test>
|
|
255 <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
|
|
256 <param name="match_type_selector" value="identifier" />
|
|
257 <param name="match_selector" value="regex" />
|
|
258 <param name="match_by" value=".*" />
|
|
259 <param name="manipulation_type_selector" value="identifier" />
|
|
260 <param name="manipulation_selector" value="translate" />
|
|
261 <param name="from" value="" />
|
|
262 <param name="to" value="" />
|
|
263 <output name="output_file" file="sanger_full_range_original_sanger.fastqsanger" />
|
|
264 </test>
|
|
265 <!-- match None and do nothing -->
|
|
266 <test>
|
|
267 <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
|
|
268 <param name="match_type_selector" value="identifier" />
|
|
269 <param name="match_selector" value="regex" />
|
|
270 <param name="match_by" value="STRINGDOESNOTEXIST" />
|
|
271 <param name="manipulation_type_selector" value="identifier" />
|
|
272 <param name="manipulation_selector" value="translate" />
|
|
273 <param name="from" value="" />
|
|
274 <param name="to" value="" />
|
|
275 <output name="output_file" file="sanger_full_range_original_sanger.fastqsanger" />
|
|
276 </test>
|
|
277 <!-- match all and remove -->
|
|
278 <test>
|
|
279 <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
|
|
280 <param name="match_type_selector" value="identifier" />
|
|
281 <param name="match_selector" value="regex" />
|
|
282 <param name="match_by" value=".*" />
|
|
283 <param name="manipulation_type_selector" value="miscellaneous" />
|
|
284 <param name="manipulation_selector" value="remove" />
|
|
285 <output name="output_file" file="empty_file.dat" />
|
|
286 </test>
|
|
287 <!-- match None and remove -->
|
|
288 <test>
|
|
289 <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
|
|
290 <param name="match_type_selector" value="identifier" />
|
|
291 <param name="match_selector" value="regex" />
|
|
292 <param name="match_by" value="STRINGDOESNOTEXIST" />
|
|
293 <param name="manipulation_type_selector" value="miscellaneous" />
|
|
294 <param name="manipulation_selector" value="remove" />
|
|
295 <output name="output_file" file="sanger_full_range_original_sanger.fastqsanger" />
|
|
296 </test>
|
|
297 <!-- match all and trim to 4 inner-most bases -->
|
|
298 <test>
|
|
299 <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
|
|
300 <param name="match_type_selector" value="identifier" />
|
|
301 <param name="match_selector" value="regex" />
|
|
302 <param name="match_by" value=".*" />
|
|
303 <param name="manipulation_type_selector" value="sequence" />
|
|
304 <param name="manipulation_selector" value="trim" />
|
|
305 <param name="base_offset_type" value="offsets_absolute"/>
|
|
306 <param name="left_column_offset" value="45"/>
|
|
307 <param name="right_column_offset" value="45"/>
|
|
308 <param name="keep_zero_length" value="true" />
|
|
309 <output name="output_file" file="fastq_trimmer_out1.fastqsanger" />
|
|
310 </test>
|
|
311 <test>
|
|
312 <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
|
|
313 <param name="match_type_selector" value="identifier" />
|
|
314 <param name="match_selector" value="regex" />
|
|
315 <param name="match_by" value=".*" />
|
|
316 <param name="manipulation_type_selector" value="sequence" />
|
|
317 <param name="manipulation_selector" value="trim" />
|
|
318 <param name="base_offset_type" value="offsets_percent"/>
|
|
319 <param name="left_column_offset" value="47.87"/>
|
|
320 <param name="right_column_offset" value="47.87"/>
|
|
321 <param name="keep_zero_length" value="true" />
|
|
322 <output name="output_file" file="fastq_trimmer_out1.fastqsanger" />
|
|
323 </test>
|
|
324 <!-- match all and rev comp -->
|
|
325 <test>
|
|
326 <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
|
|
327 <param name="match_type_selector" value="identifier" />
|
|
328 <param name="match_selector" value="regex" />
|
|
329 <param name="match_by" value=".*" />
|
|
330 <param name="manipulation_type_selector" value="sequence" />
|
|
331 <param name="manipulation_selector" value="rev_comp" />
|
|
332 <output name="output_file" file="sanger_full_range_rev_comp.fastqsanger" />
|
|
333 </test>
|
|
334 <!-- match all and rev comp, with ambiguous DNA -->
|
|
335 <test>
|
|
336 <param name="input_file" value="misc_dna_original_sanger.fastqsanger" ftype="fastqsanger" />
|
|
337 <param name="match_type_selector" value="identifier" />
|
|
338 <param name="match_selector" value="regex" />
|
|
339 <param name="match_by" value=".*" />
|
|
340 <param name="manipulation_type_selector" value="sequence" />
|
|
341 <param name="manipulation_selector" value="rev_comp" />
|
|
342 <output name="output_file" file="misc_dna_as_sanger_rev_comp_1.fastqsanger" />
|
|
343 </test>
|
|
344 <!-- match all and rev comp, with ambiguous RNA -->
|
|
345 <test>
|
|
346 <param name="input_file" value="misc_rna_original_sanger.fastqsanger" ftype="fastqsanger" />
|
|
347 <param name="match_type_selector" value="identifier" />
|
|
348 <param name="match_selector" value="regex" />
|
|
349 <param name="match_by" value=".*" />
|
|
350 <param name="manipulation_type_selector" value="sequence" />
|
|
351 <param name="manipulation_selector" value="rev_comp" />
|
|
352 <output name="output_file" file="misc_rna_as_sanger_rev_comp_1.fastqsanger" />
|
|
353 </test>
|
|
354 <!-- match first seq and rev comp -->
|
|
355 <test>
|
|
356 <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
|
|
357 <param name="match_type_selector" value="identifier" />
|
|
358 <param name="match_selector" value="regex" />
|
|
359 <param name="match_by" value="FAKE0001" />
|
|
360 <param name="manipulation_type_selector" value="sequence" />
|
|
361 <param name="manipulation_selector" value="rev_comp" />
|
|
362 <output name="output_file" file="sanger_full_range_rev_comp_1_seq.fastqsanger" />
|
|
363 </test>
|
|
364 <!-- match first seq and rev comp: i.e. undo above -->
|
|
365 <test>
|
|
366 <param name="input_file" value="sanger_full_range_rev_comp_1_seq.fastqsanger" ftype="fastqsanger" />
|
|
367 <param name="match_type_selector" value="identifier" />
|
|
368 <param name="match_selector" value="regex" />
|
|
369 <param name="match_by" value="FAKE0001" />
|
|
370 <param name="manipulation_type_selector" value="sequence" />
|
|
371 <param name="manipulation_selector" value="rev_comp" />
|
|
372 <output name="output_file" file="sanger_full_range_original_sanger.fastqsanger" />
|
|
373 </test>
|
|
374 <!-- match all and DNA to RNA -->
|
|
375 <test>
|
|
376 <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
|
|
377 <param name="match_type_selector" value="identifier" />
|
|
378 <param name="match_selector" value="regex" />
|
|
379 <param name="match_by" value=".*" />
|
|
380 <param name="manipulation_type_selector" value="sequence" />
|
|
381 <param name="manipulation_selector" value="dna_to_rna" />
|
|
382 <output name="output_file" file="sanger_full_range_as_rna.fastqsanger" />
|
|
383 </test>
|
|
384 <!-- match all and RNA to DNA -->
|
|
385 <test>
|
|
386 <param name="input_file" value="sanger_full_range_as_rna.fastqsanger" ftype="fastqsanger" />
|
|
387 <param name="match_type_selector" value="identifier" />
|
|
388 <param name="match_selector" value="regex" />
|
|
389 <param name="match_by" value=".*" />
|
|
390 <param name="manipulation_type_selector" value="sequence" />
|
|
391 <param name="manipulation_selector" value="rna_to_dna" />
|
|
392 <output name="output_file" file="sanger_full_range_original_sanger.fastqsanger" />
|
|
393 </test>
|
|
394 </tests>
|
|
395 <help>
|
|
396 This tool allows you to build complex manipulations to be applied to each matching read in a FASTQ file. A read must match all matching directives in order for it to be manipulated; if a read does not match, it is output in a non-modified manner. All reads matching will have each of the specified manipulations performed upon them, in the order specified.
|
|
397
|
|
398 Regular Expression Matches are made using re.search, see http://docs.python.org/library/re.html for more information.
|
|
399 All matching is performed on a single-line string, regardless of whether e.g. the sequence or quality score spans multiple lines in the original file.
|
|
400
|
|
401 String translations are performed using string.translate, see http://docs.python.org/library/string.html#string.translate and http://docs.python.org/library/string.html#string.maketrans for more information.
|
|
402
|
|
403 .. class:: warningmark
|
|
404
|
|
405 Only color space reads can have adapter bases substituted.
|
|
406
|
|
407
|
|
408 -----
|
|
409
|
|
410 **Example**
|
|
411
|
|
412 Suppose you have a color space sanger formatted sequence (fastqcssanger) and you want to double-encode the color space into pseudo-nucleotide space (this is different from converting) to allow these reads to be used in tools which do not natively support it (using specially designed indexes). This tool can handle this manipulation, however, this is generally not recommended as results tend to be poorer than those produced from tools which are specially designed to handle color space data.
|
|
413
|
|
414 Steps:
|
|
415
|
|
416 1. Click **Add new Match Reads** and leave the matching options set to the default (Matching by sequence name/identifier using the regular expression ".\*"; thereby matching all reads).
|
|
417 2. Click **Add new Manipulate Reads**, change **Manipulate Reads on** to "Sequence Content", set **Sequence Manipulation Type** to "Change Adapter Base" and set **New Adapter** to "" (an empty text field).
|
|
418 3. Click **Add new Manipulate Reads**, change **Manipulate Reads on** to "Sequence Content", set **Sequence Manipulation Type** to "String Translate" and set **From** to "0123." and **To** to "ACGTN".
|
|
419 4. Click Execute. The new history item will contain double-encoded pseudo-nucleotide space reads.
|
|
420
|
|
421 ------
|
|
422
|
|
423 **Citation**
|
|
424
|
|
425 If you use this tool, please cite `Blankenberg D, Gordon A, Von Kuster G, Coraor N, Taylor J, Nekrutenko A; Galaxy Team. Manipulation of FASTQ data with Galaxy. Bioinformatics. 2010 Jul 15;26(14):1783-5. <http://www.ncbi.nlm.nih.gov/pubmed/20562416>`_
|
|
426
|
|
427
|
|
428 </help>
|
|
429 </tool>
|