comparison fastq_manipulation.xml @ 2:4ac14b275aca draft

planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/galaxy_sequence_utils/fastq_manipulation commit f2582539542b33240234e8ea6093e25d0aee9b6a
author devteam
date Sat, 30 Sep 2017 14:58:21 -0400
parents bb07615a5b6a
children 7861f3b10c68
comparison
equal deleted inserted replaced
1:bb07615a5b6a 2:4ac14b275aca
1 <tool id="fastq_manipulation" name="Manipulate FASTQ" version="1.0.1"> 1 <tool id="fastq_manipulation" name="Manipulate FASTQ" version="1.1.1">
2 <options sanitize="False" /> <!-- This tool uses a file to relay all parameter information (actually a dynamically generated Python module), so we can safely not sanitize any parameters --> 2 <options sanitize="false" /> <!-- This tool uses a file to relay all parameter information (actually a dynamically generated Python module), so we can safely not sanitize any parameters -->
3 <requirements> 3 <description>reads on various attributes</description>
4 <requirement type="package" version="1.0.0">galaxy_sequence_utils</requirement> 4 <requirements>
5 </requirements> 5 <requirement type="package" version="1.1.1">galaxy_sequence_utils</requirement>
6 <description>reads on various attributes</description> 6 </requirements>
7 <command interpreter="python">fastq_manipulation.py $input_file $fastq_manipulation_file $output_file $output_file.files_path '${input_file.extension[len( 'fastq' ):]}'</command> 7 <command><![CDATA[
8 <inputs> 8 gx-fastq-manipulation '$input_file' '$fastq_manipulation_file' '$output_file' '$output_file.files_path' '${input_file.extension[len('fastq'):]}'
9 <!-- This tool is purposely over-engineered (e.g. Single option conditionals) to allow easy enhancement with workflow/rerun compatibility --> 9 ]]></command>
10 <page> 10 <configfiles>
11 <param name="input_file" type="data" format="fastqsanger,fastqcssanger" label="FASTQ File" help="Requires groomed data: if your data does not appear here try using the FASTQ groomer."/> 11 <configfile name="fastq_manipulation_file"><![CDATA[##create an importable module
12 <!-- Match Reads -->
13 <repeat name="match_blocks" title="Match Reads">
14 <conditional name="match_type">
15 <param name="match_type_selector" type="select" label="Match Reads by">
16 <option value="identifier">Name/Identifier</option>
17 <option value="sequence">Sequence Content</option>
18 <option value="quality">Quality Score Content</option>
19 </param>
20 <when value="identifier">
21 <conditional name="match">
22 <param name="match_selector" type="select" label="Identifier Match Type">
23 <option value="regex">Regular Expression</option>
24 </param>
25 <when value="regex">
26 <param type="text" name="match_by" label="Match by" value=".*" />
27 </when>
28 </conditional>
29 </when>
30 <when value="sequence">
31 <conditional name="match">
32 <param name="match_selector" type="select" label="Sequence Match Type">
33 <option value="regex">Regular Expression</option>
34 </param>
35 <when value="regex">
36 <param type="text" name="match_by" label="Match by" value=".*" />
37 </when>
38 </conditional>
39 </when>
40 <when value="quality">
41 <conditional name="match">
42 <param name="match_selector" type="select" label="Quality Match Type">
43 <option value="regex">Regular Expression</option>
44 </param>
45 <when value="regex">
46 <param type="text" name="match_by" label="Match by" value=".*" />
47 </when>
48 </conditional>
49 </when>
50 </conditional>
51 </repeat>
52 <!-- Manipulate Matched Reads -->
53 <repeat name="manipulate_blocks" title="Manipulate Reads">
54 <conditional name="manipulation_type">
55 <param name="manipulation_type_selector" type="select" label="Manipulate Reads on">
56 <option value="identifier">Name/Identifier</option>
57 <option value="sequence">Sequence Content</option>
58 <option value="quality">Quality Score Content</option>
59 <option value="miscellaneous">Miscellaneous Actions</option>
60 </param>
61 <when value="identifier">
62 <conditional name="manipulation">
63 <param name="manipulation_selector" type="select" label="Identifier Manipulation Type">
64 <option value="translate">String Translate</option>
65 </param>
66 <when value="translate">
67 <param name="from" type="text" label="From" value="" />
68 <param name="to" type="text" label="To" value="" />
69 </when>
70 </conditional>
71 </when>
72 <when value="sequence">
73 <conditional name="manipulation">
74 <param name="manipulation_selector" type="select" label="Sequence Manipulation Type">
75 <option value="rev_comp">Reverse Complement</option>
76 <option value="rev_no_comp">Reverse, No Complement</option>
77 <option value="no_rev_comp">Complement, No Reverse</option>
78 <option value="trim">Trim</option>
79 <option value="dna_to_rna">DNA to RNA</option>
80 <option value="rna_to_dna">RNA to DNA</option>
81 <option value="translate">String Translate</option>
82 <option value="change_adapter">Change Adapter Base</option>
83 </param>
84 <when value="rev_comp">
85 <!-- no extra settings -->
86 </when>
87 <when value="rev_no_comp">
88 <!-- no extra settings -->
89 </when>
90 <when value="no_rev_comp">
91 <!-- no extra settings -->
92 </when>
93 <when value="trim">
94 <conditional name="offset_type">
95 <param name="base_offset_type" type="select" label="Define Base Offsets as" help="Use Absolute for fixed length reads (Illumina, SOLiD)&lt;br&gt;Use Percentage for variable length reads (Roche/454)">
96 <option value="offsets_absolute" selected="true">Absolute Values</option>
97 <option value="offsets_percent">Percentage of Read Length</option>
98 </param>
99 <when value="offsets_absolute">
100 <param name="left_column_offset" label="Offset from 5' end" value="0" type="integer" help="Values start at 0, increasing from the left">
101 <validator type="in_range" message="Base Offsets must be positive" min="0" max="inf"/>
102 <validator type="expression" message="An integer is required.">int( float( value ) ) == float( value )</validator>
103 </param>
104 <param name="right_column_offset" label="Offset from 3' end" value="0" type="integer" help="Values start at 0, increasing from the right">
105 <validator type="in_range" message="Base Offsets must be positive" min="0" max="inf"/>
106 <validator type="expression" message="An integer is required.">int( float( value ) ) == float( value )</validator>
107 </param>
108 </when>
109 <when value="offsets_percent">
110 <param name="left_column_offset" label="Offset from 5' end" value="0" type="float">
111 <validator type="in_range" message="Base Offsets must be between 0 and 100" min="0" max="100"/>
112 </param>
113 <param name="right_column_offset" label="Offset from 3' end" value="0" type="float">
114 <validator type="in_range" message="Base Offsets must be between 0 and 100" min="0" max="100"/>
115 </param>
116 </when>
117 </conditional>
118 <param name="keep_zero_length" label="Keep reads with zero length" type="boolean" truevalue="keep_zero_length" falsevalue="exclude_zero_length" selected="False"/>
119 </when>
120 <when value="dna_to_rna">
121 <!-- no extra settings -->
122 </when>
123 <when value="rna_to_dna">
124 <!-- no extra settings -->
125 </when>
126 <when value="translate">
127 <param name="from" type="text" label="From" value="" />
128 <param name="to" type="text" label="To" value="" />
129 </when>
130 <when value="change_adapter">
131 <param name="new_adapter" label="New Adapter" type="text" value="G" help="An empty string will remove the adapter base" />
132 </when>
133 </conditional>
134 </when>
135 <when value="quality">
136 <conditional name="manipulation">
137 <param name="manipulation_selector" type="select" label="Quality Manipulation Type">
138 <option value="translate">String Translate</option>
139 <!-- <option value="modify_each_score">Apply Transformation to each Score</option> Not enabled yet-->
140 </param>
141 <when value="translate">
142 <param name="from" type="text" label="From" value="" />
143 <param name="to" type="text" label="To" value="" />
144 </when>
145 <when value="modify_each_score">
146 <param name="map_score" type="text" label="Modify Score by" value="$score + 1" />
147 </when>
148 </conditional>
149 </when>
150 <when value="miscellaneous">
151 <conditional name="manipulation">
152 <param name="manipulation_selector" type="select" label="Miscellaneous Manipulation Type">
153 <option value="remove">Remove Read</option>
154 </param>
155 <when value="remove">
156 <!-- no extra settings -->
157 </when>
158 </conditional>
159 </when>
160 </conditional>
161 </repeat>
162 </page>
163 </inputs>
164 <configfiles>
165 <configfile name="fastq_manipulation_file">##create an importable module
166 #import binascii 12 #import binascii
13 import binascii
167 import re 14 import re
168 import binascii 15 import six
169 from string import maketrans 16
17 if six.PY2:
18 from string import maketrans
19 else:
20 maketrans = str.maketrans
21
22
170 ##does read match 23 ##does read match
171 def match_read( fastq_read ): 24 def match_read(fastq_read):
172 #for $match_block in $match_blocks: 25 #for $match_block in $match_blocks:
173 #if $match_block['match_type']['match_type_selector'] == 'identifier': 26 #if $match_block['match_type']['match_type_selector'] == 'identifier':
174 search_target = fastq_read.identifier[1:] ##don't include @ 27 search_target = fastq_read.identifier[1:] ##don't include @
175 #elif $match_block['match_type']['match_type_selector'] == 'sequence': 28 #elif $match_block['match_type']['match_type_selector'] == 'sequence':
176 search_target = fastq_read.sequence 29 search_target = fastq_read.sequence
177 #elif $match_block['match_type']['match_type_selector'] == 'quality': 30 #elif $match_block['match_type']['match_type_selector'] == 'quality':
178 search_target = fastq_read.quality 31 search_target = fastq_read.quality
179 #else: 32 #else:
180 #continue 33 #continue
181 #end if 34 #end if
182 if not re.search( binascii.unhexlify( "${ binascii.hexlify( str( match_block['match_type']['match']['match_by'] ) ) }" ), search_target ): 35 if not re.search(binascii.unhexlify("${ binascii.hexlify(str(match_block['match_type']['match']['match_by'])) }").decode(), search_target):
183 return False 36 return False
184 #end for 37 #end for
185 return True 38 return True
39
40
186 ##modify matched reads 41 ##modify matched reads
187 def manipulate_read( fastq_read ): 42 def manipulate_read(fastq_read):
188 new_read = fastq_read.clone() 43 new_read = fastq_read.clone()
189 #for $manipulate_block in $manipulate_blocks: 44 #for $manipulate_block in $manipulate_blocks:
190 #if $manipulate_block['manipulation_type']['manipulation_type_selector'] == 'identifier': 45 #if $manipulate_block['manipulation_type']['manipulation_type_selector'] == 'identifier':
191 #if $manipulate_block['manipulation_type']['manipulation']['manipulation_selector'] == 'translate': 46 #if $manipulate_block['manipulation_type']['manipulation']['manipulation_selector'] == 'translate':
192 new_read.identifier = "@%s" % new_read.identifier[1:].translate( maketrans( binascii.unhexlify( "${ binascii.hexlify( str( manipulate_block['manipulation_type']['manipulation']['from'] ) ) }" ), binascii.unhexlify( "${ binascii.hexlify( str( manipulate_block['manipulation_type']['manipulation']['to'] ) ) }" ) ) ) 47 new_read.identifier = "@%s" % new_read.identifier[1:].translate(maketrans(binascii.unhexlify("${ binascii.hexlify(str(manipulate_block['manipulation_type']['manipulation']['from'])) }").decode(), binascii.unhexlify("${ binascii.hexlify(str(manipulate_block['manipulation_type']['manipulation']['to'])) }").decode()))
193 #end if 48 #end if
194 #elif $manipulate_block['manipulation_type']['manipulation_type_selector'] == 'sequence': 49 #elif $manipulate_block['manipulation_type']['manipulation_type_selector'] == 'sequence':
195 #if $manipulate_block['manipulation_type']['manipulation']['manipulation_selector'] == 'translate': 50 #if $manipulate_block['manipulation_type']['manipulation']['manipulation_selector'] == 'translate':
196 new_read.sequence = new_read.sequence.translate( maketrans( binascii.unhexlify( "${ binascii.hexlify( str( manipulate_block['manipulation_type']['manipulation']['from'] ) ) }" ), binascii.unhexlify( "${ binascii.hexlify( str( manipulate_block['manipulation_type']['manipulation']['to'] ) ) }" ) ) ) 51 new_read.sequence = new_read.sequence.translate(maketrans(binascii.unhexlify("${ binascii.hexlify(str(manipulate_block['manipulation_type']['manipulation']['from'])) }").decode(), binascii.unhexlify("${ binascii.hexlify(str(manipulate_block['manipulation_type']['manipulation']['to'])) }").decode()))
197 #elif $manipulate_block['manipulation_type']['manipulation']['manipulation_selector'] == 'rev_comp': 52 #elif $manipulate_block['manipulation_type']['manipulation']['manipulation_selector'] == 'rev_comp':
198 new_read = new_read.reverse_complement() 53 new_read = new_read.reverse_complement()
199 #elif $manipulate_block['manipulation_type']['manipulation']['manipulation_selector'] == 'rev_no_comp': 54 #elif $manipulate_block['manipulation_type']['manipulation']['manipulation_selector'] == 'rev_no_comp':
200 new_read = new_read.reverse() 55 new_read = new_read.reverse()
201 #elif $manipulate_block['manipulation_type']['manipulation']['manipulation_selector'] == 'no_rev_comp': 56 #elif $manipulate_block['manipulation_type']['manipulation']['manipulation_selector'] == 'no_rev_comp':
202 new_read = new_read.complement() 57 new_read = new_read.complement()
203 #elif $manipulate_block['manipulation_type']['manipulation']['manipulation_selector'] == 'trim': 58 #elif $manipulate_block['manipulation_type']['manipulation']['manipulation_selector'] == 'trim':
204 #if $manipulate_block['manipulation_type']['manipulation']['offset_type']['base_offset_type'] == 'offsets_percent': 59 #if $manipulate_block['manipulation_type']['manipulation']['offset_type']['base_offset_type'] == 'offsets_percent':
205 left_column_offset = int( round( float( ${ manipulate_block['manipulation_type']['manipulation']['offset_type']['left_column_offset'] } ) / 100.0 * float( len( new_read ) ) ) ) 60 left_column_offset = int(round(float(${ manipulate_block['manipulation_type']['manipulation']['offset_type']['left_column_offset'] }) / 100.0 * float(len(new_read))))
206 right_column_offset = int( round( float( ${ manipulate_block['manipulation_type']['manipulation']['offset_type']['right_column_offset'] } ) / 100.0 * float( len( new_read ) ) ) ) 61 right_column_offset = int(round(float(${ manipulate_block['manipulation_type']['manipulation']['offset_type']['right_column_offset'] }) / 100.0 * float(len(new_read))))
207 #else 62 #else
208 left_column_offset = ${ manipulate_block['manipulation_type']['manipulation']['offset_type']['left_column_offset'] } 63 left_column_offset = ${ manipulate_block['manipulation_type']['manipulation']['offset_type']['left_column_offset'] }
209 right_column_offset = ${ manipulate_block['manipulation_type']['manipulation']['offset_type']['right_column_offset'] } 64 right_column_offset = ${ manipulate_block['manipulation_type']['manipulation']['offset_type']['right_column_offset'] }
210 #end if 65 #end if
211 if right_column_offset > 0: 66 if right_column_offset != 0:
212 right_column_offset = -right_column_offset 67 right_column_offset = -right_column_offset
213 else: 68 else:
214 right_column_offset = None 69 right_column_offset = None
215 new_read = new_read.slice( left_column_offset, right_column_offset ) 70 new_read = new_read.slice(left_column_offset, right_column_offset)
216 if not ( ${str( manipulate_block['manipulation_type']['manipulation']['keep_zero_length'] ) == 'keep_zero_length'} or len( new_read ) ): 71 if not (${str(manipulate_block['manipulation_type']['manipulation']['keep_zero_length']) == 'keep_zero_length'} or len(new_read)):
217 return None 72 return None
218 #elif $manipulate_block['manipulation_type']['manipulation']['manipulation_selector'] == 'dna_to_rna': 73 #elif $manipulate_block['manipulation_type']['manipulation']['manipulation_selector'] == 'dna_to_rna':
219 new_read = new_read.sequence_as_DNA() 74 new_read = new_read.sequence_as_DNA()
220 #elif $manipulate_block['manipulation_type']['manipulation']['manipulation_selector'] == 'rna_to_dna': 75 #elif $manipulate_block['manipulation_type']['manipulation']['manipulation_selector'] == 'rna_to_dna':
221 new_read = new_read.sequence_as_RNA() 76 new_read = new_read.sequence_as_RNA()
222 #elif $manipulate_block['manipulation_type']['manipulation']['manipulation_selector'] == 'change_adapter': 77 #elif $manipulate_block['manipulation_type']['manipulation']['manipulation_selector'] == 'change_adapter':
223 if new_read.sequence_space == 'color': 78 if new_read.sequence_space == 'color':
224 new_read = new_read.change_adapter( binascii.unhexlify( "${ binascii.hexlify( str( manipulate_block['manipulation_type']['manipulation']['new_adapter'] ) ) }" ) ) 79 new_read = new_read.change_adapter(binascii.unhexlify("${ binascii.hexlify(str(manipulate_block['manipulation_type']['manipulation']['new_adapter'])) }").decode())
225 #end if 80 #end if
226 #elif $manipulate_block['manipulation_type']['manipulation_type_selector'] == 'quality': 81 #elif $manipulate_block['manipulation_type']['manipulation_type_selector'] == 'quality':
227 #if $manipulate_block['manipulation_type']['manipulation']['manipulation_selector'] == 'translate': 82 #if $manipulate_block['manipulation_type']['manipulation']['manipulation_selector'] == 'translate':
228 new_read.quality = new_read.quality.translate( maketrans( binascii.unhexlify( "${ binascii.hexlify( str( manipulate_block['manipulation_type']['manipulation']['from'] ) ) }" ), binascii.unhexlify( "${ binascii.hexlify( str( manipulate_block['manipulation_type']['manipulation']['to'] ) ) }" ) ) ) 83 new_read.quality = new_read.quality.translate(maketrans(binascii.unhexlify("${ binascii.hexlify(str(manipulate_block['manipulation_type']['manipulation']['from'])) }").decode(), binascii.unhexlify("${ binascii.hexlify(str(manipulate_block['manipulation_type']['manipulation']['to'])) }").decode()))
229 #elif $manipulate_block['manipulation_type']['manipulation']['manipulation_selector'] == 'map_score': 84 #elif $manipulate_block['manipulation_type']['manipulation']['manipulation_selector'] == 'map_score':
230 def score_method( score ): 85 def score_method(score):
231 raise Exception, "Unimplemented" ##This option is not yet available, need to abstract out e.g. column adding tool action: preventing users from using 'harmful' actions 86 raise Exception("Unimplemented") ##This option is not yet available, need to abstract out e.g. column adding tool action: preventing users from using 'harmful' actions
232 new_read.quality_map( score_method ) 87 new_read.quality_map(score_method)
233 #end if 88 #end if
234 #elif $manipulate_block['manipulation_type']['manipulation_type_selector'] == 'miscellaneous': 89 #elif $manipulate_block['manipulation_type']['manipulation_type_selector'] == 'miscellaneous':
235 #if $manipulate_block['manipulation_type']['manipulation']['manipulation_selector'] == 'remove': 90 #if $manipulate_block['manipulation_type']['manipulation']['manipulation_selector'] == 'remove':
236 return None 91 return None
237 #end if 92 #end if
238 #else: 93 #else:
239 #continue 94 #continue
240 #end if 95 #end if
241 #end for 96 #end for
242 if new_read.description != "+": 97 if new_read.description != "+":
243 new_read.description = "+%s" % new_read.identifier[1:] ##ensure description is still valid 98 new_read.description = "+%s" % new_read.identifier[1:] ##ensure description is still valid
244 return new_read 99 return new_read
245 def match_and_manipulate_read( fastq_read ): 100
101
102 def match_and_manipulate_read(fastq_read):
246 new_read = fastq_read 103 new_read = fastq_read
247 if match_read( fastq_read ): 104 if match_read(fastq_read):
248 new_read = manipulate_read( fastq_read ) 105 new_read = manipulate_read(fastq_read)
249 return new_read 106 return new_read
250 </configfile> 107 ]]></configfile>
251 </configfiles> 108 </configfiles>
252 <outputs> 109 <inputs>
253 <data format="input" name="output_file" /> 110 <!-- This tool is purposely over-engineered (e.g. Single option conditionals) to allow easy enhancement with workflow/rerun compatibility -->
254 </outputs> 111 <param name="input_file" type="data" format="fastqsanger,fastqcssanger,fastqsanger.gz,fastqcssanger.gz,fastqsanger.bz2,fastqcssanger.bz2" label="FASTQ File" help="Requires groomed data: if your data does not appear here try using the FASTQ groomer"/>
255 <tests> 112 <!-- Match Reads -->
256 <!-- match all and do nothing --> 113 <repeat name="match_blocks" title="Match Reads">
257 <test> 114 <conditional name="match_type">
258 <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" /> 115 <param name="match_type_selector" type="select" label="Match Reads by">
259 <param name="match_type_selector" value="identifier" /> 116 <option value="identifier">Name/Identifier</option>
260 <param name="match_selector" value="regex" /> 117 <option value="sequence">Sequence Content</option>
261 <param name="match_by" value=".*" /> 118 <option value="quality">Quality Score Content</option>
262 <param name="manipulation_type_selector" value="identifier" /> 119 </param>
263 <param name="manipulation_selector" value="translate" /> 120 <when value="identifier">
264 <param name="from" value="" /> 121 <conditional name="match">
265 <param name="to" value="" /> 122 <param name="match_selector" type="select" label="Identifier Match Type">
266 <output name="output_file" file="sanger_full_range_original_sanger.fastqsanger" /> 123 <option value="regex">Regular Expression</option>
267 </test> 124 </param>
268 <!-- match None and do nothing --> 125 <when value="regex">
269 <test> 126 <param name="match_by" type="text" value=".*" label="Match by" />
270 <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" /> 127 </when>
271 <param name="match_type_selector" value="identifier" /> 128 </conditional>
272 <param name="match_selector" value="regex" /> 129 </when>
273 <param name="match_by" value="STRINGDOESNOTEXIST" /> 130 <when value="sequence">
274 <param name="manipulation_type_selector" value="identifier" /> 131 <conditional name="match">
275 <param name="manipulation_selector" value="translate" /> 132 <param name="match_selector" type="select" label="Sequence Match Type">
276 <param name="from" value="" /> 133 <option value="regex">Regular Expression</option>
277 <param name="to" value="" /> 134 </param>
278 <output name="output_file" file="sanger_full_range_original_sanger.fastqsanger" /> 135 <when value="regex">
279 </test> 136 <param name="match_by" type="text" value=".*" label="Match by" />
280 <!-- match all and remove --> 137 </when>
281 <test> 138 </conditional>
282 <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" /> 139 </when>
283 <param name="match_type_selector" value="identifier" /> 140 <when value="quality">
284 <param name="match_selector" value="regex" /> 141 <conditional name="match">
285 <param name="match_by" value=".*" /> 142 <param name="match_selector" type="select" label="Quality Match Type">
286 <param name="manipulation_type_selector" value="miscellaneous" /> 143 <option value="regex">Regular Expression</option>
287 <param name="manipulation_selector" value="remove" /> 144 </param>
288 <output name="output_file" file="empty_file.dat" /> 145 <when value="regex">
289 </test> 146 <param name="match_by" type="text" value=".*" label="Match by" />
290 <!-- match None and remove --> 147 </when>
291 <test> 148 </conditional>
292 <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" /> 149 </when>
293 <param name="match_type_selector" value="identifier" /> 150 </conditional>
294 <param name="match_selector" value="regex" /> 151 </repeat>
295 <param name="match_by" value="STRINGDOESNOTEXIST" /> 152 <!-- Manipulate Matched Reads -->
296 <param name="manipulation_type_selector" value="miscellaneous" /> 153 <repeat name="manipulate_blocks" title="Manipulate Reads">
297 <param name="manipulation_selector" value="remove" /> 154 <conditional name="manipulation_type">
298 <output name="output_file" file="sanger_full_range_original_sanger.fastqsanger" /> 155 <param name="manipulation_type_selector" type="select" label="Manipulate Reads on">
299 </test> 156 <option value="identifier">Name/Identifier</option>
300 <!-- match all and trim to 4 inner-most bases --> 157 <option value="sequence">Sequence Content</option>
301 <test> 158 <option value="quality">Quality Score Content</option>
302 <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" /> 159 <option value="miscellaneous">Miscellaneous Actions</option>
303 <param name="match_type_selector" value="identifier" /> 160 </param>
304 <param name="match_selector" value="regex" /> 161 <when value="identifier">
305 <param name="match_by" value=".*" /> 162 <conditional name="manipulation">
306 <param name="manipulation_type_selector" value="sequence" /> 163 <param name="manipulation_selector" type="select" label="Identifier Manipulation Type">
307 <param name="manipulation_selector" value="trim" /> 164 <option value="translate">String Translate</option>
308 <param name="base_offset_type" value="offsets_absolute"/> 165 </param>
309 <param name="left_column_offset" value="45"/> 166 <when value="translate">
310 <param name="right_column_offset" value="45"/> 167 <param name="from" type="text" value="" label="From" />
311 <param name="keep_zero_length" value="true" /> 168 <param name="to" type="text" value="" label="To" />
312 <output name="output_file" file="fastq_trimmer_out1.fastqsanger" /> 169 </when>
313 </test> 170 </conditional>
314 <test> 171 </when>
315 <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" /> 172 <when value="sequence">
316 <param name="match_type_selector" value="identifier" /> 173 <conditional name="manipulation">
317 <param name="match_selector" value="regex" /> 174 <param name="manipulation_selector" type="select" label="Sequence Manipulation Type">
318 <param name="match_by" value=".*" /> 175 <option value="rev_comp">Reverse Complement</option>
319 <param name="manipulation_type_selector" value="sequence" /> 176 <option value="rev_no_comp">Reverse, No Complement</option>
320 <param name="manipulation_selector" value="trim" /> 177 <option value="no_rev_comp">Complement, No Reverse</option>
321 <param name="base_offset_type" value="offsets_percent"/> 178 <option value="trim">Trim</option>
322 <param name="left_column_offset" value="47.87"/> 179 <option value="dna_to_rna">DNA to RNA</option>
323 <param name="right_column_offset" value="47.87"/> 180 <option value="rna_to_dna">RNA to DNA</option>
324 <param name="keep_zero_length" value="true" /> 181 <option value="translate">String Translate</option>
325 <output name="output_file" file="fastq_trimmer_out1.fastqsanger" /> 182 <option value="change_adapter">Change Adapter Base</option>
326 </test> 183 </param>
327 <!-- match all and rev comp --> 184 <when value="rev_comp" />
328 <test> 185 <when value="rev_no_comp" />
329 <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" /> 186 <when value="no_rev_comp" />
330 <param name="match_type_selector" value="identifier" /> 187 <when value="trim">
331 <param name="match_selector" value="regex" /> 188 <conditional name="offset_type">
332 <param name="match_by" value=".*" /> 189 <param name="base_offset_type" type="select" label="Define Base Offsets as" help="Use Absolute for fixed length reads (Illumina, SOLiD)&lt;br&gt;Use Percentage for variable length reads (Roche/454)">
333 <param name="manipulation_type_selector" value="sequence" /> 190 <option value="offsets_absolute" selected="true">Absolute Values</option>
334 <param name="manipulation_selector" value="rev_comp" /> 191 <option value="offsets_percent">Percentage of Read Length</option>
335 <output name="output_file" file="sanger_full_range_rev_comp.fastqsanger" /> 192 </param>
336 </test> 193 <when value="offsets_absolute">
337 <!-- match all and rev comp, with ambiguous DNA --> 194 <param name="left_column_offset" type="integer" min="0" value="0" label="Offset from 5' end" help="Values start at 0, increasing from the left" />
338 <test> 195 <param name="right_column_offset" type="integer" value="0" label="Offset from 3' end" help="Values start at 0, increasing from the right; use a negative value to remove everything to the right of the absolute value of the position" />
339 <param name="input_file" value="misc_dna_original_sanger.fastqsanger" ftype="fastqsanger" /> 196 </when>
340 <param name="match_type_selector" value="identifier" /> 197 <when value="offsets_percent">
341 <param name="match_selector" value="regex" /> 198 <param name="left_column_offset" type="float" min="0" max="100" value="0" label="Offset from 5' end" />
342 <param name="match_by" value=".*" /> 199 <param name="right_column_offset" type="float" min="0" max="100" value="0" label="Offset from 3' end" />
343 <param name="manipulation_type_selector" value="sequence" /> 200 </when>
344 <param name="manipulation_selector" value="rev_comp" /> 201 </conditional>
345 <output name="output_file" file="misc_dna_as_sanger_rev_comp_1.fastqsanger" /> 202 <param name="keep_zero_length" type="boolean" truevalue="keep_zero_length" falsevalue="exclude_zero_length" checked="false" label="Keep reads with zero length" />
346 </test> 203 </when>
347 <!-- match all and rev comp, with ambiguous RNA --> 204 <when value="dna_to_rna" />
348 <test> 205 <when value="rna_to_dna" />
349 <param name="input_file" value="misc_rna_original_sanger.fastqsanger" ftype="fastqsanger" /> 206 <when value="translate">
350 <param name="match_type_selector" value="identifier" /> 207 <param name="from" type="text" value="" label="From" />
351 <param name="match_selector" value="regex" /> 208 <param name="to" type="text" value="" label="To" />
352 <param name="match_by" value=".*" /> 209 </when>
353 <param name="manipulation_type_selector" value="sequence" /> 210 <when value="change_adapter">
354 <param name="manipulation_selector" value="rev_comp" /> 211 <param name="new_adapter" type="text" value="G" label="New adapter" help="An empty string will remove the adapter base" />
355 <output name="output_file" file="misc_rna_as_sanger_rev_comp_1.fastqsanger" /> 212 </when>
356 </test> 213 </conditional>
357 <!-- match first seq and rev comp --> 214 </when>
358 <test> 215 <when value="quality">
359 <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" /> 216 <conditional name="manipulation">
360 <param name="match_type_selector" value="identifier" /> 217 <param name="manipulation_selector" type="select" label="Quality Manipulation Type">
361 <param name="match_selector" value="regex" /> 218 <option value="translate">String Translate</option>
362 <param name="match_by" value="FAKE0001" /> 219 <!-- <option value="modify_each_score">Apply Transformation to each Score</option> Not enabled yet-->
363 <param name="manipulation_type_selector" value="sequence" /> 220 </param>
364 <param name="manipulation_selector" value="rev_comp" /> 221 <when value="translate">
365 <output name="output_file" file="sanger_full_range_rev_comp_1_seq.fastqsanger" /> 222 <param name="from" type="text" value="" label="From" />
366 </test> 223 <param name="to" type="text" value="" label="To" />
367 <!-- match first seq and rev comp: i.e. undo above --> 224 </when>
368 <test> 225 <!-- <when value="modify_each_score">
369 <param name="input_file" value="sanger_full_range_rev_comp_1_seq.fastqsanger" ftype="fastqsanger" /> 226 <param name="map_score" type="text" label="Modify Score by" value="$score + 1" />
370 <param name="match_type_selector" value="identifier" /> 227 </when> -->
371 <param name="match_selector" value="regex" /> 228 </conditional>
372 <param name="match_by" value="FAKE0001" /> 229 </when>
373 <param name="manipulation_type_selector" value="sequence" /> 230 <when value="miscellaneous">
374 <param name="manipulation_selector" value="rev_comp" /> 231 <conditional name="manipulation">
375 <output name="output_file" file="sanger_full_range_original_sanger.fastqsanger" /> 232 <param name="manipulation_selector" type="select" label="Miscellaneous Manipulation Type">
376 </test> 233 <option value="remove">Remove Read</option>
377 <!-- match all and DNA to RNA --> 234 </param>
378 <test> 235 <when value="remove" />
379 <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" /> 236 </conditional>
380 <param name="match_type_selector" value="identifier" /> 237 </when>
381 <param name="match_selector" value="regex" /> 238 </conditional>
382 <param name="match_by" value=".*" /> 239 </repeat>
383 <param name="manipulation_type_selector" value="sequence" /> 240 </inputs>
384 <param name="manipulation_selector" value="dna_to_rna" /> 241 <outputs>
385 <output name="output_file" file="sanger_full_range_as_rna.fastqsanger" /> 242 <data name="output_file" format_source="input_file" />
386 </test> 243 </outputs>
387 <!-- match all and RNA to DNA --> 244 <tests>
388 <test> 245 <!-- match all and do nothing -->
389 <param name="input_file" value="sanger_full_range_as_rna.fastqsanger" ftype="fastqsanger" /> 246 <test>
390 <param name="match_type_selector" value="identifier" /> 247 <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
391 <param name="match_selector" value="regex" /> 248 <param name="match_type_selector" value="identifier" />
392 <param name="match_by" value=".*" /> 249 <param name="match_selector" value="regex" />
393 <param name="manipulation_type_selector" value="sequence" /> 250 <param name="match_by" value=".*" />
394 <param name="manipulation_selector" value="rna_to_dna" /> 251 <param name="manipulation_type_selector" value="identifier" />
395 <output name="output_file" file="sanger_full_range_original_sanger.fastqsanger" /> 252 <param name="manipulation_selector" value="translate" />
396 </test> 253 <param name="from" value="" />
397 </tests> 254 <param name="to" value="" />
398 <help> 255 <output name="output_file" file="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
256 </test>
257 <!-- match None and do nothing -->
258 <test>
259 <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
260 <param name="match_type_selector" value="identifier" />
261 <param name="match_selector" value="regex" />
262 <param name="match_by" value="STRINGDOESNOTEXIST" />
263 <param name="manipulation_type_selector" value="identifier" />
264 <param name="manipulation_selector" value="translate" />
265 <param name="from" value="" />
266 <param name="to" value="" />
267 <output name="output_file" file="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
268 </test>
269 <!-- match all and remove -->
270 <test>
271 <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
272 <param name="match_type_selector" value="identifier" />
273 <param name="match_selector" value="regex" />
274 <param name="match_by" value=".*" />
275 <param name="manipulation_type_selector" value="miscellaneous" />
276 <param name="manipulation_selector" value="remove" />
277 <output name="output_file" file="empty_file.dat" ftype="fastqsanger" />
278 </test>
279 <!-- match None and remove -->
280 <test>
281 <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
282 <param name="match_type_selector" value="identifier" />
283 <param name="match_selector" value="regex" />
284 <param name="match_by" value="STRINGDOESNOTEXIST" />
285 <param name="manipulation_type_selector" value="miscellaneous" />
286 <param name="manipulation_selector" value="remove" />
287 <output name="output_file" file="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
288 </test>
289 <!-- match all and trim to 4 inner-most bases -->
290 <test>
291 <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
292 <param name="match_type_selector" value="identifier" />
293 <param name="match_selector" value="regex" />
294 <param name="match_by" value=".*" />
295 <param name="manipulation_type_selector" value="sequence" />
296 <param name="manipulation_selector" value="trim" />
297 <param name="base_offset_type" value="offsets_absolute"/>
298 <param name="left_column_offset" value="45"/>
299 <param name="right_column_offset" value="45"/>
300 <param name="keep_zero_length" value="true" />
301 <output name="output_file" file="fastq_trimmer_out1.fastqsanger" ftype="fastqsanger" />
302 </test>
303 <test>
304 <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
305 <param name="match_type_selector" value="identifier" />
306 <param name="match_selector" value="regex" />
307 <param name="match_by" value=".*" />
308 <param name="manipulation_type_selector" value="sequence" />
309 <param name="manipulation_selector" value="trim" />
310 <param name="base_offset_type" value="offsets_percent"/>
311 <param name="left_column_offset" value="47.87"/>
312 <param name="right_column_offset" value="47.87"/>
313 <param name="keep_zero_length" value="true" />
314 <output name="output_file" file="fastq_trimmer_out1.fastqsanger" ftype="fastqsanger" />
315 </test>
316 <!-- match all and rev comp -->
317 <test>
318 <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
319 <param name="match_type_selector" value="identifier" />
320 <param name="match_selector" value="regex" />
321 <param name="match_by" value=".*" />
322 <param name="manipulation_type_selector" value="sequence" />
323 <param name="manipulation_selector" value="rev_comp" />
324 <output name="output_file" file="sanger_full_range_rev_comp.fastqsanger" ftype="fastqsanger" />
325 </test>
326 <!-- match all and rev comp, with ambiguous DNA -->
327 <test>
328 <param name="input_file" value="misc_dna_original_sanger.fastqsanger" ftype="fastqsanger" />
329 <param name="match_type_selector" value="identifier" />
330 <param name="match_selector" value="regex" />
331 <param name="match_by" value=".*" />
332 <param name="manipulation_type_selector" value="sequence" />
333 <param name="manipulation_selector" value="rev_comp" />
334 <output name="output_file" file="misc_dna_as_sanger_rev_comp_1.fastqsanger" ftype="fastqsanger" />
335 </test>
336 <!-- match all and rev comp, with ambiguous RNA -->
337 <test>
338 <param name="input_file" value="misc_rna_original_sanger.fastqsanger" ftype="fastqsanger" />
339 <param name="match_type_selector" value="identifier" />
340 <param name="match_selector" value="regex" />
341 <param name="match_by" value=".*" />
342 <param name="manipulation_type_selector" value="sequence" />
343 <param name="manipulation_selector" value="rev_comp" />
344 <output name="output_file" file="misc_rna_as_sanger_rev_comp_1.fastqsanger" ftype="fastqsanger" />
345 </test>
346 <!-- match first seq and rev comp -->
347 <test>
348 <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
349 <param name="match_type_selector" value="identifier" />
350 <param name="match_selector" value="regex" />
351 <param name="match_by" value="FAKE0001" />
352 <param name="manipulation_type_selector" value="sequence" />
353 <param name="manipulation_selector" value="rev_comp" />
354 <output name="output_file" file="sanger_full_range_rev_comp_1_seq.fastqsanger" ftype="fastqsanger" />
355 </test>
356 <!-- match first seq and rev comp: i.e. undo above -->
357 <test>
358 <param name="input_file" value="sanger_full_range_rev_comp_1_seq.fastqsanger" ftype="fastqsanger" />
359 <param name="match_type_selector" value="identifier" />
360 <param name="match_selector" value="regex" />
361 <param name="match_by" value="FAKE0001" />
362 <param name="manipulation_type_selector" value="sequence" />
363 <param name="manipulation_selector" value="rev_comp" />
364 <output name="output_file" file="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
365 </test>
366 <!-- match all and DNA to RNA -->
367 <test>
368 <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
369 <param name="match_type_selector" value="identifier" />
370 <param name="match_selector" value="regex" />
371 <param name="match_by" value=".*" />
372 <param name="manipulation_type_selector" value="sequence" />
373 <param name="manipulation_selector" value="dna_to_rna" />
374 <output name="output_file" file="sanger_full_range_as_rna.fastqsanger" ftype="fastqsanger" />
375 </test>
376 <!-- match all and RNA to DNA-->
377 <test>
378 <param name="input_file" value="sanger_full_range_as_rna.fastqsanger" ftype="fastqsanger" />
379 <param name="match_type_selector" value="identifier" />
380 <param name="match_selector" value="regex" />
381 <param name="match_by" value=".*" />
382 <param name="manipulation_type_selector" value="sequence" />
383 <param name="manipulation_selector" value="rna_to_dna" />
384 <output name="output_file" file="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
385 </test>
386 <!-- match all and RNA to DNA (gz compressed) -->
387 <test>
388 <param name="input_file" value="sanger_full_range_as_rna.fastqsanger.gz" ftype="fastqsanger.gz" />
389 <param name="match_type_selector" value="identifier" />
390 <param name="match_selector" value="regex" />
391 <param name="match_by" value=".*" />
392 <param name="manipulation_type_selector" value="sequence" />
393 <param name="manipulation_selector" value="rna_to_dna" />
394 <output name="output_file" file="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger.gz" decompress="true" />
395 </test>
396 <!-- match all and RNA to DNA (bz2 compressed) -->
397 <test>
398 <param name="input_file" value="sanger_full_range_as_rna.fastqsanger.bz2" ftype="fastqsanger.bz2" />
399 <param name="match_type_selector" value="identifier" />
400 <param name="match_selector" value="regex" />
401 <param name="match_by" value=".*" />
402 <param name="manipulation_type_selector" value="sequence" />
403 <param name="manipulation_selector" value="rna_to_dna" />
404 <output name="output_file" file="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger.bz2" decompress="true" />
405 </test>
406 </tests>
407 <help><![CDATA[
399 This tool allows you to build complex manipulations to be applied to each matching read in a FASTQ file. A read must match all matching directives in order for it to be manipulated; if a read does not match, it is output in a non-modified manner. All reads matching will have each of the specified manipulations performed upon them, in the order specified. 408 This tool allows you to build complex manipulations to be applied to each matching read in a FASTQ file. A read must match all matching directives in order for it to be manipulated; if a read does not match, it is output in a non-modified manner. All reads matching will have each of the specified manipulations performed upon them, in the order specified.
400 409
401 Regular Expression Matches are made using re.search, see http://docs.python.org/library/re.html for more information. 410 Regular Expression Matches are made using re.search, see http://docs.python.org/library/re.html for more information.
402 All matching is performed on a single-line string, regardless of whether, e.g., the sequence or quality score spans multiple lines in the original file. 411 All matching is performed on a single-line string, regardless of whether, e.g., the sequence or quality score spans multiple lines in the original file.
403 412
405 414
406 .. class:: warningmark 415 .. class:: warningmark
407 416
408 Only color space reads can have adapter bases substituted. 417 Only color space reads can have adapter bases substituted.
409 418
410
411 ----- 419 -----
412 420
413 **Example** 421 **Example**
414 422
415 Suppose you have a color space sanger formatted sequence (fastqcssanger) and you want to double-encode the color space into pseudo-nucleotide space (this is different from converting) to allow these reads to be used in tools which do not natively support it (using specially designed indexes). This tool can handle this manipulation; however, this is generally not recommended, as results tend to be poorer than those produced from tools which are specially designed to handle color space data. 423 Suppose you have a color space sanger formatted sequence (fastqcssanger) and you want to double-encode the color space into pseudo-nucleotide space (this is different from converting) to allow these reads to be used in tools which do not natively support it (using specially designed indexes). This tool can handle this manipulation; however, this is generally not recommended, as results tend to be poorer than those produced from tools which are specially designed to handle color space data.
416 424
417 Steps: 425 Steps:
418 426
419 1. Click **Add new Match Reads** and leave the matching options set to the default (Matching by sequence name/identifier using the regular expression ".\*", thereby matching all reads). 427 1. Click **Add new Match Reads** and leave the matching options set to the default (Matching by sequence name/identifier using the regular expression ".\*", thereby matching all reads).
420 2. Click **Add new Manipulate Reads**, change **Manipulate Reads on** to "Sequence Content", set **Sequence Manipulation Type** to "Change Adapter Base" and set **New Adapter** to "" (an empty text field). 428 2. Click **Add new Manipulate Reads**, change **Manipulate Reads on** to "Sequence Content", set **Sequence Manipulation Type** to "Change Adapter Base" and set **New Adapter** to "" (an empty text field).
421 3. Click **Add new Manipulate Reads**, change **Manipulate Reads on** to "Sequence Content", set **Sequence Manipulation Type** to "String Translate" and set **From** to "0123." and **To** to "ACGTN". 429 3. Click **Add new Manipulate Reads**, change **Manipulate Reads on** to "Sequence Content", set **Sequence Manipulation Type** to "String Translate" and set **From** to "0123." and **To** to "ACGTN".
422 4. Click Execute. The new history item will contain double-encoded pseudo-nucleotide space reads. 430 4. Click Execute. The new history item will contain double-encoded pseudo-nucleotide space reads.
423 431 ]]></help>
424 </help>
425 <citations> 432 <citations>
426 <citation type="doi">10.1093/bioinformatics/btq281</citation> 433 <citation type="doi">10.1093/bioinformatics/btq281</citation>
427 </citations> 434 </citations>
428 </tool> 435 </tool>