annotate tools/fastq/fastq_manipulation.xml @ 0:9071e359b9a3

Uploaded
author xuebing
date Fri, 09 Mar 2012 19:37:19 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
1 <tool id="fastq_manipulation" name="Manipulate FASTQ" version="1.0.1">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
2 <options sanitize="False" /> <!-- This tool uses a file to rely all parameter information (actually a dynamically generated python module), we can safely not sanitize any parameters -->
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
3 <description>reads on various attributes</description>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
4 <command interpreter="python">fastq_manipulation.py $input_file $fastq_manipulation_file $output_file $output_file.files_path '${input_file.extension[len( 'fastq' ):]}'</command>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
<inputs>
  <!--
    Input form for the tool. It has two repeatable sections:
      * match_blocks      : regular-expression filters that decide which reads are manipulated
      * manipulate_blocks : the edits applied to every read that passed all of the filters
    The nested conditional / parameter names below are looked up by the
    fastq_manipulation_file configfile template, so they must stay in sync with it.
  -->
  <!-- This tool is purposely over-engineered (e.g. Single option conditionals) to allow easy enhancement with workflow/rerun compatibility -->
  <page>
    <param name="input_file" type="data" format="fastqsanger,fastqcssanger" label="FASTQ File" help="Requires groomed data: if your data does not appear here try using the FASTQ groomer."/>
    <!-- Match Reads -->
    <repeat name="match_blocks" title="Match Reads">
      <conditional name="match_type">
        <!-- Which part of the read the regular expression is searched in -->
        <param name="match_type_selector" type="select" label="Match Reads by">
          <option value="identifier">Name/Identifier</option>
          <option value="sequence">Sequence Content</option>
          <option value="quality">Quality Score Content</option>
        </param>
        <when value="identifier">
          <conditional name="match">
            <param name="match_selector" type="select" label="Identifier Match Type">
              <option value="regex">Regular Expression</option>
            </param>
            <when value="regex">
              <param type="text" name="match_by" label="Match by" value=".*" />
            </when>
          </conditional>
        </when>
        <when value="sequence">
          <conditional name="match">
            <param name="match_selector" type="select" label="Sequence Match Type">
              <option value="regex">Regular Expression</option>
            </param>
            <when value="regex">
              <param type="text" name="match_by" label="Match by" value=".*" />
            </when>
          </conditional>
        </when>
        <when value="quality">
          <conditional name="match">
            <param name="match_selector" type="select" label="Quality Match Type">
              <option value="regex">Regular Expression</option>
            </param>
            <when value="regex">
              <param type="text" name="match_by" label="Match by" value=".*" />
            </when>
          </conditional>
        </when>
      </conditional>
    </repeat>
    <!-- Manipulate Matched Reads -->
    <repeat name="manipulate_blocks" title="Manipulate Reads">
      <conditional name="manipulation_type">
        <!-- Which part of the read is edited; "miscellaneous" holds whole-read actions -->
        <param name="manipulation_type_selector" type="select" label="Manipulate Reads on">
          <option value="identifier">Name/Identifier</option>
          <option value="sequence">Sequence Content</option>
          <option value="quality">Quality Score Content</option>
          <option value="miscellaneous">Miscellaneous Actions</option>
        </param>
        <when value="identifier">
          <conditional name="manipulation">
            <param name="manipulation_selector" type="select" label="Identifier Manipulation Type">
              <option value="translate">String Translate</option>
            </param>
            <when value="translate">
              <param name="from" type="text" label="From" value="" />
              <param name="to" type="text" label="To" value="" />
            </when>
          </conditional>
        </when>
        <when value="sequence">
          <conditional name="manipulation">
            <param name="manipulation_selector" type="select" label="Sequence Manipulation Type">
              <option value="rev_comp">Reverse Complement</option>
              <option value="rev_no_comp">Reverse, No Complement</option>
              <option value="no_rev_comp">Complement, No Reverse</option>
              <option value="trim">Trim</option>
              <option value="dna_to_rna">DNA to RNA</option>
              <option value="rna_to_dna">RNA to DNA</option>
              <option value="translate">String Translate</option>
              <option value="change_adapter">Change Adapter Base</option>
            </param>
            <when value="rev_comp">
              <!-- no extra settings -->
            </when>
            <when value="rev_no_comp">
              <!-- no extra settings -->
            </when>
            <when value="no_rev_comp">
              <!-- no extra settings -->
            </when>
            <when value="trim">
              <conditional name="offset_type">
                <param name="base_offset_type" type="select" label="Define Base Offsets as" help="Use Absolute for fixed length reads (Illumina, SOLiD)&lt;br&gt;Use Percentage for variable length reads (Roche/454)">
                  <option value="offsets_absolute" selected="true">Absolute Values</option>
                  <option value="offsets_percent">Percentage of Read Length</option>
                </param>
                <when value="offsets_absolute">
                  <!-- Absolute offsets: whole, non-negative base counts -->
                  <param name="left_column_offset" label="Offset from 5' end" value="0" type="integer" help="Values start at 0, increasing from the left">
                    <validator type="in_range" message="Base Offsets must be positive" min="0" max="inf"/>
                    <validator type="expression" message="An integer is required.">int( float( value ) ) == float( value )</validator>
                  </param>
                  <param name="right_column_offset" label="Offset from 3' end" value="0" type="integer" help="Values start at 0, increasing from the right">
                    <validator type="in_range" message="Base Offsets must be positive" min="0" max="inf"/>
                    <validator type="expression" message="An integer is required.">int( float( value ) ) == float( value )</validator>
                  </param>
                </when>
                <when value="offsets_percent">
                  <!-- Percentage offsets: 0 to 100, converted to base counts per read by the configfile template -->
                  <param name="left_column_offset" label="Offset from 5' end" value="0" type="float">
                    <validator type="in_range" message="Base Offsets must be between 0 and 100" min="0" max="100"/>
                  </param>
                  <param name="right_column_offset" label="Offset from 3' end" value="0" type="float">
                    <validator type="in_range" message="Base Offsets must be between 0 and 100" min="0" max="100"/>
                  </param>
                </when>
              </conditional>
              <!-- Boolean params take "checked" for their default; "selected" only applies to select options -->
              <param name="keep_zero_length" label="Keep reads with zero length" type="boolean" truevalue="keep_zero_length" falsevalue="exclude_zero_length" checked="false"/>
            </when>
            <when value="dna_to_rna">
              <!-- no extra settings -->
            </when>
            <when value="rna_to_dna">
              <!-- no extra settings -->
            </when>
            <when value="translate">
              <param name="from" type="text" label="From" value="" />
              <param name="to" type="text" label="To" value="" />
            </when>
            <when value="change_adapter">
              <param name="new_adapter" label="New Adapter" type="text" value="G" help="An empty string will remove the adapter base" />
            </when>
          </conditional>
        </when>
        <when value="quality">
          <conditional name="manipulation">
            <param name="manipulation_selector" type="select" label="Quality Manipulation Type">
              <option value="translate">String Translate</option>
              <!-- <option value="modify_each_score">Apply Transformation to each Score</option> Not enabled yet-->
            </param>
            <when value="translate">
              <param name="from" type="text" label="From" value="" />
              <param name="to" type="text" label="To" value="" />
            </when>
            <!-- Unreachable from the form while the matching option above stays commented out -->
            <when value="modify_each_score">
              <param name="map_score" type="text" label="Modify Score by" value="$score + 1" />
            </when>
          </conditional>
        </when>
        <when value="miscellaneous">
          <conditional name="manipulation">
            <param name="manipulation_selector" type="select" label="Miscellaneous Manipulation Type">
              <option value="remove">Remove Read</option>
            </param>
            <when value="remove">
              <!-- no extra settings -->
            </when>
          </conditional>
        </when>
      </conditional>
    </repeat>
  </page>
</inputs>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
<configfiles>
  <configfile name="fastq_manipulation_file">##create an importable module
## This Cheetah template renders to Python source. The rendered file is handed to
## fastq_manipulation.py on the command line as the fastq_manipulation_file argument.
## Free-text form values are hex-encoded at render time and decoded again with
## binascii.unhexlify inside the generated code, so they are never written into the
## module as raw string literals.
## The first binascii import below is a Cheetah directive (used by the placeholders);
## the second one is emitted into the generated Python module.
#import binascii
import re
import binascii
from string import maketrans
##does read match
## Returns True only if the read satisfies every Match Reads block (regex search on the
## chosen field). With no blocks configured the loop body is never emitted and every read matches.
def match_read( fastq_read ):
    #for $match_block in $match_blocks:
        #if $match_block['match_type']['match_type_selector'] == 'identifier':
    search_target = fastq_read.identifier[1:] ##don't include @
        #elif $match_block['match_type']['match_type_selector'] == 'sequence':
    search_target = fastq_read.sequence
        #elif $match_block['match_type']['match_type_selector'] == 'quality':
    search_target = fastq_read.quality
        #else:
        ## unknown selector: emit nothing for this block
        #continue
        #end if
    if not re.search( binascii.unhexlify( "${ binascii.hexlify( str( match_block['match_type']['match']['match_by'] ) ) }" ), search_target ):
        return False
    #end for
    return True
##modify matched reads
## Applies every Manipulate Reads block, in form order, to a clone of the read.
## Returns the modified read, or None when the read is to be dropped (the remove action,
## or a trim that leaves zero bases while zero-length reads are not kept).
def manipulate_read( fastq_read ):
    new_read = fastq_read.clone()
    #for $manipulate_block in $manipulate_blocks:
        #if $manipulate_block['manipulation_type']['manipulation_type_selector'] == 'identifier':
            #if $manipulate_block['manipulation_type']['manipulation']['manipulation_selector'] == 'translate':
    new_read.identifier = "@%s" % new_read.identifier[1:].translate( maketrans( binascii.unhexlify( "${ binascii.hexlify( str( manipulate_block['manipulation_type']['manipulation']['from'] ) ) }" ), binascii.unhexlify( "${ binascii.hexlify( str( manipulate_block['manipulation_type']['manipulation']['to'] ) ) }" ) ) )
            #end if
        #elif $manipulate_block['manipulation_type']['manipulation_type_selector'] == 'sequence':
            #if $manipulate_block['manipulation_type']['manipulation']['manipulation_selector'] == 'translate':
    new_read.sequence = new_read.sequence.translate( maketrans( binascii.unhexlify( "${ binascii.hexlify( str( manipulate_block['manipulation_type']['manipulation']['from'] ) ) }" ), binascii.unhexlify( "${ binascii.hexlify( str( manipulate_block['manipulation_type']['manipulation']['to'] ) ) }" ) ) )
            #elif $manipulate_block['manipulation_type']['manipulation']['manipulation_selector'] == 'rev_comp':
    new_read = new_read.reverse_complement()
            #elif $manipulate_block['manipulation_type']['manipulation']['manipulation_selector'] == 'rev_no_comp':
    new_read = new_read.reverse()
            #elif $manipulate_block['manipulation_type']['manipulation']['manipulation_selector'] == 'no_rev_comp':
    new_read = new_read.complement()
            #elif $manipulate_block['manipulation_type']['manipulation']['manipulation_selector'] == 'trim':
                #if $manipulate_block['manipulation_type']['manipulation']['offset_type']['base_offset_type'] == 'offsets_percent':
    ## percentages are converted to base counts relative to the current read length
    left_column_offset = int( round( float( ${ manipulate_block['manipulation_type']['manipulation']['offset_type']['left_column_offset'] } ) / 100.0 * float( len( new_read ) ) ) )
    right_column_offset = int( round( float( ${ manipulate_block['manipulation_type']['manipulation']['offset_type']['right_column_offset'] } ) / 100.0 * float( len( new_read ) ) ) )
                #else
    left_column_offset = ${ manipulate_block['manipulation_type']['manipulation']['offset_type']['left_column_offset'] }
    right_column_offset = ${ manipulate_block['manipulation_type']['manipulation']['offset_type']['right_column_offset'] }
                #end if
    ## the right offset is turned into a negative end index; an offset of 0 becomes None (no end bound)
    if right_column_offset > 0:
        right_column_offset = -right_column_offset
    else:
        right_column_offset = None
    new_read = new_read.slice( left_column_offset, right_column_offset )
    if not ( ${str( manipulate_block['manipulation_type']['manipulation']['keep_zero_length'] ) == 'keep_zero_length'} or len( new_read ) ):
        return None
            #elif $manipulate_block['manipulation_type']['manipulation']['manipulation_selector'] == 'dna_to_rna':
    ## DNA to RNA must produce the RNA form of the sequence (the two conversions were previously swapped)
    new_read = new_read.sequence_as_RNA()
            #elif $manipulate_block['manipulation_type']['manipulation']['manipulation_selector'] == 'rna_to_dna':
    new_read = new_read.sequence_as_DNA()
            #elif $manipulate_block['manipulation_type']['manipulation']['manipulation_selector'] == 'change_adapter':
    ## only reads whose sequence_space is color get their adapter changed
    if new_read.sequence_space == 'color':
        new_read = new_read.change_adapter( binascii.unhexlify( "${ binascii.hexlify( str( manipulate_block['manipulation_type']['manipulation']['new_adapter'] ) ) }" ) )
            #end if
        #elif $manipulate_block['manipulation_type']['manipulation_type_selector'] == 'quality':
            #if $manipulate_block['manipulation_type']['manipulation']['manipulation_selector'] == 'translate':
    new_read.quality = new_read.quality.translate( maketrans( binascii.unhexlify( "${ binascii.hexlify( str( manipulate_block['manipulation_type']['manipulation']['from'] ) ) }" ), binascii.unhexlify( "${ binascii.hexlify( str( manipulate_block['manipulation_type']['manipulation']['to'] ) ) }" ) ) )
            ## the selector value declared in the inputs section is modify_each_score (map_score is the name of its nested text param)
            #elif $manipulate_block['manipulation_type']['manipulation']['manipulation_selector'] == 'modify_each_score':
    def score_method( score ):
        raise Exception, "Unimplemented" ##This option is not yet available, need to abstract out e.g. column adding tool action: preventing users from using 'harmful' actions
    new_read.quality_map( score_method )
            #end if
        #elif $manipulate_block['manipulation_type']['manipulation_type_selector'] == 'miscellaneous':
            #if $manipulate_block['manipulation_type']['manipulation']['manipulation_selector'] == 'remove':
    return None
            #end if
        #else:
        ## unknown manipulation type: emit nothing for this block
        #continue
        #end if
    #end for
    ## a description line that is more than a bare plus sign is rebuilt from the (possibly changed) identifier
    if new_read.description != "+":
        new_read.description = "+%s" % new_read.identifier[1:] ##ensure description is still valid
    return new_read
## Entry point: reads that do not match are returned unchanged; matching reads are
## returned manipulated, or as None when they are to be dropped.
def match_and_manipulate_read( fastq_read ):
    new_read = fastq_read
    if match_read( fastq_read ):
        new_read = manipulate_read( fastq_read )
    return new_read
  </configfile>
</configfiles>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
<outputs>
  <!-- Single result dataset. format="input" asks Galaxy to give the output the datatype of the tool's input dataset (NOTE(review): confirm against the Galaxy release in use). -->
  <data name="output_file" format="input" />
</outputs>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
252 <tests>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
<!-- Test 1: every identifier matches; an empty translate table is applied, so the output should equal the input -->
<test>
  <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
  <param name="match_type_selector" value="identifier" />
  <param name="match_selector" value="regex" />
  <param name="match_by" value=".*" />
  <param name="manipulation_type_selector" value="identifier" />
  <param name="manipulation_selector" value="translate" />
  <param name="from" value="" />
  <param name="to" value="" />
  <output name="output_file" file="sanger_full_range_original_sanger.fastqsanger" />
</test>
<!-- Test 2: no identifier matches; same empty translate, output should equal the input -->
<test>
  <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
  <param name="match_type_selector" value="identifier" />
  <param name="match_selector" value="regex" />
  <param name="match_by" value="STRINGDOESNOTEXIST" />
  <param name="manipulation_type_selector" value="identifier" />
  <param name="manipulation_selector" value="translate" />
  <param name="from" value="" />
  <param name="to" value="" />
  <output name="output_file" file="sanger_full_range_original_sanger.fastqsanger" />
</test>
<!-- Test 3: every identifier matches and matched reads are removed; the output should be empty -->
<test>
  <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
  <param name="match_type_selector" value="identifier" />
  <param name="match_selector" value="regex" />
  <param name="match_by" value=".*" />
  <param name="manipulation_type_selector" value="miscellaneous" />
  <param name="manipulation_selector" value="remove" />
  <output name="output_file" file="empty_file.dat" />
</test>
<!-- Test 4: no identifier matches, so the remove action never fires; output should equal the input -->
<test>
  <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
  <param name="match_type_selector" value="identifier" />
  <param name="match_selector" value="regex" />
  <param name="match_by" value="STRINGDOESNOTEXIST" />
  <param name="manipulation_type_selector" value="miscellaneous" />
  <param name="manipulation_selector" value="remove" />
  <output name="output_file" file="sanger_full_range_original_sanger.fastqsanger" />
</test>
<!-- Test 5: every identifier matches; trim 45 bases from each end (absolute offsets), leaving the 4 inner-most bases -->
<test>
  <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
  <param name="match_type_selector" value="identifier" />
  <param name="match_selector" value="regex" />
  <param name="match_by" value=".*" />
  <param name="manipulation_type_selector" value="sequence" />
  <param name="manipulation_selector" value="trim" />
  <param name="base_offset_type" value="offsets_absolute"/>
  <param name="left_column_offset" value="45"/>
  <param name="right_column_offset" value="45"/>
  <param name="keep_zero_length" value="true" />
  <output name="output_file" file="fastq_trimmer_out1.fastqsanger" />
</test>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
311 <test>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
312 <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
313 <param name="match_type_selector" value="identifier" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
314 <param name="match_selector" value="regex" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
315 <param name="match_by" value=".*" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
316 <param name="manipulation_type_selector" value="sequence" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
317 <param name="manipulation_selector" value="trim" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
318 <param name="base_offset_type" value="offsets_percent"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
319 <param name="left_column_offset" value="47.87"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
320 <param name="right_column_offset" value="47.87"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
321 <param name="keep_zero_length" value="true" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
322 <output name="output_file" file="fastq_trimmer_out1.fastqsanger" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
323 </test>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
324 <!-- match all and rev comp -->
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
325 <test>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
326 <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
327 <param name="match_type_selector" value="identifier" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
328 <param name="match_selector" value="regex" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
329 <param name="match_by" value=".*" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
330 <param name="manipulation_type_selector" value="sequence" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
331 <param name="manipulation_selector" value="rev_comp" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
332 <output name="output_file" file="sanger_full_range_rev_comp.fastqsanger" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
333 </test>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
334 <!-- match all and rev comp, with ambiguous DNA -->
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
335 <test>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
336 <param name="input_file" value="misc_dna_original_sanger.fastqsanger" ftype="fastqsanger" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
337 <param name="match_type_selector" value="identifier" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
338 <param name="match_selector" value="regex" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
339 <param name="match_by" value=".*" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
340 <param name="manipulation_type_selector" value="sequence" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
341 <param name="manipulation_selector" value="rev_comp" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
342 <output name="output_file" file="misc_dna_as_sanger_rev_comp_1.fastqsanger" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
343 </test>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
344 <!-- match all and rev comp, with ambiguous RNA -->
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
345 <test>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
346 <param name="input_file" value="misc_rna_original_sanger.fastqsanger" ftype="fastqsanger" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
347 <param name="match_type_selector" value="identifier" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
348 <param name="match_selector" value="regex" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
349 <param name="match_by" value=".*" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
350 <param name="manipulation_type_selector" value="sequence" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
351 <param name="manipulation_selector" value="rev_comp" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
352 <output name="output_file" file="misc_rna_as_sanger_rev_comp_1.fastqsanger" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
353 </test>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
354 <!-- match first seq and rev comp -->
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
355 <test>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
356 <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
357 <param name="match_type_selector" value="identifier" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
358 <param name="match_selector" value="regex" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
359 <param name="match_by" value="FAKE0001" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
360 <param name="manipulation_type_selector" value="sequence" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
361 <param name="manipulation_selector" value="rev_comp" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
362 <output name="output_file" file="sanger_full_range_rev_comp_1_seq.fastqsanger" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
363 </test>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
364 <!-- match first seq and rev comp: i.e. undo above -->
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
365 <test>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
366 <param name="input_file" value="sanger_full_range_rev_comp_1_seq.fastqsanger" ftype="fastqsanger" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
367 <param name="match_type_selector" value="identifier" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
368 <param name="match_selector" value="regex" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
369 <param name="match_by" value="FAKE0001" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
370 <param name="manipulation_type_selector" value="sequence" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
371 <param name="manipulation_selector" value="rev_comp" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
372 <output name="output_file" file="sanger_full_range_original_sanger.fastqsanger" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
373 </test>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
374 <!-- match all and DNA to RNA -->
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
375 <test>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
376 <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
377 <param name="match_type_selector" value="identifier" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
378 <param name="match_selector" value="regex" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
379 <param name="match_by" value=".*" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
380 <param name="manipulation_type_selector" value="sequence" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
381 <param name="manipulation_selector" value="dna_to_rna" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
382 <output name="output_file" file="sanger_full_range_as_rna.fastqsanger" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
383 </test>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
384 <!-- match all and RNA to DNA -->
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
385 <test>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
386 <param name="input_file" value="sanger_full_range_as_rna.fastqsanger" ftype="fastqsanger" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
387 <param name="match_type_selector" value="identifier" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
388 <param name="match_selector" value="regex" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
389 <param name="match_by" value=".*" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
390 <param name="manipulation_type_selector" value="sequence" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
391 <param name="manipulation_selector" value="rna_to_dna" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
392 <output name="output_file" file="sanger_full_range_original_sanger.fastqsanger" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
393 </test>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
394 </tests>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
395 <help>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
396 This tool allows you to build complex manipulations to be applied to each matching read in a FASTQ file. A read must match all matching directives in order for it to be manipulated; if a read does not match, it is output in a non-modified manner. All reads matching will have each of the specified manipulations performed upon them, in the order specified.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
397
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
398 Regular Expression Matches are made using re.search, see http://docs.python.org/library/re.html for more information.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
399 All matching is performed on a single-line string, regardless of whether e.g. the sequence or quality score spans multiple lines in the original file.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
400
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
401 String translations are performed using string.translate, see http://docs.python.org/library/string.html#string.translate and http://docs.python.org/library/string.html#string.maketrans for more information.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
402
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
403 .. class:: warningmark
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
404
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
405 Only color space reads can have adapter bases substituted.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
406
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
407
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
408 -----
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
409
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
410 **Example**
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
411
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
412 Suppose you have a color space sanger formatted sequence (fastqcssanger) and you want to double-encode the color space into pseudo-nucleotide space (this is different from converting) to allow these reads to be used in tools which do not natively support it (using specially designed indexes). This tool can handle this manipulation, however, this is generally not recommended as results tend to be poorer than those produced from tools which are specially designed to handle color space data.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
413
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
414 Steps:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
415
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
416 1. Click **Add new Match Reads** and leave the matching options set to the default (Matching by sequence name/identifier using the regular expression ".\*"; thereby matching all reads).
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
417 2. Click **Add new Manipulate Reads**, change **Manipulate Reads on** to "Sequence Content", set **Sequence Manipulation Type** to "Change Adapter Base" and set **New Adapter** to "" (an empty text field).
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
418 3. Click **Add new Manipulate Reads**, change **Manipulate Reads on** to "Sequence Content", set **Sequence Manipulation Type** to "String Translate" and set **From** to "0123." and **To** to "ACGTN".
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
419 4. Click Execute. The new history item will contain double-encoded pseudo-nucleotide space reads.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
420
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
421 ------
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
422
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
423 **Citation**
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
424
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
425 If you use this tool, please cite `Blankenberg D, Gordon A, Von Kuster G, Coraor N, Taylor J, Nekrutenko A; Galaxy Team. Manipulation of FASTQ data with Galaxy. Bioinformatics. 2010 Jul 15;26(14):1783-5. &lt;http://www.ncbi.nlm.nih.gov/pubmed/20562416&gt;`_
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
426
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
427
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
428 </help>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
429 </tool>