comparison pileup_parser.xml @ 2:85bedbea8a12 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/pileup_parser commit ab627176cd4f6efe6d1fe4b85baa679aaa651eb1
author devteam
date Wed, 05 Oct 2016 06:30:36 -0400
parents 1670f0565000
children
comparison
equal deleted inserted replaced
1:1670f0565000 2:85bedbea8a12
1 <tool id="pileup_parser" name="Filter pileup" version="1.0.2">> 1 <tool id="pileup_parser" name="Filter pileup" version="1.0.2">
2 <description>on coverage and SNPs</description> 2 <description>on coverage and SNPs</description>
3 <requirements>
4 <requirement type="package" version="5.22.0">perl</requirement>
5 </requirements>
3 <command interpreter="perl"> 6 <command interpreter="perl">
4 #if $pileup_type.type_select == "six" #pileup_parser.pl $input "3" "5" "6" "4" $qv_cutoff $cvrg_cutoff $snps_only $interval "2" $out_file1 $diff $qc_base 7 #if $pileup_type.type_select == "six" #pileup_parser.pl $input "3" "5" "6" "4" $qv_cutoff $cvrg_cutoff $snps_only $interval "2" $out_file1 $diff $qc_base
5 #elif $pileup_type.type_select == "ten" #pileup_parser.pl $input "3" "9" "10" "8" $qv_cutoff $cvrg_cutoff $snps_only $interval "2" $out_file1 $diff $qc_base 8 #elif $pileup_type.type_select == "ten" #pileup_parser.pl $input "3" "9" "10" "8" $qv_cutoff $cvrg_cutoff $snps_only $interval "2" $out_file1 $diff $qc_base
6 #elif $pileup_type.type_select == "manual" #pileup_parser.pl $input $pileup_type.ref_base_column $pileup_type.read_bases_column $pileup_type.read_qv_column $pileup_type.cvrg_column $qv_cutoff $cvrg_cutoff $snps_only $interval $pileup_type.coord_column $out_file1 $diff $qc_base 9 #elif $pileup_type.type_select == "manual" #pileup_parser.pl $input $pileup_type.ref_base_column $pileup_type.read_bases_column $pileup_type.read_qv_column $pileup_type.cvrg_column $qv_cutoff $cvrg_cutoff $snps_only $interval $pileup_type.coord_column $out_file1 $diff $qc_base
7 #end if# 10 #end if#
42 </param> 45 </param>
43 <param name="qc_base" label="Print quality and base string?" type="select" help="See &quot;Example 4&quot; below for explanation"> 46 <param name="qc_base" label="Print quality and base string?" type="select" help="See &quot;Example 4&quot; below for explanation">
44 <option value="No">No</option> 47 <option value="No">No</option>
45 <option value="Yes" selected="true">Yes</option> 48 <option value="Yes" selected="true">Yes</option>
46 </param> 49 </param>
47
48 </inputs> 50 </inputs>
49 <outputs> 51 <outputs>
50 <data format="tabular" name="out_file1"> 52 <data format="tabular" name="out_file1">
51 <change_format> 53 <change_format>
52 <when input="interval" value="Yes" format="interval" /> 54 <when input="interval" value="Yes" format="interval" />
91 <param name="input" value="pileup_parser.10col.pileup"/> 93 <param name="input" value="pileup_parser.10col.pileup"/>
92 <output name="out_file1" file="pileup_parser.10col.20-3-yes-yes.pileup.out"/> 94 <output name="out_file1" file="pileup_parser.10col.20-3-yes-yes.pileup.out"/>
93 <param name="type_select" value="ten"/> 95 <param name="type_select" value="ten"/>
94 <param name="qv_cutoff" value="20" /> 96 <param name="qv_cutoff" value="20" />
95 <param name="cvrg_cutoff" value="3" /> 97 <param name="cvrg_cutoff" value="3" />
96 <param name="snps_only" value="Yes"/>q 98 <param name="snps_only" value="Yes"/>
97 <param name="interval" value="Yes" /> 99 <param name="interval" value="Yes" />
98 <param name="diff" value="No" /> 100 <param name="diff" value="No" />
99 <param name="qc_base" value="Yes" /> 101 <param name="qc_base" value="Yes" />
100 </test> 102 </test>
101 <test> 103 <test>
109 <param name="coord_column" value="2"/> 111 <param name="coord_column" value="2"/>
110 <param name="qv_cutoff" value="20" /> 112 <param name="qv_cutoff" value="20" />
111 <param name="cvrg_cutoff" value="3" /> 113 <param name="cvrg_cutoff" value="3" />
112 <param name="snps_only" value="Yes"/> 114 <param name="snps_only" value="Yes"/>
113 <param name="interval" value="Yes" /> 115 <param name="interval" value="Yes" />
114 <param name="diff" value="No" /> 116 <param name="diff" value="No" />
115 <param name="qc_base" value="Yes" /> 117 <param name="qc_base" value="Yes" />
116 </test> 118 </test>
117 <test> 119 <test>
118 <param name="input" value="pileup_parser.10col.pileup"/> 120 <param name="input" value="pileup_parser.10col.pileup"/>
119 <output name="out_file1" file="pileup_parser.10col.20-3-yes-yes-yes-yes.pileup.out"/> 121 <output name="out_file1" file="pileup_parser.10col.20-3-yes-yes-yes-yes.pileup.out"/>
120 <param name="type_select" value="manual"/> 122 <param name="type_select" value="manual"/>
121 <param name="ref_base_column" value="3"/> 123 <param name="ref_base_column" value="3"/>
122 <param name="read_bases_column" value="9"/> 124 <param name="read_bases_column" value="9"/>
125 <param name="coord_column" value="2"/> 127 <param name="coord_column" value="2"/>
126 <param name="qv_cutoff" value="20" /> 128 <param name="qv_cutoff" value="20" />
127 <param name="cvrg_cutoff" value="3" /> 129 <param name="cvrg_cutoff" value="3" />
128 <param name="snps_only" value="Yes"/> 130 <param name="snps_only" value="Yes"/>
129 <param name="interval" value="Yes" /> 131 <param name="interval" value="Yes" />
130 <param name="diff" value="Yes" /> 132 <param name="diff" value="Yes" />
131 <param name="qc_base" value="Yes" /> 133 <param name="qc_base" value="Yes" />
132 </test> 134 </test>
133 <test> 135 <test>
134 <param name="input" value="pileup_parser.10col.pileup"/> 136 <param name="input" value="pileup_parser.10col.pileup"/>
135 <output name="out_file1" file="pileup_parser.10col.20-3-yes-yes-yes-no.pileup.out"/> 137 <output name="out_file1" file="pileup_parser.10col.20-3-yes-yes-yes-no.pileup.out"/>
141 <param name="coord_column" value="2"/> 143 <param name="coord_column" value="2"/>
142 <param name="qv_cutoff" value="20" /> 144 <param name="qv_cutoff" value="20" />
143 <param name="cvrg_cutoff" value="3" /> 145 <param name="cvrg_cutoff" value="3" />
144 <param name="snps_only" value="Yes"/> 146 <param name="snps_only" value="Yes"/>
145 <param name="interval" value="Yes" /> 147 <param name="interval" value="Yes" />
146 <param name="diff" value="Yes" /> 148 <param name="diff" value="Yes" />
147 <param name="qc_base" value="No" /> 149 <param name="qc_base" value="No" />
148 </test> 150 </test>
149 151 </tests>
150
151 </tests>
152 <help> 152 <help>
153 153
154 **What it does** 154 **What it does**
155 155
156 Allows one to find sequence variants and/or sites covered by a specified number of reads with bases above a set quality threshold. The tool works on six and ten column pileup formats produced with *samtools pileup* command. However, it also allows you to specify columns in the input file manually. The tool assumes the following: 156 Allows one to find sequence variants and/or sites covered by a specified number of reads with bases above a set quality threshold. The tool works on six and ten column pileup formats produced with *samtools pileup* command. However, it also allows you to specify columns in the input file manually. The tool assumes the following:
167 .. _SAMTools: http://samtools.sourceforge.net/pileup.shtml 167 .. _SAMTools: http://samtools.sourceforge.net/pileup.shtml
168 168
169 **Six column pileup**:: 169 **Six column pileup**::
170 170
171 1 2 3 4 5 6 171 1 2 3 4 5 6
172 --------------------------------- 172 ---------------------------------
173 chrM 412 A 2 ., II 173 chrM 412 A 2 ., II
174 chrM 413 G 4 ..t, IIIH 174 chrM 413 G 4 ..t, IIIH
175 chrM 414 C 4 ..Ta III2 175 chrM 414 C 4 ..Ta III2
176 chrM 415 C 4 TTTt III7 176 chrM 415 C 4 TTTt III7
177 177
178 where:: 178 where::
179 179
180 Column Definition 180 Column Definition
181 ------- ---------------------------- 181 ------- ----------------------------
182 1 Chromosome 182 1 Chromosome
183 2 Position (1-based) 183 2 Position (1-based)
184 3 Reference base at that position 184 3 Reference base at that position
185 4 Coverage (# reads aligning over that position) 185 4 Coverage (# reads aligning over that position)
186 5 Bases within reads 186 5 Bases within reads
187 6 Quality values (phred33 scale, see Galaxy wiki for more) 187 6 Quality values (phred33 scale, see Galaxy wiki for more)
188 188
189 **Ten column pileup** 189 **Ten column pileup**
190 190
191 The `ten-column`__ pileup incorporates additional consensus information generated with the *-c* option of the *samtools pileup* command:: 191 The `ten-column`__ pileup incorporates additional consensus information generated with the *-c* option of the *samtools pileup* command::
192 192
193 193
226 226
227 - Number of **A** variants 227 - Number of **A** variants
228 - Number of **C** variants 228 - Number of **C** variants
229 - Number of **G** variants 229 - Number of **G** variants
230 - Number of **T** variants 230 - Number of **T** variants
231 - Number of read bases covering this position, where quality is equal to or higher than the value set by **Do not consider read bases with quality lower than** option. 231 - Number of read bases covering this position, where quality is equal to or higher than the value set by **Do not consider read bases with quality lower than** option.
232 232
233 Optionally, if **Print total number of differences?** is set to **Yes**, the tool will append the sixth column with the total number of deviants (see below). 233 Optionally, if **Print total number of differences?** is set to **Yes**, the tool will append the sixth column with the total number of deviants (see below).
234 234
235 2. If **Convert coordinates to intervals?** is set to **Yes**, the tool replaces the position column (typically the second column) with a pair of tab-delimited start/end values. 235 2. If **Convert coordinates to intervals?** is set to **Yes**, the tool replaces the position column (typically the second column) with a pair of tab-delimited start/end values.
236 236
244 you will get:: 244 you will get::
245 245
246 chrM 413 G 4 ..t, IIIH 0 0 2 1 3 246 chrM 413 G 4 ..t, IIIH 0 0 2 1 3
247 chrM 414 C 4 ..Ta III2 1 1 0 1 3 247 chrM 414 C 4 ..Ta III2 1 1 0 1 3
248 chrM 415 C 4 TTTt III7 0 0 0 4 4 248 chrM 415 C 4 TTTt III7 0 0 0 4 4
249 249
250 where:: 250 where::
251 251
252 Column Definition 252 Column Definition
253 ------- ---------------------------- 253 ------- ----------------------------
254 1 Chromosome 254 1 Chromosome
262 9 Number of G variants 262 9 Number of G variants
263 10 Number of T variants 263 10 Number of T variants
264 11 Quality adjusted coverage: 264 11 Quality adjusted coverage:
265 12 Number of read bases (i.e., # of reads) with quality above the set threshold 265 12 Number of read bases (i.e., # of reads) with quality above the set threshold
266 13 Total number of deviants (if Print total number of differences? is set to yes) 266 13 Total number of deviants (if Print total number of differences? is set to yes)
267 267
268 if **Print total number of differences?** is set to **Yes**, you will get:: 268 if **Print total number of differences?** is set to **Yes**, you will get::
269 269
270 chrM 413 G 4 ..t, IIIH 0 0 2 1 3 1 270 chrM 413 G 4 ..t, IIIH 0 0 2 1 3 1
271 chrM 414 C 4 ..Ta III2 1 2 0 1 3 2 271 chrM 414 C 4 ..Ta III2 1 2 0 1 3 2
272 chrM 415 C 4 TTTt III7 0 0 0 4 4 0 272 chrM 415 C 4 TTTt III7 0 0 0 4 4 0
273 273
274 Note the additional column 13, that contains the number of deviant reads (e.g., there are two deviants, T and a, for position 414). 274 Note the additional column 13, that contains the number of deviant reads (e.g., there are two deviants, T and a, for position 414).
275 275
276
277 Finally, if **Convert coordinates to intervals?** is set to **Yes**, you will get one additional column with the end coordinate:: 276 Finally, if **Convert coordinates to intervals?** is set to **Yes**, you will get one additional column with the end coordinate::
278 277
279 chrM 412 413 G 4 ..t, III2 0 0 2 1 3 278 chrM 412 413 G 4 ..t, III2 0 0 2 1 3
280 chrM 414 415 C 4 ..Ta III2 1 2 0 1 3 279 chrM 414 415 C 4 ..Ta III2 1 2 0 1 3
281 chrM 414 415 C 4 TTTt III7 0 0 0 4 4 280 chrM 414 415 C 4 TTTt III7 0 0 0 4 4
282 281
283 where:: 282 where::
284 283
285 Column Definition 284 Column Definition
286 ------- ---------------------------- 285 ------- ----------------------------
287 1 Chromosome 286 1 Chromosome
297 11 Number of T variants 296 11 Number of T variants
298 12 Quality adjusted coverage 297 12 Quality adjusted coverage
299 13 Total number of deviants (if Print total number of differences? is set to yes) 298 13 Total number of deviants (if Print total number of differences? is set to yes)
300 299
301 300
302 Note that in this case the coordinates of SNPs were converted to intervals, where the start coordinate is 0-based and the end coordinate is 1-based, following the UCSC Table Browser convention. 301 Note that in this case the coordinates of SNPs were converted to intervals, where the start coordinate is 0-based and the end coordinate is 1-based, following the UCSC Table Browser convention.
303 302
304 Although three positions have variants in the original file (413, 414, and 415), only 413 and 415 are reported because the quality values associated with these two SNPs are above the threshold of 20. In the case of 414 the **a** allele has a quality value of 17 ( ord("2")-33 ), and is therefore not reported. Note that five columns have been added to each of the reported lines:: 303 Although three positions have variants in the original file (413, 414, and 415), only 413 and 415 are reported because the quality values associated with these two SNPs are above the threshold of 20. In the case of 414 the **a** allele has a quality value of 17 ( ord("2")-33 ), and is therefore not reported. Note that five columns have been added to each of the reported lines::
305 304
306 chrM 413 G 4 ..t, IIIH 0 0 2 1 3 305 chrM 413 G 4 ..t, IIIH 0 0 2 1 3
307 306
308 Here, there is one variant, **t**. Because the fourth column represents **T** counts, it is incremented by 1. The last column shows that at this position, three reads have bases above the quality threshold of 20. 307 Here, there is one variant, **t**. Because the fourth column represents **T** counts, it is incremented by 1. The last column shows that at this position, three reads have bases above the quality threshold of 20.
309 308
310 ----- 309 -----
311 310
312 **Example 1**: Just variants 311 **Example 1**: Just variants
315 314
316 chrM 412 A 2 ., II 315 chrM 412 A 2 ., II
317 chrM 413 G 4 ..t, III2 316 chrM 413 G 4 ..t, III2
318 chrM 414 C 4 ..Ta III2 317 chrM 414 C 4 ..Ta III2
319 chrM 415 C 4 TTTt III7 318 chrM 415 C 4 TTTt III7
320 319
321 To call all variants (with no restriction by coverage) with quality above phred value of 20, we will need to set the parameters as follows: 320 To call all variants (with no restriction by coverage) with quality above phred value of 20, we will need to set the parameters as follows:
322 321
323 .. image:: pileup_parser_help1.png 322 .. image:: pileup_parser_help1.png
324 323
325 Running the tool with these parameters will return:: 324 Running the tool with these parameters will return::
326 325
327 chrM 413 G 4 ..t, IIIH 0 0 0 1 3 326 chrM 413 G 4 ..t, IIIH 0 0 0 1 3
328 chrM 414 C 4 ..Ta III2 0 2 0 1 3 327 chrM 414 C 4 ..Ta III2 0 2 0 1 3
329 chrM 415 C 4 TTTt III7 0 0 0 4 4 328 chrM 415 C 4 TTTt III7 0 0 0 4 4
330 329
331 **Note** that position 414 is not reported because the *a* variant has associated quality value of 17 (because ord('2')-33 = 17) and is below the phred threshold of 20 set by the **Count variants with quality above this value** parameter. 330 **Note** that position 414 is not reported because the *a* variant has associated quality value of 17 (because ord('2')-33 = 17) and is below the phred threshold of 20 set by the **Count variants with quality above this value** parameter.
332 331
333 ----- 332 -----
334 333
335 **Example 2**: Report everything 334 **Example 2**: Report everything
336 335
337 In addition to calling variants, it is often useful to know the quality adjusted coverage. Running the tool with these parameters: 336 In addition to calling variants, it is often useful to know the quality adjusted coverage. Running the tool with these parameters:
338 337
339 .. image:: pileup_parser_help2.png 338 .. image:: pileup_parser_help2.png
340 339
341 will report everything from the original file:: 340 will report everything from the original file::
342 341
343 chrM 412 A 2 ., II 2 0 0 0 2 342 chrM 412 A 2 ., II 2 0 0 0 2
344 chrM 413 G 4 ..t, III2 0 0 2 1 3 343 chrM 413 G 4 ..t, III2 0 0 2 1 3
345 chrM 414 C 4 ..Ta III2 0 2 0 1 3 344 chrM 414 C 4 ..Ta III2 0 2 0 1 3
346 chrM 415 C 4 TTTt III7 0 0 0 4 4 345 chrM 415 C 4 TTTt III7 0 0 0 4 4
347 346
348 Here, you can see that although the total coverage at position 414 is 4 (column 4), the quality adjusted coverage is 3 (last column). This is because only three out of four reads have bases with quality above the set threshold of 20 (the actual qualities are III2 or, after conversion, 40, 40, 40, 17). 347 Here, you can see that although the total coverage at position 414 is 4 (column 4), the quality adjusted coverage is 3 (last column). This is because only three out of four reads have bases with quality above the set threshold of 20 (the actual qualities are III2 or, after conversion, 40, 40, 40, 17).
349 348
350 One can use the last column of this dataset to filter out (using Galaxy's **Filter** tool) positions where quality adjusted coverage (last column) is below a set threshold. 349 One can use the last column of this dataset to filter out (using Galaxy's **Filter** tool) positions where quality adjusted coverage (last column) is below a set threshold.
351 350
352 ------ 351 ------
361 360
362 chrM 412 A 2 ., II 2 0 0 0 2 0 361 chrM 412 A 2 ., II 2 0 0 0 2 0
363 chrM 413 G 4 ..t, III2 0 0 2 1 3 1 362 chrM 413 G 4 ..t, III2 0 0 2 1 3 1
364 chrM 414 C 4 ..Ta III2 0 2 0 1 3 1 363 chrM 414 C 4 ..Ta III2 0 2 0 1 3 1
365 chrM 415 C 4 TTTt III7 0 0 0 4 4 0 364 chrM 415 C 4 TTTt III7 0 0 0 4 4 0
366 365
367
368 ----- 366 -----
369 367
370 **Example 4**: Report everything, print total number of differences, and ignore qualities and read bases 368 **Example 4**: Report everything, print total number of differences, and ignore qualities and read bases
371 369
372 Setting **Print quality and base string?** to **No** as shown here: 370 Setting **Print quality and base string?** to **No** as shown here:
377 375
378 chrM 412 A 2 2 0 0 0 2 0 376 chrM 412 A 2 2 0 0 0 2 0
379 chrM 413 G 4 0 0 2 1 3 1 377 chrM 413 G 4 0 0 2 1 3 1
380 chrM 414 C 4 0 2 0 1 3 1 378 chrM 414 C 4 0 2 0 1 3 1
381 chrM 415 C 4 0 0 0 4 4 0 379 chrM 415 C 4 0 0 0 4 4 0
382
383
384
385
386 </help> 380 </help>
381 <citations>
382 </citations>
387 </tool> 383 </tool>