annotate tools/samtools/pileup_parser.xml @ 2:c2a356708570

Uploaded
author xuebing
date Fri, 09 Mar 2012 19:45:42 -0500
parents 9071e359b9a3
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
1 <tool id="pileup_parser" name="Filter pileup" version="1.0.2">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
2 <description>on coverage and SNPs</description>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
3 <command interpreter="perl">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
4 #if $pileup_type.type_select == "six" #pileup_parser.pl $input "3" "5" "6" "4" $qv_cutoff $cvrg_cutoff $snps_only $interval "2" $out_file1 $diff $qc_base
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
5 #elif $pileup_type.type_select == "ten" #pileup_parser.pl $input "3" "9" "10" "8" $qv_cutoff $cvrg_cutoff $snps_only $interval "2" $out_file1 $diff $qc_base
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
6 #elif $pileup_type.type_select == "manual" #pileup_parser.pl $input $pileup_type.ref_base_column $pileup_type.read_bases_column $pileup_type.read_qv_column $pileup_type.cvrg_column $qv_cutoff $cvrg_cutoff $snps_only $interval $pileup_type.coord_column $out_file1 $diff $qc_base
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
7 #end if#
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
8 </command>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
9 <inputs>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
10 <param name="input" type="data" format="tabular" label="Select dataset"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
11 <conditional name="pileup_type">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
12 <param name="type_select" type="select" label="which contains" help="See &quot;Types of pileup datasets&quot; below for examples">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
13 <option value="six" selected="true">Pileup with six columns (simple)</option>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
14 <option value="ten">Pileup with ten columns (with consensus)</option>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
15 <option value="manual">Set columns manually</option>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
16 </param>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
17 <when value="manual">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
18 <param name="ref_base_column" label="Select column with reference base" type="data_column" numerical="false" data_ref="input" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
19 <param name="read_bases_column" label="Select column with read bases" type="data_column" numerical="false" data_ref="input" help="something like this: ..,a.."/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
20 <param name="read_qv_column" label="Select column with base qualities" type="data_column" numerical="false" data_ref="input" help="something like this: IIIGIAI"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
21 <param name="cvrg_column" label="Select column with coverage" type="data_column" numerical="true" data_ref="input" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
22 <param name="coord_column" label="Select coordinate column" type="data_column" numerical="true" data_ref="input" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
23 </when>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
24 <when value="six">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
25 </when>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
26 <when value="ten">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
27 </when>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
28 </conditional>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
29 <param name="qv_cutoff" label="Do not consider read bases with quality lower than" type="integer" value="20" help="No variants with quality below this value will be reported"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
30 <param name="cvrg_cutoff" label="Do not report positions with coverage lower than" type="integer" value="3" help="Pileup lines with coverage lower than this value will be skipped"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
31 <param name="snps_only" label="Only report variants?" type="select" help="See &quot;Examples 1 and 2&quot; below for explanation">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
32 <option value="No">No</option>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
33 <option value="Yes" selected="true">Yes</option>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
34 </param>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
35 <param name="interval" label="Convert coordinates to intervals?" type="select" help="See &quot;Output format&quot; below for explanation">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
36 <option value="No" selected="true">No</option>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
37 <option value="Yes">Yes</option>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
38 </param>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
39 <param name="diff" label="Print total number of differences?" type="select" help="See &quot;Example 3&quot; below for explanation">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
40 <option value="No" selected="true">No</option>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
41 <option value="Yes">Yes</option>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
42 </param>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
43 <param name="qc_base" label="Print quality and base string?" type="select" help="See &quot;Example 4&quot; below for explanation">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
44 <option value="No">No</option>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
45 <option value="Yes" selected="true">Yes</option>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
46 </param>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
47
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
48 </inputs>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
49 <outputs>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
50 <data format="tabular" name="out_file1">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
51 <change_format>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
52 <when input="interval" value="Yes" format="interval" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
53 </change_format>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
54 </data>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
55 </outputs>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
56 <tests>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
57 <test>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
58 <param name="input" value="pileup_parser.6col.pileup"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
59 <output name="out_file1" file="pileup_parser.6col.20-3-yes-yes.pileup.out"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
60 <param name="type_select" value="six"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
61 <param name="qv_cutoff" value="20" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
62 <param name="cvrg_cutoff" value="3" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
63 <param name="snps_only" value="Yes"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
64 <param name="interval" value="Yes" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
65 <param name="diff" value="No" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
66 <param name="qc_base" value="Yes" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
67 </test>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
68 <test>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
69 <param name="input" value="pileup_parser.6col.pileup"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
70 <output name="out_file1" file="pileup_parser.6col.20-3-yes-no.pileup.out"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
71 <param name="type_select" value="six"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
72 <param name="qv_cutoff" value="20" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
73 <param name="cvrg_cutoff" value="3" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
74 <param name="snps_only" value="Yes"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
75 <param name="interval" value="No" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
76 <param name="diff" value="No" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
77 <param name="qc_base" value="Yes" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
78 </test>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
79 <test>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
80 <param name="input" value="pileup_parser.6col.pileup"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
81 <output name="out_file1" file="pileup_parser.6col.20-3-no-no.pileup.out"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
82 <param name="type_select" value="six"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
83 <param name="qv_cutoff" value="20" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
84 <param name="cvrg_cutoff" value="3" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
85 <param name="snps_only" value="No"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
86 <param name="interval" value="No" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
87 <param name="diff" value="No" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
88 <param name="qc_base" value="Yes" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
89 </test>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
90 <test>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
91 <param name="input" value="pileup_parser.10col.pileup"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
92 <output name="out_file1" file="pileup_parser.10col.20-3-yes-yes.pileup.out"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
93 <param name="type_select" value="ten"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
94 <param name="qv_cutoff" value="20" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
95 <param name="cvrg_cutoff" value="3" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
96 <param name="snps_only" value="Yes"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
97 <param name="interval" value="Yes" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
98 <param name="diff" value="No" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
99 <param name="qc_base" value="Yes" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
100 </test>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
101 <test>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
102 <param name="input" value="pileup_parser.10col.pileup"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
103 <output name="out_file1" file="pileup_parser.10col.20-3-yes-yes.pileup.out"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
104 <param name="type_select" value="manual"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
105 <param name="ref_base_column" value="3"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
106 <param name="read_bases_column" value="9"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
107 <param name="read_qv_column" value="10"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
108 <param name="cvrg_column" value="8"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
109 <param name="coord_column" value="2"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
110 <param name="qv_cutoff" value="20" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
111 <param name="cvrg_cutoff" value="3" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
112 <param name="snps_only" value="Yes"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
113 <param name="interval" value="Yes" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
114 <param name="diff" value="No" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
115 <param name="qc_base" value="Yes" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
116 </test>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
117 <test>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
118 <param name="input" value="pileup_parser.10col.pileup"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
119 <output name="out_file1" file="pileup_parser.10col.20-3-yes-yes-yes-yes.pileup.out"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
120 <param name="type_select" value="manual"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
121 <param name="ref_base_column" value="3"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
122 <param name="read_bases_column" value="9"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
123 <param name="read_qv_column" value="10"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
124 <param name="cvrg_column" value="8"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
125 <param name="coord_column" value="2"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
126 <param name="qv_cutoff" value="20" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
127 <param name="cvrg_cutoff" value="3" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
128 <param name="snps_only" value="Yes"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
129 <param name="interval" value="Yes" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
130 <param name="diff" value="Yes" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
131 <param name="qc_base" value="Yes" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
132 </test>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
133 <test>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
134 <param name="input" value="pileup_parser.10col.pileup"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
135 <output name="out_file1" file="pileup_parser.10col.20-3-yes-yes-yes-no.pileup.out"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
136 <param name="type_select" value="manual"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
137 <param name="ref_base_column" value="3"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
138 <param name="read_bases_column" value="9"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
139 <param name="read_qv_column" value="10"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
140 <param name="cvrg_column" value="8"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
141 <param name="coord_column" value="2"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
142 <param name="qv_cutoff" value="20" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
143 <param name="cvrg_cutoff" value="3" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
144 <param name="snps_only" value="Yes"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
145 <param name="interval" value="Yes" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
146 <param name="diff" value="Yes" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
147 <param name="qc_base" value="No" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
148 </test>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
149
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
150
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
151 </tests>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
152 <help>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
153
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
154 **What it does**
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
155
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
156 Allows one to find sequence variants and/or sites covered by a specified number of reads with bases above a set quality threshold. The tool works on six and ten column pileup formats produced with *samtools pileup* command. However, it also allows you to specify columns in the input file manually. The tool assumes the following:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
157
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
158 - the quality scores follow phred33 convention, where input qualities are ASCII characters equal to the Phred quality plus 33.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
159 - the pileup dataset was produced by the *samtools pileup* command (although you can override this by setting column assignments manually).
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
160
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
161 --------
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
162
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
163 **Types of pileup datasets**
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
164
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
165 The descriptions of the following pileup formats are largely based on information that can be found on the SAMTools_ documentation page. The 6- and 10-column variants are described below.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
166
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
167 .. _SAMTools: http://samtools.sourceforge.net/pileup.shtml
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
168
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
169 **Six column pileup**::
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
170
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
171 1 2 3 4 5 6
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
172 ---------------------------------
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
173 chrM 412 A 2 ., II
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
174 chrM 413 G 4 ..t, IIIH
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
175 chrM 414 C 4 ..Ta III2
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
176 chrM 415 C 4 TTTt III7
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
177
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
178 where::
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
179
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
180 Column Definition
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
181 ------- ----------------------------
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
182 1 Chromosome
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
183 2 Position (1-based)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
184 3 Reference base at that position
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
185 4 Coverage (# reads aligning over that position)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
186 5 Bases within reads
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
187 6 Quality values (phred33 scale, see Galaxy wiki for more)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
188
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
189 **Ten column pileup**
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
190
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
191 The `ten-column`__ pileup incorporates additional consensus information generated with the *-c* option of the *samtools pileup* command::
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
192
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
193
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
194 1 2 3 4 5 6 7 8 9 10
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
195 ------------------------------------------------
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
196 chrM 412 A A 75 0 25 2 ., II
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
197 chrM 413 G G 72 0 25 4 ..t, IIIH
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
198 chrM 414 C C 75 0 25 4 ..Ta III2
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
199 chrM 415 C T 75 75 25 4 TTTt III7
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
200
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
201 where::
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
202
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
203 Column Definition
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
204 ------- ----------------------------
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
205 1 Chromosome
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
206 2 Position (1-based)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
207 3 Reference base at that position
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
208 4 Consensus bases
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
209 5 Consensus quality
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
210 6 SNP quality
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
211 7 Maximum mapping quality
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
212 8 Coverage (# reads aligning over that position)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
213 9 Bases within reads
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
214 10 Quality values (phred33 scale, see Galaxy wiki for more)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
215
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
216
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
217 .. __: http://samtools.sourceforge.net/cns0.shtml
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
218
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
219 ------
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
220
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
221 **The output format**
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
222
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
223 The tool modifies the input dataset in two ways:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
224
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
225 1. It appends five columns to the end of every reported line:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
226
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
227 - Number of **A** variants
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
228 - Number of **C** variants
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
229 - Number of **G** variants
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
230 - Number of **T** variants
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
231 - Number of read bases covering this position, where quality is equal to or higher than the value set by **Do not consider read bases with quality lower than** option.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
232
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
233 Optionally, if **Print total number of differences?** is set to **Yes**, the tool will append the sixth column with the total number of deviants (see below).
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
234
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
235 2. If **Convert coordinates to intervals?** is set to **Yes**, the tool replaces the position column (typically the second column) with a pair of tab-delimited start/end values.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
236
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
237 For example, if you are calling variants with base quality above 20 on this dataset::
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
238
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
239 chrM 412 A 2 ., II
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
240 chrM 413 G 4 ..t, III2
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
241 chrM 414 C 4 ..Ta III2
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
242 chrM 415 C 4 TTTt III7
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
243
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
244 you will get::
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
245
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
246 chrM 413 G 4 ..t, IIIH 0 0 2 1 3
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
247 chrM 414 C 4 ..Ta III2 1 1 0 1 3
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
248 chrM 415 C 4 TTTt III7 0 0 0 4 4
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
249
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
250 where::
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
251
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
252 Column Definition
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
253 ------- ----------------------------
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
254 1 Chromosome
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
255 2 Position (1-based)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
256 3 Reference base at that position
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
257 4 Coverage (# reads aligning over that position)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
258 5 Bases within reads
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
259 6 Quality values (phred33 scale, see Galaxy wiki for more)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
260 7 Number of A variants
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
261 8 Number of C variants
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
262 9 Number of G variants
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
263 10 Number of T variants
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
264 11 Quality adjusted coverage:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
265 12 Number of read bases (i.e., # of reads) with quality above the set threshold
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
266 13 Total number of deviants (if Print total number of differences? is set to yes)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
267
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
268 if **Print total number of differences?** is set to **Yes**, you will get::
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
269
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
270 chrM 413 G 4 ..t, IIIH 0 0 2 1 3 1
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
271 chrM 414 C 4 ..Ta III2 1 2 0 1 3 2
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
272 chrM 415 C 4 TTTt III7 0 0 0 4 4 0
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
273
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
274 Note the additional column 13, which contains the number of deviant reads (e.g., there are two deviants, T and a, for position 414).
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
275
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
276
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
277 Finally, if **Convert coordinates to intervals?** is set to **Yes**, you will get one additional column with the end coordinate::
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
278
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
279 chrM 412 413 G 4 ..t, III2 0 0 2 1 3
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
280 chrM 413 414 C 4 ..Ta III2 1 2 0 1 3
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
281 chrM 414 415 C 4 TTTt III7 0 0 0 4 4
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
282
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
283 where::
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
284
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
285 Column Definition
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
286 ------- ----------------------------
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
287 1 Chromosome
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
288 2 Start position (0-based)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
289 3 End position (1-based)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
290 4 Reference base at that position
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
291 5 Coverage (# reads aligning over that position)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
292 6 Bases within reads
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
293 7 Quality values (phred33 scale, see Galaxy wiki for more)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
294 8 Number of A variants
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
295 9 Number of C variants
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
296 10 Number of G variants
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
297 11 Number of T variants
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
298 12 Quality adjusted coverage
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
299 13 Total number of deviants (if Print total number of differences? is set to yes)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
300
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
301
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
302 Note that in this case the coordinates of SNPs were converted to intervals, where the start coordinate is 0-based and the end coordinate is 1-based, using the UCSC Table Browser convention.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
303
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
304 Although three positions have variants in the original file (413, 414, and 415), only 413 and 415 are reported because the quality values associated with these two SNPs are above the threshold of 20. In the case of 414 the **a** allele has a quality value of 17 ( ord("2")-33 ), and is therefore not reported. Note that five columns have been added to each of the reported lines::
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
305
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
306 chrM 413 G 4 ..t, IIIH 0 0 2 1 3
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
307
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
308 Here, there is one variant, **t**. Because the fourth column represents **T** counts, it is incremented by 1. The last column shows that at this position, three reads have bases above the quality threshold of 20.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
309
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
310 -----
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
311
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
312 **Example 1**: Just variants
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
313
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
314 In this mode, the tool only outputs the lines from the input datasets where at least one read contains a sequence variant with quality above the threshold set by the **Do not consider read bases with quality lower than** option. For example, suppose one has a pileup dataset like the following::
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
315
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
316 chrM 412 A 2 ., II
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
317 chrM 413 G 4 ..t, III2
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
318 chrM 414 C 4 ..Ta III2
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
319 chrM 415 C 4 TTTt III7
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
320
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
321 To call all variants (with no restriction by coverage) with quality above phred value of 20, we will need to set the parameters as follows:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
322
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
323 .. image:: ./static/images/pileup_parser_help1.png
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
324
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
325 Running the tool with these parameters will return::
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
326
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
327 chrM 413 G 4 ..t, III2 0 0 2 1 3
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
328 chrM 414 C 4 ..Ta III2 0 2 0 1 3
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
329 chrM 415 C 4 TTTt III7 0 0 0 4 4
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
330
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
331 **Note** that at position 414 the *a* variant is not counted (the **A** count on that line is 0) because its associated quality value is 17 (ord('2')-33 = 17), which is below the phred threshold of 20 set by the **Count variants with quality above this value** parameter. The position itself is still reported because the *T* variant passes the threshold.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
332
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
333 -----
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
334
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
335 **Example 2**: Report everything
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
336
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
337 In addition to calling variants, it is often useful to know the quality adjusted coverage. Running the tool with these parameters:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
338
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
339 .. image:: ./static/images/pileup_parser_help2.png
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
340
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
341 will report everything from the original file::
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
342
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
343 chrM 412 A 2 ., II 2 0 0 0 2
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
344 chrM 413 G 4 ..t, III2 0 0 2 1 3
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
345 chrM 414 C 4 ..Ta III2 0 2 0 1 3
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
346 chrM 415 C 4 TTTt III7 0 0 0 4 4
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
347
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
348 Here, you can see that although the total coverage at position 414 is 4 (column 4), the quality adjusted coverage is 3 (last column). This is because only three out of four reads have bases with quality above the set threshold of 20 (the actual qualities are III2 or, after conversion, 40, 40, 40, 17).
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
349
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
350 One can use the last column of this dataset to filter out (using Galaxy's **Filter** tool) positions where quality adjusted coverage (last column) is below a set threshold.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
351
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
352 ------
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
353
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
354 **Example 3**: Report everything and print total number of differences
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
355
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
356 If you set **Print total number of differences?** to **Yes**, the tool will print an additional column with the total number of reads where a deviant base is above the quality threshold. So, setting parameters like this:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
357
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
358 .. image:: ./static/images/pileup_parser_help3.png
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
359
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
360 will produce this::
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
361
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
362 chrM 412 A 2 ., II 2 0 0 0 2 0
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
363 chrM 413 G 4 ..t, III2 0 0 2 1 3 1
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
364 chrM 414 C 4 ..Ta III2 0 2 0 1 3 1
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
365 chrM 415 C 4 TTTt III7 0 0 0 4 4 0
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
366
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
367
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
368 -----
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
369
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
370 **Example 4**: Report everything, print total number of differences, and ignore qualities and read bases
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
371
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
372 Setting **Print quality and base string?** to **No** as shown here:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
373
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
374 .. image:: ./static/images/pileup_parser_help4.png
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
375
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
376 will produce this::
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
377
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
378 chrM 412 A 2 2 0 0 0 2 0
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
379 chrM 413 G 4 0 0 2 1 3 1
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
380 chrM 414 C 4 0 2 0 1 3 1
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
381 chrM 415 C 4 0 0 0 4 4 0
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
382
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
383
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
384
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
385
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
386 </help>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
387 </tool>