comparison my_VDM_tool.xml @ 3:44e4f5bfebde draft default tip

Uploaded
author fxce
date Mon, 16 Nov 2020 13:59:53 +0000
parents
children
comparison
equal deleted inserted replaced
2:472ea11c2b25 3:44e4f5bfebde
1 <tool id="my_VDM_tool" name="VDM_tool" version="1.0.0">
2 <description>Map a mutation using the Variant Discovery Mapping approach (in silico bulk segregant linkage analysis).</description>
3 <stdio> <!-- Exit-code rule: the open-ended range "1:" matches any exit code of 1 or above; no level is given, so presumably Galaxy's default (failure) applies - confirm. -->
4 <exit_code range="1:"/>
5 </stdio>
6 <requirements> <!-- Packages the tool declares it needs: R 3.2.1 and getopt 1.2.0 (option parsing for the R script, judging by the flags in the command). -->
7 <requirement type="package" version="3.2.1">R</requirement>
8 <requirement type="package" version="1.2.0">getopt</requirement>
9 </requirements>
10 <!-- Command template (Cheetah): runs my_VDM_tool.R from the tool directory. Each conditional forwards the value held by the branch the user selected. Every substitution is single-quoted so that dataset paths and free-text values reach Rscript as one argument each. -->
11 <command><![CDATA[
12 Rscript '${__tool_directory__}/my_VDM_tool.R'
13 --inf '$inf'
14 #if $species.species_select=="Celegans"
15 --itype '$species.ce'
16 #else if $species.species_select=="Zebrafish"
17 --itype '$species.ze'
18 #else if $species.species_select=="Brachypodium"
19 --itype '$species.br'
20 #else if $species.species_select=="Arabidopsis"
21 --itype '$species.ar'
22 #else if $species.species_select=="other"
23 --itype '$species.ot'
24 #end if
25 ## Quality filter threshold
26 --qual '$qual'
27 ## Allele-ratio source: AB field or AO/(AO+RO)
28 #if $allfreq.allfreq_select=="AB"
29 --allr '$allfreq.ab'
30 #else if $allfreq.allfreq_select=="ratio"
31 --allr '$allfreq.ratio'
32 #end if
33 ## Variant types used for plotting: SNPs only or all variants
34 #if $only_snp.only_snp_select=="TRUE"
35 --snp '$only_snp.true'
36 #else if $only_snp.only_snp_select=="FALSE"
37 --snp '$only_snp.false'
38 #end if
39 ## Allele-ratio limits for calling a variant homozygous (frequency plots)
40 --freqthr '$freqthr'
41 ## Loess span and plot colours
42 --lsp '$lsp'
43 --pcol '$pcol'
44 --lcol '$lcol'
45 ## X-axis spacing: uniform or chromosome-scaled
46 #if $xaxis.xaxis_select=="TRUE"
47 --xstand '$xaxis.true'
48 #else if $xaxis.xaxis_select=="FALSE"
49 --xstand '$xaxis.false'
50 #end if
51 ## Bin size for the frequency plots
52 --bsize '$bsize'
53 ## Y-axis of the frequency plots: actual or normalized frequency
54 #if $binnorm.binnorm_select=="TRUE"
55 --bnorm '$binnorm.true'
56 #else if $binnorm.binnorm_select=="FALSE"
57 --bnorm '$binnorm.false'
58 #end if
59 ## Optional subtraction list: the literal FALSE, or the path of the chosen history dataset
60 #if $exclfiles.exclfiles_select=="FALSE"
61 --exclf '$exclfiles.false'
62 #else if $exclfiles.exclfiles_select=="TRUE"
63 --exclf '$exclfiles.true'
64 #end if
65 ## Colour of the Loess curve drawn before subtraction
66 --exclcol '$exclcol'
67 ## Output datasets: variant table and PDF of plots
68 --outn '$outn'
69 --pdfn '$pdfn'
70 ]]></command>
71
72 <inputs>
73 <param type="data" name="inf" format="vcf" label="VCF file"/>
74 <!-- Chromosome layout: each built-in species carries a fixed hidden name; 'other' takes a two-column table from the history instead. -->
75 <conditional name="species">
76 <param name="species_select" type="select" label="Select the species">
77 <option value="Celegans">C. elegans</option>
78 <option value="Zebrafish">Zebrafish</option>
79 <option value="Brachypodium">Brachypodium</option>
80 <option value="Arabidopsis">Arabidopsis</option>
81 <option value="other">other</option>
82 </param>
83 <when value="Celegans">
84 <param name="ce" type="hidden" value="C.elegans" label="The C. elegans chromosome numbers and lengths (in Mb)" help=""/>
85 </when>
86 <when value="Zebrafish">
87 <param name="ze" type="hidden" value="Zebrafish" label="The Zebrafish chromosome numbers and lengths (in Mb)" help=""/>
88 </when>
89 <when value="Brachypodium">
90 <param name="br" type="hidden" value="Brachypodium" label="The Brachypodium chromosome numbers and lengths (in Mb)" help=""/>
91 </when>
92 <when value="Arabidopsis">
93 <param name="ar" type="hidden" value="Arabidopsis" label="The Arabidopsis chromosome numbers and lengths (in Mb)" help=""/>
94 </when>
95 <when value="other">
96 <param name="ot" type="data" format="tabular" label="Select file with chromosome numbers and lengths (in Mb) from your history" help="Table consisting of chromosome number in column 1 and length (in Mb) in column 2 (e.g. 'CHRI 16' or 'CHR1 16') with no column header names, tab-delimitation, and no quotation marks in a .txt file"/>
97 </when>
98 </conditional>
99 <!-- Minimum variant quality; the default is 200. -->
100 <param type="float" name="qual" value="200" label="Filter by quality" help=""/>
101 <!-- Allele-ratio source: the hidden value in each branch mirrors the selected option. -->
102 <conditional name="allfreq">
103 <param name="allfreq_select" type="select" label="Select how allele ratios are calculated">
104 <option value="AB">AB</option>
105 <option value="ratio">AO/(AO+RO)</option>
106 </param>
107 <when value="AB">
108 <param name="ab" type="hidden" value="AB" label="Use AB field from Freebayes" help="Use AB field (from Freebayes) as the value for allele frequency"/>
109 </when>
110 <when value="ratio">
111 <param name="ratio" type="hidden" value="ratio" label="Use AO/(AO+RO) calculation from Freebayes" help="Use AO/(AO+RO) calculation (from Freebayes) as the value for allele frequency"/>
112 </when>
113 </conditional>
114 <!-- Variant types to plot: TRUE keeps only SNPs, FALSE keeps every variant type. -->
115 <conditional name="only_snp">
116 <param name="only_snp_select" type="select" label="Select type of variants to use for plotting">
117 <option value="TRUE">only SNP variants</option>
118 <option value="FALSE">all variant types</option>
119 </param>
120 <when value="TRUE">
121 <param name="true" type="hidden" value="TRUE" label="Use only SNP variants" help="Use only SNP variants"/>
122 </when>
123 <when value="FALSE">
124 <param name="false" type="hidden" value="FALSE" label="Use all types of variants" help="Use all types of variants"/>
125 </when>
126 </conditional>
127 <!-- Loess smoothing span and the colours of the scatterplot points and Loess curve. -->
128 <param type="float" name="lsp" value="0.4" label="Loess span" help="Parameter that controls the smoothing of the Loess curve"/>
129 <param type="text" name="pcol" value="black" label="Colour of scatterplot points" help="See below for list of supported colors"/>
130 <param type="text" name="lcol" value="red" label="Colour of Loess curve" help="See below for list of supported colors"/>
131 <!-- X-axis scaling: TRUE uses one fixed Mb scale for all chromosomes, FALSE scales by chromosome length. -->
132
133 <conditional name="xaxis">
134 <param name="xaxis_select" type="select" label="Spacing of x-axis intervals">
135 <option value="TRUE">Uniform lengths for Mb</option>
136 <option value="FALSE">Chromosome-scaled lengths for Mb</option>
137 </param>
138 <when value="TRUE">
139 <param name="true" type="hidden" value="TRUE" label="Uniform spacing of the x-axis based on Mb" help="Scale of x-axis (in Mb) is fixed for the scatter plots and frequency plots across all chromosomes"/>
140 </when>
141 <when value="FALSE">
142 <param name="false" type="hidden" value="FALSE" label="Variable spacing of the x-axis based on chromosome lengths" help="Scale of x-axis (in Mb) is dependent on chromosome length for the scatter plots and frequency plots for all chromosomes"/>
143 </when>
144 </conditional>
145 <!-- Frequency-plot settings: bin size and the allele-ratio limits written as 'lower-upper'. -->
146 <param type="integer" name="bsize" value="1000000" label="Bin-size for frequency plots of homozygous variants" help=""/>
147 <param type="text" name="freqthr" value="0.0-1.0" label="Limits of allele ratios for variant to be considered homozygous" help="For frequency plots"/>
148 <!-- Y-axis of the frequency plots: FALSE (listed first, so the default) plots actual counts, TRUE applies the normalisation. -->
149 <conditional name="binnorm">
150 <param name="binnorm_select" type="select" label="Normalization of y-axis in frequency plots">
151 <option value="FALSE">Actual Frequency</option>
152 <option value="TRUE">Normalized Frequency</option>
153 </param>
154 <when value="TRUE">
155 <param name="true" type="hidden" value="TRUE" label="Normalized y-axis frequency values based on formula" help="Normalisation formula as in cloudmap paper"/>
156 </when>
157 <when value="FALSE">
158 <param name="false" type="hidden" value="FALSE" label="Original frequency y-axis values" help=" "/>
159 </when>
160 </conditional>
161 <!-- Optional subtraction list: FALSE passes a literal FALSE, TRUE exposes a tabular dataset selector. -->
162 <conditional name="exclfiles">
163 <param name="exclfiles_select" type="select" label="Additional exclusion of variants by subtraction">
164 <option value="FALSE">No</option>
165 <option value="TRUE">Yes</option>
166 </param>
167 <when value="FALSE">
168 <param name="false" type="hidden" value="FALSE" label="No additional variant subtraction" help=""/>
169 </when>
170 <when value="TRUE">
171 <param name="true" type="data" format="tabular" label="Select variant lists to subtract from your history" help="Requires .txt file with tab-delimited columns for CHR POS REF ALT. Recommend directly using the tables generated by this tool from other samples or as a reference for the required format"/>
172 </when>
173 </conditional>
174 <!-- Colour of the Loess curve computed before the optional subtraction. -->
175 <param type="text" name="exclcol" value="green" label="Colour of original Loess curve (before additional variant subtraction)" help="See below for list of supported colors"/>
176 </inputs>
177
178 <outputs> <!-- outn: variant table, declared tabular so it can be selected as a subtraction list in the exclfiles input (which only accepts tabular); pdfn: the PDF of plots. -->
179 <data name="outn" format="tabular"/>
180 <data name="pdfn" format="pdf"/>
181 </outputs>
182
183
184 <tests> <!-- Single functional test: runs the tool on nor22.vcf, sets no other parameter, and compares the text output (outn) with output.txt; the PDF output is not checked. -->
185 <test>
186 <param name="inf" value="nor22.vcf"/>
187 <output name="outn" file="output.txt"/>
188 </test>
189 </tests>
190
191 <help><![CDATA[
192
193 **What it does**
194
195 This tool generates plots based on variant allele ratios along the genome to facilitate mutation mapping using the Variant Discovery Mapping approach (a form of bulk segregant linkage analysis). It has a few basic filtering options built-in for improved functionality as it serves as the last step in the VDM pipeline [ref].
196
197 Note that this tool is only compatible with VCF files generated by Freebayes variant calling and annotated with SnpEff.
198
199 It is a replacement for the deprecated cloudmap pipeline and plotting tool. For more details, see the paper [link pending].
200
201 A standalone R version of this code with additional data/metric outputs can be found at https://github.com/fxce/vdm_plot.
202
203
204
205 ------
206
207 **OUTPUT**
208
209 ------
210
211 This tool generates a text file and a pdf file.
212
213 The text file lists the variants that have passed quality filtering along with information on position, allele ratio, details on the mutation, and gene annotation (as parsed from the Freebayes VCF INFO and SnpEff annotation). This offers an easy reference to check for variants after identifying a region of interest from the plots and the parsed format facilitates sorting in e.g. Excel for ones that affect protein-coding.
214
215 The pdf consists of (1) scatterplots based on allele ratios and their corresponding Loess curves along each chromosome; (2) barplots for the frequency of homozygous (ratio=1) variants along each chromosome.
216
217
218
219 ------
220
221 **INPUT**
222
223 ------
224
225 **Select data**
226
227 Load VCF file from history (generated by Freebayes and then annotated with SnpEff).
228
229
230
231 **Select species**
232
233 The configuration of chromosomes and sizes needs to be known and this tool is preloaded with this information for C. elegans, Zebrafish, Brachypodium, and Arabidopsis. To use a custom chromosome configuration, a table containing chromosome numbers in column 1 and their corresponding sizes in Mb in column 2 should be prepared and saved as a tab-delimited .txt file with no column header names, and no quotation marks. Once uploaded to your Galaxy workspace/history, selecting option 'other' will allow for your custom file to be loaded from your history and used instead. e.g.
234
235 CHR1 16
236
237 CHR2 17
238
239
240
241 **Filter Quality**
242
243 Filter by quality, only variants with greater or equal quality values will be retained. Normally, a filter of quality 200 produces a good balance between reliable data and a reasonable amount of variants for plotting although this is dependent on your dataset. It may be useful to test with other quality values (e.g. 100 and 300) if too few or too many variants pass the filter.
244
245
246
247 **How allele ratio is calculated**
248
249 Option 'AB' uses the AB value parsed from the VCF INFO that is generated by Freebayes. Option 'AO/(AO+RO)' calculates this ratio value from AO and RO values parsed from the VCF INFO that is generated by Freebayes.
250
251 The AB value should equal AO/(AO+RO) in almost all cases; this option is included because there are reports that AB deviates for variants with low quality.
252
253
254
255 **Type of variants**
256
257 A choice between using only SNP variants or using all variants.
258
259 (The main reason to consider looking at only SNP variants is that Freebayes identifies a number of variants to have arisen from complex substitution/insertion/deletion/other events and it is uncertain how reproducibly some of these long stretches of sequence would be reported across different sequencing reactions. If inconsistent, some of these might be missed by subtraction of the variants in the reference/background leading to false positives.)
260
261
262
263 **Loess span**
264
265 This parameter controls the degree of smoothing for the Loess curve and can have a value between 0 and 1, higher values will result in smoother curves.
266
267
268
269 **Colour of the scatterplot points**
270
271 Any colour names compatible with default R can be used, e.g. can refer to http://www.stat.columbia.edu/~tzheng/files/Rcolor.pdf.
272
273
274
275 **Colour of Loess curve**
276
277 Any colour names compatible with default R can be used, e.g. can refer to http://www.stat.columbia.edu/~tzheng/files/Rcolor.pdf.
278
279
280
281 **Spacing of x-axis**
282
283 'Uniform spacing ...' sets the x-axis to be scaled identically, i.e. with the same interval for 1Mb, across all chromosomes and for both scatterplots and barplots. 'Variable spacing...' sets the x-axis to scale depending on the length of the chromosome being plotted.
284
285
286
287 **Bin-size**
288
289 Specifies how the frequency for homozygous variants is binned (in bp) and plotted in the barplot with the default 1000000 setting each bar to span 1Mb.
290
291
292
293 **Limits for allele ratio to be considered homozygous**
294
295 Variants with allele ratios below the lower value will be considered homozygous REF and those above the upper value will be considered homozygous ALT; this option only affects the barplots for the frequency of homozygous variants along chromosomes. The default "0.0-1.0" means an allele ratio of 0 is considered homozygous for the reference allele and 1 is considered homozygous for the alternate allele. Using a value <1.0 for the upper limit can help to visualise the profile of near-homozygous variants and may be more informative if the frequency of homozygous variants (allele ratio=1.0) is low e.g. due to suspected contamination of pooled sample with non-homozygous lines.
296
297
298
299 **Normalized y-axis frequency values**
300
301 Select between the actual frequency values for homozygous variants in the barplots and normalized frequency. Normalized frequency applies the formula from the Cloudmap paper (Minevich et al., 2012 Genetics 192(4):1249-1269) to improve mapping signal, though it is more relevant for Hawaiian mapping.
302
303
304
305 **Additional exclusion of variants**
306
307 This provides the option for a user submitted list of variants to be subtracted from the sample list before plotting and is compatible with the variant table generated by this tool. The subtraction is based on matching variants with identical chromosomal location, reference nucleotide, and alternate nucleotide which are then removed. Therefore, formatting of the subtraction list requires columns for chromosome, position, reference nucleotide, and alternate nucleotide which are named 'CHR POS REF ALT', e.g. (+RATIO)
308
309 CHR POS REF ALT
310
311 III 12 G A
312
313
314
315 Note that selecting multiple subtraction files here is not cumulative; only one variant list is subtracted from the sample each time before an output is generated. Merging variant lists from different samples would have to be performed manually before being uploaded and selected as a subtraction list.
316
317
318
319 **Colour of original Loess curve**
320
321 Any colour names compatible with default R can be used, e.g. can refer to http://www.stat.columbia.edu/~tzheng/files/Rcolor.pdf.
322
323
324
325
326 ]]>
327 </help>
328 </tool>