annotate VDM_Mapping.xml @ 10:c80c0aabc869 draft default tip

Uploaded
author gregory-minevich
date Fri, 09 May 2014 16:55:33 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
10
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
1 <tool id="cloudmap_variant_discovery_mapping" name="CloudMap: Variant Discovery Mapping with WGS data">
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
2 <description>Map a mutation using in silico bulk segregant linkage analysis using variants that are already present in the mutant strain of interest (rather than those introduced by a cross to a polymorphic strain).</description>
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
3 <command interpreter="python">
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
4 #if $source.source_select=="elegans" #VDM_Mapping.py --sample_vcf "$sample_vcf" --loess_span "$loess_span" --d_yaxis "$d_yaxis" --h_yaxis "$h_yaxis" --points_color "$points_color" --loess_color "$loess_color" --output "$output" --location_plot_output "$location_plot_output" --standardize "$standardize" --normalize_bins "$normalize_bins" --break_file "$source.Celegans"
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
5 #else if $source.source_select=="brachypodium" #VDM_Mapping.py --sample_vcf "$sample_vcf" --loess_span "$loess_span" --d_yaxis "$d_yaxis" --h_yaxis "$h_yaxis" --points_color "$points_color" --loess_color "$loess_color" --output "$output" --location_plot_output "$location_plot_output" --standardize "$standardize" --normalize_bins "$normalize_bins" --break_file "$source.Brachy"
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
6 #else if $source.source_select=="arabidopsis" #VDM_Mapping.py --sample_vcf "$sample_vcf" --loess_span "$loess_span" --d_yaxis "$d_yaxis" --h_yaxis "$h_yaxis" --points_color "$points_color" --loess_color "$loess_color" --output "$output" --location_plot_output "$location_plot_output" --standardize "$standardize" --normalize_bins "$normalize_bins" --break_file "$source.Arabidop"
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
7 #else if $source.source_select=="other" #VDM_Mapping.py --sample_vcf "$sample_vcf" --loess_span "$loess_span" --d_yaxis "$d_yaxis" --h_yaxis "$h_yaxis" --points_color "$points_color" --loess_color "$loess_color" --output "$output" --location_plot_output "$location_plot_output" --standardize "$standardize" --normalize_bins "$normalize_bins" --break_file "$source.Other"
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
8 #end if
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
9 </command>
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
10
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
11 <inputs>
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
12 <conditional name="source">
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
13 <param name="source_select" type="select" label="Please select the species">
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
14 <option value="elegans">C. elegans</option>
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
15 <option value="brachypodium">Brachypodium</option>
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
16 <option value="arabidopsis">Arabidopsis</option>
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
17 <option value="other">Other</option>
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
18 </param>
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
19 <when value="elegans">
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
20 <param name="Celegans" type="hidden" value="C.elegans" label="The C.elegans configuration file by default" help="C.elegans help" />
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
21 </when>
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
22 <when value="brachypodium">
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
23 <param name="Brachy" type="hidden" value="Brachypodium" label="The Brachypodium configuration file by default" help="Brachypodium help" />
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
24 </when>
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
25 <when value="arabidopsis">
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
26 <param name="Arabidop" type="hidden" value="Arabidopsis" label="The Arabidopsis configuration file by default" help="Arabidopsis help" />
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
27 </when>
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
28 <when value="other">
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
29 <param name="Other" type="data" format="tabular" label="Please select your 'Other species' configuration file from your history" help="Tabular configuration file for Other species support" />
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
30 </when>
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
31 </conditional>
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
32 <param name="sample_vcf" size = "125" type="data" format="vcf" label="WGS Mutant VCF File" help="WGS Mutant VCF file from pooled F2 mutants that have been outcrossed to any strain. The VCF should contain data from only heterozygous or homozygous base position as determined by the GATK Unified Genotyper filtered for quality score > Q200" />
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
33 <param name="loess_span" size = "15" type="float" value=".4" label="Loess span" help="Parameter that controls the degree of data smoothing."/>
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
34 <param name="d_yaxis" size = "15" type="float" value="1" label="Y-axis upper limit for scatter plot" />
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
35 <param name="h_yaxis" size = "15" type="integer" value="0" label="Y-axis upper limit for frequency plot" help="'0' default adjusts scale to tallest peak"/>
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
36 <param name="points_color" size = "15" type="text" value="gray27" label="Color for data points" help="See below for list of supported colors"/>
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
37 <param name="loess_color" size = "15" type="text" value="green" label="Color for loess regression line" help="See below for list of supported colors"/>
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
38 <param name="standardize" type="boolean" truevalue="true" falsevalue="false" checked="true" label="Standardize X-axis" help="Scatter plots and frequency plots from separate chromosomes will have uniform X-axis spacing for comparison"/>
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
39 <param name="normalize_bins" type="boolean" truevalue="true" falsevalue="false" checked="true" label="Normalize frequency plots" help="Frequency plots of pure parental allele counts will be normalized according to the equation in Supp Fig.4 of the CloudMap paper"/>
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
40 </inputs>
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
41 <outputs>
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
42 <data name="output" format="tabular" />
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
43 <data name="location_plot_output" format="pdf" />
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
44 </outputs>
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
45 <requirements>
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
46 <requirement type="python-module">sys</requirement>
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
47 <requirement type="python-module">optparse</requirement>
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
48 <requirement type="python-module">csv</requirement>
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
49 <requirement type="python-module">re</requirement>
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
50 <requirement type="python-module">decimal</requirement>
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
51 <requirement type="python-module">rpy</requirement>
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
52 </requirements>
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
53 <tests>
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
54 <param name="sample_vcf" value="" />
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
55 <output name="output" file="" />
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
56 <output name="location_plot_output" file="" />
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
57 </tests>
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
58 <help>
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
59 **What it does:**
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
60
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
61 This tool is part of the CloudMap pipeline for analysis of mutant genome sequences. For further details, please see `Gregory Minevich, Danny S. Park, Daniel Blankenberg, Richard J. Poole and Oliver Hobert. CloudMap: A Cloud-based Pipeline for Analysis of Mutant Genome Sequences. (Genetics 2012 In Press)`__
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
62
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
63 .. __: http://hobertlab.org/original-research/
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
64
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
65 CloudMap workflows, shared histories and reference datasets are available at the `CloudMap Galaxy page`__
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
66
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
67 .. __: http://usegalaxy.org/cloudmap
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
68
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
69 CloudMap video user guides and Frequently Asked Questions (FAQs) are available at the `Hobert lab website`__
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
70
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
71 .. __: http://hobertlab.org/cloudmap
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
72
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
73
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
74 Although Hawaiian Variant Mapping is the preferred method for mapping causal mutations in whole genome sequenced strains (see CloudMap Hawaiian Variant Mapping with WGS tool), there remain certain scenarios where alternate mapping approaches are useful. For instance, introducing tens of thousands of Hawaiian variants into a mutant strain may not be desirable for individuals concerned with the possibility that some of these Hawaiian variants may act as modifiers of a given phenotype. Behavioral mutants may be especially vulnerable in this regard. Furthermore, in the case of suppressor screens or other screens that have been performed in a mutant background, it is tedious to recover both the suppressor variant and the starting mutation when picking the F2 progeny required for the Hawaiian Variant Mapping technique. In these scenarios, it is useful to not have to rely on a polymorphic mapping strain like the Hawaiian strain.
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
75
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
76 A recent study in plants (Abe et al. 2012) uses EMS-induced variants and bulk segregant analysis to map a phenotype-causing mutation. We have developed a similar method, which we call “Variant Discovery Mapping”. Our method makes use of background variants in addition to EMS-induced variants (including indels as well as SNPs), and also uses the bulk segregant approach.
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
77
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
78 **The conceptual strategy of variant discovery mapping is to perform in silico bulk segregant linkage analysis using variants that are already present in the mutant strain of interest, rather than examining those introduced by a cross to a polymorphic strain.** Any individual mutant strain will contain a certain number of homozygous variants compared to the reference genome. These homozygous variants are of two types: 1) those directly induced during mutagenesis (one or more of which are responsible for the mutant phenotype) (Fig.11A red diamonds) and 2) those already present in the background of the parental strain, either because of genetic drift or because of the parental strain containing, for example, a transgene that was integrated into the genome by irradiation (Fig.11A pale blue diamonds).
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
79
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
80
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
81 .. image:: http://www.hobertlab.org/CloudMap/Fig.11A_VDM.png
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
82
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
83
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
84 Following an outcross to a non-parental strain and selection of a pool of F2-mutant recombinants, these homozygous variants will segregate according to their degree of linkage to the phenotype-inducing locus. The degree of linkage will be directly reflected in the allele frequency among the pool of recombinants and this can be represented as scatter plots of the ratio of variant reads/total reads present in the pool of sequenced recombinants (Fig.11A). We then plot a loess regression line through all the points on a given chromosome to give greater accuracy to the mapping region (Fig.11B). The loess lines on scatter plots for linked chromosomes approach 1, indicating retention of the original homozygous variants in the linked region. We also draw corresponding frequency plots that display regions of linked chromosomes where pure parental allele variant positions are concentrated (positions where the ratio of variant reads/total reads is equal to 1) (Fig.11B). In these frequency plots, 1Mb bins of the ratio-1 variant positions are colored gray by default and .5Mb bins are colored in red.
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
85
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
86
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
87 .. image:: http://www.hobertlab.org/CloudMap/Fig.11B-C_VDM_2.png
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
88
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
89
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
90
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
91
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
92 **We have tested the Variant Discovery Mapping method by crossing mutant strains to either N2 or Hawaiian CB4856 strains. In theory, it should be possible to cross the mutant strain to the original starting screening strain (as opposed to N2). For example, one might want to perform such a cross in the case of a suppressor screen where the screening strain has a second mutation that must be present for the mutant phenotype to be visible. However, crossing to the original starting screening strain will result in fewer mutations (relative to N2) that will be retained in the pooled sample that is sequenced. For this reason, mapping resulting from a cross to the original starting screening strain will not be as accurate as mapping from a cross to N2 and certainly not as accurate as mapping from a cross to the Hawaiian strain.**
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
93
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
94 ------
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
95
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
96 **Input:**
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
97
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
98 This tool accepts as input a single VCF file containing reference (e.g. Bristol) and alternate (e.g. EMS, background, or crossing strain variant) allele calls in the pooled mutant sample. This input VCF is generated at an earlier analysis step by running the GATK Unified Genotyper on a BAM alignment file of the pooled mutant sample and selecting only heterozygous or homozygous base positions as determined by the GATK Unified Genotyper (filtered for quality score > Q200). The reader is referred to the user guide and online video for direction on this procedure.
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
99
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
100 The CloudMap Variant Discovery Mapping with WGS Data tool supports data from any organism. C. elegans, Brachypodium and Arabidopsis are natively supported. For all other organisms, users must provide a simple tab-delimited configuration file containing chromosome numbers and respective lengths (example configuration files for most major organisms provided at http://usegalaxy.org/cloudmap).
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
101
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
102
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
103 **Output:**
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
104
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
105 The tool produces a PDF file containing the scatter and frequency plots described above. It also provides a tabular output file that contains a count of the number of reference and alternate variants in the pooled mutant sample as well as the ratio of alternate alleles/total reads at each variant position.
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
106
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
107
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
108 ------
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
109
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
110 **Settings:**
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
111
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
112 .. class:: infomark
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
113
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
114 Information on loess regression and the loess span parameter:
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
115 http://en.wikipedia.org/wiki/Local_regression
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
116
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
117 .. class:: infomark
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
118
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
119 Based on our testing, we've settled on .4 as a loess span default. Larger values result in smoothing of the line to reflect trends at a more macro level. Smaller values result in loess lines that more closely reflect local data fluctuations.
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
120
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
121 .. class:: infomark
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
122
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
123 Supported colors for data points and loess regression line:
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
124
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
125 http://www.stat.columbia.edu/~tzheng/files/Rcolor.pdf
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
126
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
127 http://research.stowers-institute.org/efg/R/Color/Chart/ColorChart.pdf
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
128
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
129
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
130
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
131 .. class:: warningmark
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
132
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
133 This tool requires that the statistical programming environment R has been installed on the system hosting Galaxy (http://www.r-project.org/). If you are running this tool on Galaxy via the Cloud, this does not apply to you.
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
134
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
135
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
136 ------
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
137
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
138 **Citation:**
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
139
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
140 This tool is part of the CloudMap package from the Hobert Lab. If you use this tool, please cite `Gregory Minevich, Danny S. Park, Daniel Blankenberg, Richard J. Poole, and Oliver Hobert. CloudMap: A Cloud-based Pipeline for Analysis of Mutant Genome Sequences. (Genetics 2012 In Press)`__
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
141
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
142 .. __: http://hobertlab.org/cloudmap
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
143
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
144 Correspondence to gm2123@columbia.edu (Gregory Minevich) or r.poole@ucl.ac.uk (Richard J. Poole) or or38@columbia.edu (Oliver Hobert)
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
145
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
146 </help>
c80c0aabc869 Uploaded
gregory-minevich
parents:
diff changeset
147 </tool>