annotate SNP_Mapping.xml @ 36:ca390f9c6b29 draft

Uploaded
author gregory-minevich
date Sat, 31 May 2014 10:21:44 -0400
parents 11da66cb7216
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
32
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
1 <tool id="snp_mapping_using_wgs" name="CloudMap: Hawaiian Variant Mapping with WGS data">
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
2 <description>Map a mutation by plotting recombination frequencies resulting from crossing to a highly polymorphic strain</description>
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
3 <command interpreter="python">
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
4 #if $source.source_select=="elegans" #SNP_Mapping.py --sample_vcf "$sample_vcf" --loess_span "$loess_span" --d_yaxis "$d_yaxis" --h_yaxis "$h_yaxis" --points_color "$points_color" --loess_color "$loess_color" --output "$output" --location_plot_output "$location_plot_output" --standardize "$standardize" --normalize_bins "$normalize_bins" --break_file "$source.Celegans"
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
5 #else if $source.source_select=="brachypodium" #SNP_Mapping.py --sample_vcf "$sample_vcf" --loess_span "$loess_span" --d_yaxis "$d_yaxis" --h_yaxis "$h_yaxis" --points_color "$points_color" --loess_color "$loess_color" --output "$output" --location_plot_output "$location_plot_output" --standardize "$standardize" --normalize_bins "$normalize_bins" --break_file "$source.Brachy"
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
6 #else if $source.source_select=="arabidopsis" #SNP_Mapping.py --sample_vcf "$sample_vcf" --loess_span "$loess_span" --d_yaxis "$d_yaxis" --h_yaxis "$h_yaxis" --points_color "$points_color" --loess_color "$loess_color" --output "$output" --location_plot_output "$location_plot_output" --standardize "$standardize" --normalize_bins "$normalize_bins" --break_file "$source.Arabidop"
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
7 #else if $source.source_select=="other" #SNP_Mapping.py --sample_vcf "$sample_vcf" --loess_span "$loess_span" --d_yaxis "$d_yaxis" --h_yaxis "$h_yaxis" --points_color "$points_color" --loess_color "$loess_color" --output "$output" --location_plot_output "$location_plot_output" --standardize "$standardize" --normalize_bins "$normalize_bins" --break_file "$source.Other"
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
8 #end if
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
9 </command>
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
10
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
11 <inputs>
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
12 <conditional name="source">
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
13 <param name="source_select" type="select" label="Please select the species">
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
14 <option value="elegans">C. elegans</option>
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
15 <option value="brachypodium">Brachypodium</option>
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
16 <option value="arabidopsis">Arabidopsis</option>
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
17 <option value="other">Other</option>
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
18 </param>
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
19 <when value="elegans">
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
20 <param name="Celegans" type="hidden" value="C.elegans" label="The C.elegans configuration file by default" help="C.elegans help" />
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
21 </when>
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
22 <when value="brachypodium">
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
23 <param name="Brachy" type="hidden" value="Brachypodium" label="The Brachypodium configuration file by default" help="Brachypodium help" />
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
24 </when>
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
25 <when value="arabidopsis">
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
26 <param name="Arabidop" type="hidden" value="Arabidopsis" label="The Arabidopsis configuration file by default" help="Arabidopsis help" />
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
27 </when>
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
28 <when value="other">
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
29 <param name="Other" type="data" format="tabular" label="Please select your 'Other species' configuration file from your history" help="Tabular configuration file for Other species support" />
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
30 </when>
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
31 </conditional>
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
32 <param name="sample_vcf" size = "125" type="data" format="vcf" label="WGS Mutant VCF File" help="WGS Mutant VCF file from pooled F2 mutants that have been crossed to a mapping strain. The VCF should contain data from only mapping strain (e.g. Hawaiian) SNP positions" />
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
33 <param name="loess_span" size = "15" type="float" value=".1" label="Loess span" help="Parameter that controls the degree of data smoothing."/>
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
34 <param name="d_yaxis" size = "15" type="float" value="1" label="Y-axis upper limit for scatter plot" />
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
35 <param name="h_yaxis" size = "15" type="integer" value="0" label="Y-axis upper limit for frequency plot" help="'0' default adjusts scale to tallest peak" />
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
36 <param name="points_color" size = "15" type="text" value="gray27" label="Color for data points" help="See below for list of supported colors"/>
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
37 <param name="loess_color" size = "15" type="text" value="red" label="Color for loess regression line" help="See below for list of supported colors"/>
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
38 <param name="standardize" type="boolean" truevalue="true" falsevalue="false" checked="true" label="Standardize X-axis" help="Scatter plots and frequency plots from separate chromosomes will have uniform X-axis spacing for comparison"/>
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
39 <param name="normalize_bins" type="boolean" truevalue="true" falsevalue="false" checked="true" label="Normalize frequency plots" help="Frequency plots of pure parental allele counts will be normalized according to the equation in Fig.7B of the CloudMap paper"/>
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
40 </inputs>
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
41 <outputs>
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
42 <data name="output" format="tabular" /> <!-- tabular results table; datatype is set by format (type is not a data attribute) -->
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
43 <data name="location_plot_output" format="pdf" />
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
44 </outputs>
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
45 <requirements>
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
46 <requirement type="python-module">sys</requirement>
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
47 <requirement type="python-module">optparse</requirement>
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
48 <requirement type="python-module">csv</requirement>
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
49 <requirement type="python-module">re</requirement>
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
50 <requirement type="python-module">decimal</requirement>
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
51 <requirement type="python-module">rpy</requirement>
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
52 </requirements>
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
53 <tests>
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
54 <param name="sample_vcf" value="" />
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
55 <output name="output" file="" />
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
56 <output name="location_plot_output" file="" /> <!-- name must match the pdf data output declared in outputs -->
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
57 </tests>
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
58 <help>
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
59 **What it does:**
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
60
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
61 This tool is part of the CloudMap pipeline for analysis of mutant genome sequences. For further details, please see `Gregory Minevich, Danny S. Park, Daniel Blankenberg, Richard J. Poole and Oliver Hobert. CloudMap: A Cloud-based Pipeline for Analysis of Mutant Genome Sequences. (Genetics 2012 In Press)`__
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
62
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
63 .. __: http://hobertlab.org/original-research/
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
64
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
65 CloudMap workflows, shared histories and reference datasets are available at the `CloudMap Galaxy page`__
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
66
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
67 .. __: http://usegalaxy.org/cloudmap
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
68
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
69 CloudMap video user guides and Frequently Asked Questions (FAQs) are available at the `Hobert lab website`__
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
70
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
71 .. __: http://hobertlab.org/cloudmap
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
72
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
73 This tool improves upon, and automates, the method described in Doitsidou et al., PLoS One 2010 for mapping causal mutations using whole genome sequencing data.
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
74
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
75 Sample CloudMap output for a linked chromosome:
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
76
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
77 .. image:: http://www.hobertlab.org/CloudMap/Linked_LG_500px.png
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
78
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
79
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
80 The polymorphic Hawaiian strain CB4856 is used as a mapping strain in most cases, but in principle any sequenced nematode strain that is significantly different from the mutant strain can be used for mapping. The tool plots the ratio of mapping strain (Hawaiian)/mutant strain (N2) nucleotides at all SNP positions, reflecting the number of recombinants in the sequenced pool of animals. Chromosomes that contain regions of linkage to the causal mutation will have regions where the ratio of mapping strain (Hawaiian)/total reads is equal to 0. The scatter plots for such linked regions will have a high number of data points lying exactly on the X-axis. A loess regression line is plotted through all the points on a given chromosome, which helps to delimit the linked region more accurately.
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
81
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
82 Each scatter plot has a corresponding frequency plot that displays regions of linked chromosomes where pure parental (mutant strain) alleles are concentrated. 1Mb bins for the 0 ratio SNP positions are colored gray by default and .5Mb bins are colored in red. By default, frequency plots of pure parental alleles are normalized to remove false linkage caused by previously described (Seidel et al. 2008) patterns of genetic incompatibility between Bristol and Hawaiian strains. This normalization can be turned off via a checkbox input form setting.
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
83
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
84
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
85 The experimental design required to generate data for the plots is described in the CloudMap paper (Fig.6A):
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
86
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
87 .. image:: http://www.hobertlab.org/CloudMap/Doitsidou_2010_PLoS_Fig.1_500px.png
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
88
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
89
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
90 ------
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
91
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
92 **Input:**
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
93
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
94
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
95 This tool accepts as input a single VCF file containing reference (e.g. Bristol) and alternate (e.g. Hawaiian) mapping strain allele calls at each of the mapping strain variant positions (e.g. 112,000 Hawaiian SNPs) in the pooled mutant sample. This input VCF is generated at an earlier analysis step by running the GATK Unified Genotyper on a BAM alignment file of the pooled mutant sample with a provided reference file of mapping strain variants (e.g. Hawaiian SNPs) in VCF format. The reader is referred to the user guide and online video for direction on this procedure.
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
96
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
97 Default GATK Unified Genotyper parameters for mapping quality, base quality and coverage at each SNP position typically yield good results, though users may experiment with adjusting these parameters. In our testing, low threshold filtering on base pair quality (default settings) has been useful in improving accuracy of plots while high threshold filtering on coverage has skewed plot accuracy.
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
98
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
99 The required VCF of mapping strain (e.g. Hawaiian) SNPs is a reference file that contains mapping strain SNP positions and reference base pairs at each position. It is available in the `CloudMap Shared Data library`__
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
100
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
101 .. __: http://usegalaxy.org/library
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
102
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
103 You may also make your own VCF of mapping strain variant positions following the steps described in the CloudMap paper.
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
104
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
105 The CloudMap Hawaiian Variant Mapping with WGS Data tool supports data from any organism that has been crossed to a mapping strain for which variant information is available. C. elegans, Brachypodium and Arabidopsis are natively supported. For all other organisms, users must provide a simple tab-delimited configuration file containing chromosome numbers and respective lengths (example configuration files for most major organisms are provided at http://usegalaxy.org/cloudmap). Additional files required for other organisms are the same as described for C. elegans: a VCF file consisting of pooled F2 mutant progeny sequencing data, and a VCF file of the mapping strain variants.
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
106
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
107
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
108 **Output:**
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
109
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
110 The tool also provides a tabular output file that contains a count of the number of reference and alternate variants at each mapping strain variant position as well as the ratio of mapping strain (e.g. Hawaiian)/total reads at that position. The position of each mapping strain SNP in map units and physical coordinates is also provided in the output file.
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
111
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
112
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
113 ------
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
114
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
115 **Settings:**
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
116
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
117 .. class:: infomark
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
118
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
119 Information on loess regression and the loess span parameter:
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
120 http://en.wikipedia.org/wiki/Local_regression
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
121
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
122 .. class:: infomark
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
123
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
124 Based on our testing, we've settled on .1 as a loess span default. Larger values result in smoothing of the line to reflect trends at a more macro level. Smaller values result in loess lines that more closely reflect local data fluctuations.
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
125
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
126 .. class:: infomark
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
127
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
128 Supported colors for data points and loess regression line:
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
129
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
130 http://www.stat.columbia.edu/~tzheng/files/Rcolor.pdf
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
131
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
132 http://research.stowers-institute.org/efg/R/Color/Chart/ColorChart.pdf
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
133
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
134
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
135
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
136 .. class:: warningmark
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
137
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
138 This tool requires that the statistical programming environment R has been installed on the system hosting Galaxy (http://www.r-project.org/). If you are running this tool on Galaxy via the Cloud, this does not apply to you.
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
139
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
140
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
141 ------
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
142
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
143 **Citation:**
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
144
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
145 This tool is part of the CloudMap package from the Hobert Lab. If you use this tool, please cite `Gregory Minevich, Danny S. Park, Daniel Blankenberg, Richard J. Poole, and Oliver Hobert. CloudMap: A Cloud-based Pipeline for Analysis of Mutant Genome Sequences. (Genetics 2012 In Press)`__
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
146
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
147 .. __: http://hobertlab.org/cloudmap
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
148
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
149 Correspondence to gm2123@columbia.edu (Gregory Minevich) or r.poole@ucl.ac.uk (Richard J. Poole) or or38@columbia.edu (Oliver Hobert)
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
150 </help>
11da66cb7216 Uploaded
gregory-minevich
parents:
diff changeset
151 </tool>