annotate SNP_Mapping.xml @ 20:98d409af683c draft

Uploaded
author gregory-minevich
date Thu, 28 Jun 2012 14:21:31 -0400
parents 9b401db6f9ea
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
18
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
1 <tool id="snp_mapping_using_wgs" name="CloudMap: SNP mapping with WGS data">
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
2 <description>Map a mutation by plotting recombination frequencies resulting from crossing to a highly polymorphic strain</description>
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
3 <command interpreter="python">
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
4 #if $source.source_select=="elegans" #SNP_Mapping.py --sample_pileup $sample_pileup --haw_vcf $haw_vcf --loess_span $loess_span --d_yaxis $d_yaxis --h_yaxis $h_yaxis --points_color "$points_color" --loess_color "$loess_color" --output $output --location_plot_output $location_plot_output --standardize $standardize --normalize_bins $normalize_bins --break_file $source.Celegans
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
5 #else if $source.source_select=="arabadopsis" #SNP_Mapping.py --sample_pileup $sample_pileup --haw_vcf $haw_vcf --loess_span $loess_span --d_yaxis $d_yaxis --h_yaxis $h_yaxis --points_color "$points_color" --loess_color "$loess_color" --output $output --location_plot_output $location_plot_output --standardize $standardize --normalize_bins $normalize_bins --break_file $source.Arabadop
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
6 #else if $source.source_select=="other" #SNP_Mapping.py --sample_pileup $sample_pileup --haw_vcf $haw_vcf --loess_span $loess_span --d_yaxis $d_yaxis --h_yaxis $h_yaxis --points_color "$points_color" --loess_color "$loess_color" --output $output --location_plot_output $location_plot_output --standardize $standardize --normalize_bins $normalize_bins --break_file $source.Other
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
7 #end if
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
8 </command>
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
9
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
10 <inputs>
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
11 <conditional name="source">
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
12 <param name="source_select" type="select" label="Please select the species">
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
13 <option value="elegans">C. elegans</option>
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
14 <option value="arabadopsis">Arabidopsis</option> <!-- label shows the correct genus spelling; value keeps the legacy spelling because the command template and the matching <when> compare against "arabadopsis" -->
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
15 <option value="other">Other</option>
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
16 </param>
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
17 <when value="elegans">
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
18 <param name="Celegans" type="hidden" value="C.elegans" label="The C.elegans configuration file by default" help="C.elegans help" />
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
19 </when>
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
20 <when value="arabadopsis">
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
21 <param name="Arabadop" type="hidden" value="Arabadopsis" label="The Arabadopsis configuration file by default" help="Arabadopsis help" />
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
22 </when>
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
23 <when value="other">
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
24 <param name="Other" type="data" format="tabular" label="Please select your 'Other species' configuration file from your history" help="Tabular configuration file for Other species support" />
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
25 </when>
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
26 </conditional>
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
27 <param name="sample_pileup" size = "125" type="data" format="pileup" label="WGS Mutant Pileup File" help="WGS pileup file from pooled F2 mutants that have been crossed to a mapping strain. The pileup should contain data from only mapping strain (e.g. Hawaiian) SNP positions" />
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
28 <param name="haw_vcf" size = "125" type="data" format="vcf" label="VCF of mapping strain (e.g. Hawaiian) SNPs" help="A VCF reference file that contains mapping strain SNP positions and reference base pairs at each position"/>
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
29 <param name="loess_span" size = "15" type="float" value=".1" label="Loess span" help="Parameter that controls the degree of data smoothing."/>
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
30 <param name="d_yaxis" size = "15" type="float" value=".7" label="Y-axis upper limit for scatter plot" />
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
31 <param name="h_yaxis" size = "15" type="integer" value="500" label="Y-axis upper limit for frequency plot" />
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
32 <param name="points_color" size = "15" type="text" value="gray27" label="Color for data points" help="See below for list of supported colors"/>
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
33 <param name="loess_color" size = "15" type="text" value="red" label="Color for loess regression line" help="See below for list of supported colors"/>
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
34 <param name="standardize" type="boolean" truevalue="true" falsevalue="false" checked="true" label="Standardize X-axis" help="Scatter plots and frequency plots from separate chromosomes will have uniform X-axis spacing for comparison"/>
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
35 <param name="normalize_bins" type="boolean" truevalue="true" falsevalue="false" checked="true" label="Normalize frequency plots" help="Frequency plots of pure parental allele counts will be normalized according to the equation in Fig.7B of the CloudMap paper"/>
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
36 </inputs>
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
37 <outputs>
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
38 <data name="output" type="text" format="tabular" />
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
39 <data name="location_plot_output" format="pdf" />
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
40 </outputs>
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
41 <requirements>
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
42 <requirement type="python-module">sys</requirement>
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
43 <requirement type="python-module">optparse</requirement>
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
44 <requirement type="python-module">csv</requirement>
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
45 <requirement type="python-module">re</requirement>
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
46 <requirement type="python-module">decimal</requirement>
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
47 <requirement type="python-module">rpy</requirement>
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
48 </requirements>
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
49 <tests>
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
50 <param name="sample_pileup" value="" />
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
51 <param name="haw_vcf" value="" />
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
52 <output name="output" file="" />
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
53 <output name="location_plot_output" file="" /> <!-- name must match the "location_plot_output" data element declared in <outputs>; no output called "plot_output" exists -->
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
54 </tests>
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
55 <help>
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
56 **What it does:**
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
57
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
58 This tool is part of the CloudMap pipeline for analysis of mutant genome sequences. For further details, please see `Gregory Minevich, Danny Park, Richard J. Poole, Daniel Blankenberg, Anton Nekrutenko, and Oliver Hobert. CloudMap: A Cloud-based Pipeline for Analysis of Mutant Genome Sequences. (2012 In Preparation)`__
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
59
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
60 .. __: http://biochemistry.hs.columbia.edu/labs/hobert/literature.html
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
61
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
62 CloudMap workflows, shared histories and reference datasets are available at the `CloudMap Galaxy page`__
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
63
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
64 .. __: https://test.g2.bx.psu.edu/u/gal40/p/cloudmap
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
65
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
66 This tool improves upon the method described in Doitsidou et al., PLoS One 2010 for mapping causal mutations using whole genome sequencing data.
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
67
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
68 Sample output for a linked chromosome:
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
69
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
70 .. image:: http://biochemistry.hs.columbia.edu/labs/hobert/CloudMap/Linked_LG_500px.png
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
71
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
72
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
73 The polymorphic Hawaiian strain CB4856 is used as a mapping strain in most cases, but in principle any sequenced nematode strain that is significantly different from the mutant strain can be used for mapping. The tool plots the ratio of mapping strain (Hawaiian)/mutant strain (N2) nucleotides at all SNP positions, reflecting the number of recombinants in the sequenced pool of animals. Chromosomes that contain regions of linkage to the causal mutation will have regions where the ratio of mapping strain (Hawaiian)/total reads is equal to 0. The scatter plots for such linked regions will have a high number of data points lying exactly on the X axis. A loess regression line is plotted through all the points on a given chromosome, giving further accuracy to the linked region.
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
74
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
75 Each scatter plot has a corresponding frequency plot that displays the regions of a linked chromosome where 0-ratio SNP positions are concentrated. 1 Mb bins for the 0-ratio SNP positions are colored gray by default, and 0.5 Mb bins are colored red.
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
76
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
77
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
78 The experimental design required to generate data for the plots is described in Doitsidou et al., PLoS One 2010 Figure 1:
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
79
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
80 .. image:: http://biochemistry.hs.columbia.edu/labs/hobert/CloudMap/Doitsidou_2010_PLoS_Fig.1_500px.png
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
81
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
82
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
83 ------
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
84
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
85 **Input:**
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
86
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
87
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
88 The input pileup files are generated by the SAMTools mpileup tool. Default SAMTools mpileup (and SAMTools filter pileup) parameters for mapping quality, base quality and coverage at each SNP position typically yield good results, though users may experiment with filtering SNP data by adjusting these parameters. In our testing, low-threshold filtering on base pair quality has been useful in improving the accuracy of plots, while high-threshold filtering on coverage has skewed plot accuracy.
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
89
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
90 This tool requires a pileup that has been created at each SNP position using SAMTools mpileup (http://samtools.sourceforge.net/samtools.shtml) and a BED file of all Hawaiian SNP positions. Download the Hawaiian SNP positions BED file here:
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
91 http://biochemistry.hs.columbia.edu/labs/hobert/protocols.html
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
92
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
93 The required VCF of mapping strain (e.g. Hawaiian) SNPs is a reference file that contains mapping strain SNP positions and reference base pairs at each position.
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
94 (download the Hawaiian SNPs VCF from: http://biochemistry.hs.columbia.edu/labs/hobert/protocols.html). You may also make your own VCF of SNP positions following the steps described in the CloudMap paper.
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
95
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
96
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
97 **Output:**
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
98
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
99 The tool also provides a tabular output file that contains a count of the number of reference and alternate SNPs at each mapping strain SNP position as well as the ratio of reference/alternate SNPs. The position of each mapping strain SNP in map units and physical coordinates is also provided in the output file.
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
100
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
101
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
102 ------
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
103
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
104 **Settings:**
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
105
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
106 .. class:: infomark
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
107
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
108 Information on loess regression and the loess span parameter:
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
109 http://en.wikipedia.org/wiki/Local_regression
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
110
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
111 .. class:: infomark
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
112
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
113 Based on our testing, we've settled on 0.1 as the default loess span. Larger values result in smoothing of the line to reflect trends at a more macro level. Smaller values result in loess lines that more closely reflect local data fluctuations.
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
114
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
115 .. class:: infomark
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
116
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
117 Supported colors for data points and loess regression line:
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
118
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
119 http://www.stat.columbia.edu/~tzheng/files/Rcolor.pdf
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
120
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
121 http://research.stowers-institute.org/efg/R/Color/Chart/ColorChart.pdf
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
122
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
123
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
124
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
125 .. class:: warningmark
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
126
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
127 This tool requires that the statistical programming environment R has been installed on the system hosting Galaxy (http://www.r-project.org/). If you are accessing this tool on Galaxy via the Cloud, this does not apply to you.
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
128
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
129
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
130 ------
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
131
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
132 **Citation:**
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
133
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
134 This tool is part of the CloudMap package from the Hobert Lab. If you use this tool, please cite `Gregory Minevich, Danny Park, Richard J. Poole, Daniel Blankenberg, Anton Nekrutenko, and Oliver Hobert. CloudMap: A Cloud-based Pipeline for Analysis of Mutant Genome Sequences. (2012 In Preparation)`__
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
135
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
136 .. __: http://biochemistry.hs.columbia.edu/labs/hobert/literature.html
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
137
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
138 Correspondence to gm2123@columbia.edu (G.M.) or or38@columbia.edu (O.H.)
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
139
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
140 </help>
9b401db6f9ea Uploaded
gregory-minevich
parents:
diff changeset
141 </tool>