annotate SNP_Mapping.xml @ 13:a90930fbd580 draft

Uploaded
author gregory-minevich
date Thu, 14 Jun 2012 20:35:18 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
13
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
1 <tool id="snp_mapping_using_wgs" name="CloudMap: SNP mapping with WGS data">
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
2 <description>Map a mutation by plotting recombination frequencies resulting from crossing to a highly polymorphic strain</description>
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
3 <command interpreter="python">
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
4 #if $source.source_select=="elegans" #SNP_Mapping.py --sample_pileup $sample_pileup --haw_vcf $haw_vcf --loess_span $loess_span --d_yaxis $d_yaxis --h_yaxis $h_yaxis --points_color $points_color --loess_color $loess_color --output $output --location_plot_output $location_plot_output --standardize $standardize --break_file $source.Celegans
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
5 #else if $source.source_select=="arabadopsis" #SNP_Mapping.py --sample_pileup $sample_pileup --haw_vcf $haw_vcf --loess_span $loess_span --d_yaxis $d_yaxis --h_yaxis $h_yaxis --points_color $points_color --loess_color $loess_color --output $output --location_plot_output $location_plot_output --standardize $standardize --break_file $source.Arabadop
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
6 #else if $source.source_select=="other" #SNP_Mapping.py --sample_pileup $sample_pileup --haw_vcf $haw_vcf --loess_span $loess_span --d_yaxis $d_yaxis --h_yaxis $h_yaxis --points_color $points_color --loess_color $loess_color --output $output --location_plot_output $location_plot_output --standardize $standardize --break_file $source.Other
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
7 #end if
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
8 </command>
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
9
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
10 <inputs>
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
11 <conditional name="source">
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
12 <param name="source_select" type="select" label="Please select the species">
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
13 <option value="elegans">C. elegans</option>
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
14 <option value="arabadopsis">Arabidopsis</option>
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
15 <option value="other">Other</option>
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
16 </param>
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
17 <when value="elegans">
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
18 <param name="Celegans" type="hidden" value="C.elegans" label="The C.elegans configuration file by default" help="C.elegans help" />
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
19 </when>
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
20 <when value="arabadopsis">
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
21 <param name="Arabadop" type="hidden" value="Arabadopsis" label="The Arabadopsis configuration file by default" help="Arabadopsis help" />
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
22 </when>
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
23 <when value="other">
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
24 <param name="Other" type="data" format="tabular" label="Please select your 'Other species' configuration file from your history" help="Tabular configuration file for Other species support" />
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
25 </when>
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
26 </conditional>
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
27 <param name="sample_pileup" size = "125" type="data" format="pileup" label="WGS Mutant Pileup File" help="WGS pileup file from pooled F2 mutants that have been crossed to a mapping strain. The pileup should contain data from only mapping strain (e.g. Hawaiian) SNP positions" />
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
28 <param name="haw_vcf" size = "125" type="data" format="vcf" label="VCF of mapping strain (e.g. Hawaiian) SNPs" help="A VCF reference file that contains mapping strain SNP positions and reference base pairs at each position"/>
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
29 <param name="loess_span" size = "15" type="float" value=".1" label="Loess span" help="Parameter that controls the degree of data smoothing."/>
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
30 <param name="d_yaxis" size = "15" type="float" value=".7" label="Y-axis upper limit for dot plot" />
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
31 <param name="h_yaxis" size = "15" type="integer" value="500" label="Y-axis upper limit for histogram plot" />
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
32 <param name="points_color" size = "15" type="text" value="gray27" label="Color for data points" help="See below for list of supported colors"/>
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
33 <param name="loess_color" size = "15" type="text" value="red" label="Color for loess regression line" help="See below for list of supported colors"/>
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
34 <param name="standardize" type="boolean" truevalue="true" falsevalue="false" checked="true" label="Standardize X-axis" help="Dot plots and histogram plots from separate chromosomes will have uniform X-axis spacing for comparison"/>
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
35 </inputs>
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
36 <outputs>
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
37 <data name="output" type="text" format="tabular" />
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
38 <data name="location_plot_output" format="pdf" />
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
39 </outputs>
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
40 <requirements>
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
41 <requirement type="python-module">sys</requirement>
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
42 <requirement type="python-module">optparse</requirement>
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
43 <requirement type="python-module">csv</requirement>
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
44 <requirement type="python-module">re</requirement>
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
45 <requirement type="python-module">decimal</requirement>
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
46 <requirement type="python-module">rpy</requirement>
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
47 </requirements>
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
48 <tests>
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
49 <param name="sample_pileup" value="" />
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
50 <param name="haw_vcf" value="" />
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
51 <output name="output" file="" />
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
52 <output name="location_plot_output" file="" />
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
53 </tests>
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
54 <help>
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
55 **What it does:**
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
56
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
57 This tool is part of the CloudMap pipeline for analysis of mutant genome sequences. For further details, please see `Gregory Minevich, Danny Park, Richard J. Poole and Oliver Hobert. CloudMap: A Cloud-based Pipeline for Analysis of Mutant Genome Sequences. (2012 In Preparation)`__
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
58
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
59 .. __: http://biochemistry.hs.columbia.edu/labs/hobert/literature.html
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
60
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
61 This tool improves upon the method described in Doitsidou et al., PLoS One 2010 for mapping causal mutations using whole genome sequencing data.
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
62
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
63 Sample output for a linked chromosome:
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
64
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
65 .. image:: http://biochemistry.hs.columbia.edu/labs/hobert/CloudMap/Linked_LG_500px.png
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
66
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
67
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
68 The polymorphic Hawaiian strain CB4856 is used as a mapping strain in most cases but in principle any sequenced nematode strain that is significantly different from the mutant strain can be used for mapping. The tool plots the ratio of mapping strain (Hawaiian)/mutant strain (N2) nucleotides at all SNP positions, reflecting the number of recombinants in the sequenced pool of animals. Chromosomes which contain regions of linkage to the causal mutation will have regions where the ratio of mapping strain (Hawaiian)/total reads will be equal to 0. The scatter plots for such linked regions will have a high number of data points lying exactly on the X axis. A loess regression line is plotted through all the points on a given chromosome giving further accuracy to the linked region.
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
69
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
70 Each scatter plot has a corresponding frequency plot that displays regions of linked chromosome where 0 ratio SNP positions are concentrated. 1Mb bins for the 0 ratio SNP positions are colored gray by default and .5Mb bins are colored in red.
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
71
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
72
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
73 The experimental design required to generate data for the plots is described in Doitsidou et al., PLoS One 2010 Figure 1:
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
74
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
75 .. image:: http://biochemistry.hs.columbia.edu/labs/hobert/CloudMap/Doitsidou_2010_PLoS_Fig.1_500px.png
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
76
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
77
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
78 ------
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
79
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
80 **Input:**
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
81
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
82
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
83 The input pileup files are generated by the SAMTools mpileup tool. Default SAMTools mpileup (and SAMTools filter pileup) parameters for mapping quality, base quality and coverage at each SNP position typically yield good results, though users may experiment with filtering SNP data by adjusting these parameters. In our testing, low threshold filtering on base pair quality has been useful in improving accuracy of plots while high threshold filtering on coverage has skewed plot accuracy.
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
84
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
85 This tool requires a pileup that has been created at each SNP position using SAMTools mpileup (http://samtools.sourceforge.net/samtools.shtml) and a BED file of all Hawaiian SNP positions. Download Hawaiian SNP positions BED file here:
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
86 http://biochemistry.hs.columbia.edu/labs/hobert/protocols.html
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
87
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
88 The required VCF of mapping strain (e.g. Hawaiian) SNPs is a reference file that contains mapping strain SNP positions and reference base pairs at each position.
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
89 (Download the Hawaiian SNPs VCF from: http://biochemistry.hs.columbia.edu/labs/hobert/protocols.html). You may also make your own VCF of SNP positions following the steps described in the CloudMap paper.
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
90
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
91
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
92 **Output:**
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
93
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
94 In addition to the PDF of plots, the tool provides a tabular output file that contains a count of the number of reference and alternate SNPs at each mapping strain SNP position as well as the ratio of reference/alternate SNPs. The position of each mapping strain SNP in map units and physical coordinates is also provided in the output file.
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
95
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
96
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
97 ------
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
98
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
99 **Settings:**
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
100
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
101 .. class:: infomark
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
102
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
103 Information on loess regression and the loess span parameter:
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
104 http://en.wikipedia.org/wiki/Local_regression
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
105
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
106 .. class:: infomark
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
107
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
108 Based on our testing, we've settled on .1 as a loess span default. Larger values result in smoothing of the line to reflect trends at a more macro level. Smaller values result in loess lines that more closely reflect local data fluctuations. Users looking at chromosome subregions will want to increase the loess span.
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
109
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
110 .. class:: infomark
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
111
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
112 Supported colors for data points and loess regression line:
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
113
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
114 http://www.stat.columbia.edu/~tzheng/files/Rcolor.pdf
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
115
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
116 http://research.stowers-institute.org/efg/R/Color/Chart/ColorChart.pdf
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
117
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
118
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
119
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
120 .. class:: warningmark
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
121
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
122 This tool requires that the statistical programming environment R has been installed on the system hosting Galaxy (http://www.r-project.org/). If you are accessing this tool on Galaxy via the Cloud, this does not apply to you.
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
123
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
124
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
125 ------
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
126
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
127 **Citation:**
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
128
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
129 This tool is part of the CloudMap package from the Hobert Lab. If you use this tool, please cite `Gregory Minevich, Danny Park, Richard J. Poole and Oliver Hobert. CloudMap: A Cloud-based Pipeline for Analysis of Mutant Genome Sequences. (2012 In Preparation)`__
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
130
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
131 .. __: http://biochemistry.hs.columbia.edu/labs/hobert/literature.html
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
132
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
133 Correspondence to gm2123@columbia.edu (G.M.) or or38@columbia.edu (O.H.)
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
134
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
135 </help>
a90930fbd580 Uploaded
gregory-minevich
parents:
diff changeset
136 </tool>