changeset 13:a90930fbd580 draft

Uploaded
author gregory-minevich
date Thu, 14 Jun 2012 20:35:18 -0400
parents 9c28b8aebe84
children 837e392903de
files SNP_Mapping.xml
diffstat 1 files changed, 136 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/SNP_Mapping.xml	Thu Jun 14 20:35:18 2012 -0400
@@ -0,0 +1,136 @@
+<tool id="snp_mapping_using_wgs" name="CloudMap: SNP mapping with WGS data">
+    <description>Map a mutation by plotting recombination frequencies resulting from crossing to a highly polymorphic strain</description>
+    <!-- Runs SNP_Mapping.py once with the user-selected inputs and plot settings.
+         All species share the same options; only the break_file argument depends
+         on the species chosen in the "source" conditional. The script name is kept
+         as the first token of the template so the python interpreter prefix
+         applies to it directly. -->
+    <command interpreter="python">
+        SNP_Mapping.py --sample_pileup $sample_pileup --haw_vcf $haw_vcf --loess_span $loess_span --d_yaxis $d_yaxis --h_yaxis $h_yaxis --points_color $points_color --loess_color $loess_color --output $output --location_plot_output $location_plot_output --standardize $standardize
+        #if $source.source_select == "elegans"
+            --break_file $source.Celegans
+        #else if $source.source_select == "arabadopsis"
+            --break_file $source.Arabadop
+        #else if $source.source_select == "other"
+            --break_file $source.Other
+        #end if
+    </command>
+
+    <inputs>
+        <!-- Species selector. The option values and the hidden parameter values
+             are consumed by the command template and handed to the script as the
+             break_file argument, so their spelling must stay as it is; only the
+             text shown to the user is free to change. -->
+        <conditional name="source">
+            <param name="source_select" type="select" label="Please select the species">
+                <option value="elegans">C. elegans</option>
+                <option value="arabadopsis">Arabidopsis</option>
+                <option value="other">Other</option>
+            </param>
+            <when value="elegans">
+                <param name="Celegans" type="hidden" value="C.elegans" label="The C.elegans configuration file by default" help="C.elegans help" />
+            </when>
+            <when value="arabadopsis">
+                <param name="Arabadop" type="hidden" value="Arabadopsis" label="The Arabidopsis configuration file by default" help="Arabidopsis help" />
+            </when>
+            <when value="other">
+                <!-- For any other species the user supplies the configuration file as a tabular dataset. -->
+                <param name="Other" type="data" format="tabular" label="Please select your 'Other species' configuration file from your history" help="Tabular configuration file for Other species support" />
+            </when>
+        </conditional>
+        <!-- Input datasets. -->
+        <param name="sample_pileup" size="125" type="data" format="pileup" label="WGS Mutant Pileup File" help="WGS pileup file from pooled F2 mutants that have been crossed to a mapping strain. The pileup should contain data from only mapping strain (e.g. Hawaiian) SNP positions" />
+        <param name="haw_vcf" size="125" type="data" format="vcf" label="VCF of mapping strain (e.g. Hawaiian) SNPs" help="A VCF reference file that contains mapping strain SNP positions and reference base pairs at each position"/>
+        <!-- Plot settings, passed to the script unchanged. -->
+        <param name="loess_span" size="15" type="float" value=".1" label="Loess span" help="Parameter that controls the degree of data smoothing."/>
+        <param name="d_yaxis" size="15" type="float" value=".7" label="Y-axis upper limit for dot plot" />
+        <param name="h_yaxis" size="15" type="integer" value="500" label="Y-axis upper limit for histogram plot" />
+        <param name="points_color" size="15" type="text" value="gray27" label="Color for data points" help="See below for list of supported colors"/>
+        <param name="loess_color" size="15" type="text" value="red" label="Color for loess regression line" help="See below for list of supported colors"/>
+        <param name="standardize" type="boolean" truevalue="true" falsevalue="false" checked="true" label="Standardize X-axis" help="Dot plots and histogram plots from separate chromosomes will have uniform X-axis spacing for comparison"/>
+    </inputs>
+    <outputs>
+        <!-- Tabular dataset; its path is handed to the script through the "output" option. -->
+        <data name="output" format="tabular" />
+        <!-- PDF dataset; its path is handed to the script through the "location_plot_output" option. -->
+        <data name="location_plot_output" format="pdf" />
+    </outputs>
+    <requirements>
+        <!-- Python modules declared for the wrapped script. sys, optparse, csv, re
+             and decimal ship with the Python standard library; rpy is the only
+             third-party module in this list. -->
+        <requirement type="python-module">sys</requirement>
+        <requirement type="python-module">optparse</requirement>
+        <requirement type="python-module">csv</requirement>
+        <requirement type="python-module">re</requirement>
+        <requirement type="python-module">decimal</requirement>
+        <requirement type="python-module">rpy</requirement>
+    </requirements>
+    <tests>
+        <!-- NOTE(review): placeholder entries with empty values; no fixture files
+             are referenced yet. Galaxy expects each test case to be wrapped in a
+             <test> element, so wrap these once real input and expected-output
+             files are available. Output names must match the names declared in
+             the outputs section. -->
+        <param name="sample_pileup" value="" />
+        <param name="haw_vcf" value="" />
+        <output name="output" file="" />
+        <output name="location_plot_output" file="" />
+    </tests>
+    <help>
+**What it does:** 
+
+This tool is part of the CloudMap pipeline for analysis of mutant genome sequences. For further details, please see `Gregory Minevich, Danny Park, Richard J. Poole and Oliver Hobert.  CloudMap: A Cloud-based Pipeline for Analysis of Mutant Genome Sequences. (2012 In Preparation)`__
+
+    .. __: http://biochemistry.hs.columbia.edu/labs/hobert/literature.html
+
+This tool improves upon the method described in Doitsidou et al., PLoS One 2010 for mapping causal mutations using whole genome sequencing data. 
+
+Sample output for a linked chromosome:
+
+.. image:: http://biochemistry.hs.columbia.edu/labs/hobert/CloudMap/Linked_LG_500px.png
+
+
+The polymorphic Hawaiian strain CB4856 is used as a mapping strain in most cases but in principle any sequenced nematode strain that is significantly different from the mutant strain can be used for mapping. The tool plots the ratio of mapping strain (Hawaiian)/mutant strain (N2) nucleotides at all SNP positions, reflecting the number of recombinants in the sequenced pool of animals. Chromosomes which contain regions of linkage to the causal mutation will have regions where the ratio of mapping strain (Hawaiian)/total reads will be equal to 0. The scatter plots for such linked regions will have a high number of data points lying exactly on the X axis. A loess regression line is plotted through all the points on a given chromosome giving further accuracy to the linked region. 
+
+Each scatter plot has a corresponding frequency plot that displays regions of linked chromosome where 0 ratio SNP positions are concentrated. 1Mb bins for the 0 ratio SNP positions are colored gray by default and .5Mb bins are colored in red.
+
+
+The experimental design required to generate data for the plots is described in Doitsidou et al., PLoS One 2010 Figure 1:
+
+.. image:: http://biochemistry.hs.columbia.edu/labs/hobert/CloudMap/Doitsidou_2010_PLoS_Fig.1_500px.png
+
+
+------
+
+**Input:** 
+
+
+The input pileup files are generated by the SAMTools mpileup tool. Default SAMTools mpileup (and Samtools filter pileup) parameters for mapping quality, base quality and coverage at each SNP position typically yield good results, though users may experiment with filtering SNP data by adjusting these parameters. In our testing, low threshold filtering on base pair quality has been useful in improving accuracy of plots while high threshold filtering on coverage has skewed plot accuracy.  
+
+This tool requires a pileup that has been created at each SNP position using SAMTools mpileup (http://samtools.sourceforge.net/samtools.shtml) and a BED file of all Hawaiian SNP positions. Download Hawaiian SNP positions BED file here:
+http://biochemistry.hs.columbia.edu/labs/hobert/protocols.html
+
+The required VCF of mapping strain (e.g. Hawaiian) SNPs is a reference file that contains mapping strain SNP positions and reference base pairs at each position.
+(download Hawaiian SNPs VCF from: http://biochemistry.hs.columbia.edu/labs/hobert/protocols.html). You may also make your own VCF of SNP positions following the steps described in the CloudMap paper.
+
+
+**Output:**
+
+In addition to the PDF plot output, the tool provides a tabular output file that contains a count of the number of reference and alternate SNPs at each mapping strain SNP position as well as the ratio of reference/alternate SNPs. The position of each mapping strain SNP in map units and physical coordinates is also provided in the output file.
+
+
+------
+
+**Settings:**
+
+.. class:: infomark
+
+Information on loess regression and the loess span parameter:
+http://en.wikipedia.org/wiki/Local_regression
+
+.. class:: infomark
+
+Based on our testing, we've settled on .1 as a loess span default. Larger values result in smoothing of the line to reflect trends at a more macro level. Smaller values result in loess lines that more closely reflect local data fluctuations. Users looking at chromosome subregions will want to increase the loess span.
+
+.. class:: infomark
+
+Supported colors for data points and loess regression line:
+
+http://www.stat.columbia.edu/~tzheng/files/Rcolor.pdf
+
+http://research.stowers-institute.org/efg/R/Color/Chart/ColorChart.pdf
+
+
+
+.. class:: warningmark
+
+This tool requires that the statistical programming environment R has been installed on the system hosting Galaxy (http://www.r-project.org/). If you are accessing this tool on Galaxy via the Cloud, this does not apply to you.
+
+
+------
+
+**Citation:**
+
+This tool is part of the CloudMap package from the Hobert Lab. If you use this tool, please cite `Gregory Minevich, Danny Park, Richard J. Poole and Oliver Hobert.  CloudMap: A Cloud-based Pipeline for Analysis of Mutant Genome Sequences. (2012 In Preparation)`__
+
+    .. __: http://biochemistry.hs.columbia.edu/labs/hobert/literature.html
+
+Correspondence to gm2123@columbia.edu (G.M.) or or38@columbia.edu (O.H.)
+
+    </help>
+</tool>