changeset 1:de08d9401816 default tip

first commit
author nilesh
date Fri, 12 Jul 2013 15:21:36 -0500
parents 714f80d74020
children
files all_fasta.loc.sample somatic_sniper.xml somatic_sniper_wrapper.pl tool_data_table_conf.xml.sample tool_dependencies.xml
diffstat 5 files changed, 252 insertions(+), 1 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/all_fasta.loc.sample	Fri Jul 12 15:21:36 2013 -0500
@@ -0,0 +1,18 @@
+#This file lists the locations and dbkeys of all the fasta files
+#under the "genome" directory (a directory that contains a directory
+#for each build). The script extract_fasta.py will generate the file
+#all_fasta.loc. This file has the format (white space characters are
+#TAB characters):
+#
+#<unique_build_id>   <dbkey>   <display_name>   <file_path>
+#
+#So, all_fasta.loc could look something like this:
+#
+#apiMel3     apiMel3   Honeybee (Apis mellifera): apiMel3     /path/to/genome/apiMel3/apiMel3.fa
+#hg19canon   hg19      Human (Homo sapiens): hg19 Canonical   /path/to/genome/hg19/hg19canon.fa
+#hg19full    hg19      Human (Homo sapiens): hg19 Full        /path/to/genome/hg19/hg19full.fa
+#
+#Your all_fasta.loc file should contain an entry for each individual
+#fasta file. So there will be multiple fasta files for each build,
+#such as with hg19 above.
+#
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/somatic_sniper.xml	Fri Jul 12 15:21:36 2013 -0500
@@ -0,0 +1,171 @@
+<tool id="somatic_sniper_tool" name="Somatic Sniper" version="1.0.2">
+  <description>: identify single nucleotide positions that are different between tumor and normal</description>
+  <requirements>
+        <requirement type="package" version="1.0.0">somatic-sniper</requirement>	
+  </requirements>
+  <command interpreter="perl">
+  	somatic_sniper_wrapper.pl
+
+  	"NORMAL::$normal"
+  	"TUMOR::$tumor"
+	"OUTPUT::$snp_output"
+  	"OPTION::-F $output"
+  	
+  	 #if $option.option == "modify_parameters":
+  	 	"OPTION::-q $option.readFilter"
+  	 	"OPTION::-Q $option.somaticFilter"
+  	 	"OPTION::-s $option.mutationPrior"
+  	 	#if str($option.disablePriors) == "true" 
+  	 		"OPTION::-p"  	 	  	 	
+ 		#end if
+  	 #end if
+  	
+  	"OPTION::-f $reference.fields.path"  	
+	
+
+  </command>
+	<inputs>
+	<param name="reference" type="select" label="Select a reference genome">
+		<options from_data_table="all_fasta">
+			<filter type="sort_by" column="2" />
+			<validator type="no_options" message="No indexes are available" />
+		</options>
+	</param>
+
+	<param format="bam" name="normal" type="data" label="Normal sample" help=""/>
+	<param format="bam" name="tumor" type="data" label="Tumor Sample" help=""/>
+	<param name="output" type="select" label="Output Type" help="" optional="true">
+		<option value="classic" selected="true">Classic</option>
+		<option value="vcf">VCF</option>
+		<option value="bed">BED</option>
+	</param>
+
+
+	<conditional name="option">
+		<param name="option" type="select" label="Optional Parameters" help="" optional="true">
+			<option value="default_parameters" selected="true">Default Parameters</option>
+			<option value="modify_parameters">Modify Parameters</option>
+		</param>
+		<when value="modify_parameters">
+	
+			<param name="readFilter" label="filtering reads with mapping quality less than"  type="integer" value="0" optional="true" />
+			<param name="somaticFilter" label="filtering somatic snv output with somatic quality less than"  type="integer" value="15" optional="true" />
+			<param name="disablePriors" type="select" label="disable priors in the somatic calculation. Increases sensitivity for solid tumors" help="" optional="true">
+				<option value="true"  >true</option>
+				<option value="false" selected="true">false</option>
+			</param>
+			<param name="mutationPrior"  label="prior probability of a somatic mutation" type="float" value="0.010000" optional="true" help="Passed as -s (implies -J); the documented bam-somaticsniper default is 0.010000" />
+      
+		</when>
+		
+	</conditional>
+
+	</inputs>
+	<outputs>
+		<data name="snp_output" format="txt" label="${tool.name} result on ${on_string}" />
+	</outputs>
+	<help>
+|
+
+
+**Reference**
+	
+  http://gmt.genome.wustl.edu/somatic-sniper/current/
+  
+-----
+
+**What it does**
+
+The purpose of this program is to identify single nucleotide positions that are different between tumor and normal 
+(or, in theory, any two bam files). It takes a tumor bam and a normal bam and compares the two to determine the 
+differences. It outputs a file in a format very similar to Samtools consensus format. It uses the genotype likelihood 
+model of MAQ (as implemented in Samtools) and then calculates the probability that the tumor and normal genotypes are 
+different. This probability is reported as a somatic score. The somatic score is the Phred-scaled probability (between 0 and 255) 
+that the Tumor and Normal genotypes are not different, where 0 means there is no probability that the genotypes are different and 
+255 means there is a probability of 1 - 10^(255/-10) that the genotypes are different between tumor and normal. This is consistent 
+with how the SAM format reports such probabilities. 
+
+bam-somaticsniper [options] -f ref.fasta tumor.bam normal.bam snp_output_file
+
+BAM files must contain an LB tag in the @RG header line.
+Picard tools can be used to add lines to BAM headers.
+
+-----
+ 
+**Required Parameters**
+
+::
+
+  -f 	FILE REQUIRED reference sequence in the FASTA format
+
+-----
+
+**Options**
+
+::
+
+ -q 	INT    filtering reads with mapping quality less than INT [0]
+ 
+ -Q	INT    filtering somatic snv output with somatic quality less than  INT [15]
+        
+ -p 	FLAG   disable priors in the somatic calculation. Increases sensitivity for solid tumors
+        
+ -J 	FLAG   Use prior probabilities accounting for the somatic mutation rate
+ 
+ -s 	FLOAT  prior probability of a somatic mutation (implies -J) [0.010000]
+        
+ -T 	FLOAT  theta in maq consensus calling model (for -c/-g) [0.850000]
+        
+ -N 	INT    number of haplotypes in the sample (for -c/-g) [2]
+        
+ -r 	FLOAT  prior of a difference between two haplotypes (for -c/-g) [0.001000]
+        
+ -F 	STRING select output format [classic]
+        Available formats:
+        	classic
+        	vcf
+        	bed
+             
+-----            
+
+**File Formats**      
+             
+::
+
+  Classic:
+
+  Each line contains the following tab-separated values:
+
+    1. Chromosome
+    2. Position
+    3. Reference base
+    4. IUB genotype of tumor
+    5. IUB genotype of normal
+    6. Somatic Score
+    7. Tumor Consensus quality
+    8. Tumor variant allele quality
+    9. Tumor mean mapping quality
+    10. Normal Consensus quality
+    11. Normal variant allele quality
+    12. Normal mean mapping quality
+    13. Depth in tumor (# of reads crossing the position)
+    14. Depth in normal (# of reads crossing the position)
+    15. Mean base quality of reads supporting reference in tumor
+    16. Mean mapping quality of reads supporting reference in tumor
+    17. Depth of reads supporting reference in tumor
+    18. Mean base quality of reads supporting variant(s) in tumor
+    19. Mean mapping quality of reads supporting variant(s) in tumor
+    20. Depth of reads supporting variant(s) in tumor
+    21. Mean base quality of reads supporting reference in normal
+    22. Mean mapping quality of reads supporting reference in normal
+    23. Depth of reads supporting reference in normal
+    24. Mean base quality of reads supporting variant(s) in normal
+    25. Mean mapping quality of reads supporting variant(s) in normal
+    26. Depth of reads supporting variant(s) in normal
+
+
+
+	</help>
+</tool>
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/somatic_sniper_wrapper.pl	Fri Jul 12 15:21:36 2013 -0500
@@ -0,0 +1,54 @@
+use strict;
+use warnings;
+use File::Basename; 
+use Cwd;
+use File::Path qw(make_path remove_tree);
+# Galaxy wrapper for bam-somaticsniper. Every argument is "KEY::value" with
+# KEY one of NORMAL / TUMOR (input BAMs), OUTPUT (result file) or OPTION
+# (flags, split on whitespace and forwarded to bam-somaticsniper unchanged).
+die qq(
+Bad number of inputs
+
+) if(!@ARGV);
+
+my %path_of = (NORMAL => "", TUMOR => "", OUTPUT => "");
+my @options;
+
+foreach my $input (@ARGV)
+{
+	# A limit of 2 keeps values that themselves contain "::" intact.
+	my ($key, $value) = split "::", $input, 2;
+	$key = "" if(!defined $key);
+	$value = "" if(!defined $value);
+	if(exists $path_of{$key})
+	{
+		$path_of{$key} = $value;
+	}
+	elsif($key eq "OPTION")
+	{
+		push @options, split(' ', $value);
+	}
+	else
+	{
+		die("Unknown Input: $input\n");
+	}
+}
+my @missing = grep { $path_of{$_} eq "" } sort keys %path_of;
+die("Missing required input(s): @missing\n") if(@missing);
+
+# Run a command in list form (no shell is involved, so paths containing
+# spaces or metacharacters are safe); die if it fails or exits non-zero.
+sub run_command
+{
+	system(@_) == 0 or die("Command failed (status $?): @_\n");
+}
+
+my $working_dir = cwd();
+foreach my $sample ("normal", "tumor")
+{
+	my $link = "$working_dir/$sample.bam";
+	symlink($path_of{uc($sample)}, $link) or die("Cannot link $link: $!\n");
+	run_command("samtools", "index", $link);
+}
+
+run_command("bam-somaticsniper", @options, "$working_dir/tumor.bam", "$working_dir/normal.bam", $path_of{OUTPUT});
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample	Fri Jul 12 15:21:36 2013 -0500
@@ -0,0 +1,8 @@
+<!-- Use the file tool_data_table_conf.xml.oldlocstyle if you don't want to update your loc files as changed in revision 4550:535d276c92bc-->
+<tables>
+    <!-- Locations of all fasta files under genome directory -->
+    <table name="all_fasta" comment_char="#">
+        <columns>value, dbkey, name, path</columns>
+        <file path="all_fasta.loc" />
+    </table>
+</tables>
--- a/tool_dependencies.xml	Fri Jul 12 16:19:01 2013 -0400
+++ b/tool_dependencies.xml	Fri Jul 12 15:21:36 2013 -0500
@@ -1,6 +1,6 @@
 <?xml version="1.0"?>
 <tool_dependency>
-    <package name="somatic-sniper" version="unstable">
+    <package name="somatic-sniper" version="1.0.0">
         <install version="1.0">
             <actions>