Mercurial > repos > nilesh > somaticsniper
changeset 1:de08d9401816 default tip
first commit
author | nilesh |
---|---|
date | Fri, 12 Jul 2013 15:21:36 -0500 |
parents | 714f80d74020 |
children | |
files | all_fasta.loc.sample somatic_sniper.xml somatic_sniper_wrapper.pl tool_data_table_conf.xml.sample tool_dependencies.xml |
diffstat | 5 files changed, 252 insertions(+), 1 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/all_fasta.loc.sample Fri Jul 12 15:21:36 2013 -0500 @@ -0,0 +1,18 @@ +#This file lists the locations and dbkeys of all the fasta files +#under the "genome" directory (a directory that contains a directory +#for each build). The script extract_fasta.py will generate the file +#all_fasta.loc. This file has the format (white space characters are +#TAB characters): +# +#<unique_build_id> <dbkey> <display_name> <file_path> +# +#So, all_fasta.loc could look something like this: +# +#apiMel3 apiMel3 Honeybee (Apis mellifera): apiMel3 /path/to/genome/apiMel3/apiMel3.fa +#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /path/to/genome/hg19/hg19canon.fa +#hg19full hg19 Human (Homo sapiens): hg19 Full /path/to/genome/hg19/hg19full.fa +# +#Your all_fasta.loc file should contain an entry for each individual +#fasta file. So there will be multiple fasta files for each build, +#such as with hg19 above. +#
<!--
  somatic_sniper.xml

  Galaxy tool definition for SomaticSniper (bam-somaticsniper).

  The command template calls somatic_sniper_wrapper.pl with arguments of the
  form "KEY::value" (NORMAL, TUMOR, OUTPUT and repeatable OPTION entries); the
  wrapper turns them into a bam-somaticsniper command line.

  NOTE: do not place XML comments inside <command> or <help>; some XML parsers
  cut an element's text at the first comment, which would truncate the
  template or the help page.
-->
<tool id="somatic_sniper_tool" name="Somatic Sniper" version="1.0.2">
  <description>: identify single nucleotide positions that are different between tumor and normal</description>
  <requirements>
    <requirement type="package" version="1.0.0">somatic-sniper</requirement>
  </requirements>
  <command interpreter="perl">
    somatic_sniper_wrapper.pl

    "NORMAL::$normal"
    "TUMOR::$tumor"
    "OUTPUT::$snp_output"
    "OPTION::-F $output"

    #if $option.option == "modify_parameters":
      "OPTION::-q $option.readFilter"
      "OPTION::-Q $option.somaticFilter"
      "OPTION::-s $option.mutationPrior"
      #if str($option.disablePriors) == "true"
        "OPTION::-p"
      #end if
    #end if

    "OPTION::-f $reference.fields.path"


  </command>
  <inputs>
    <!-- Reference FASTA, chosen from the all_fasta data table; its "path" column is passed as -f. -->
    <param name="reference" type="select" label="Select a reference genome">
      <options from_data_table="all_fasta">
        <filter type="sort_by" column="2" />
        <validator type="no_options" message="No indexes are available" />
      </options>
    </param>

    <param format="bam" name="normal" type="data" label="Normal sample" help=""/>
    <param format="bam" name="tumor" type="data" label="Tumor Sample" help=""/>
    <!-- Passed to bam-somaticsniper as -F. -->
    <param name="output" type="select" label="Output Type" help="" optional="true">
      <option value="classic" selected="true">Classic</option>
      <option value="vcf">VCF</option>
      <option value="bed">BED</option>
    </param>


    <!--
      With "Default Parameters" only -F and -f are passed. With "Modify
      Parameters" the template always passes -q, -Q and -s; per the help text
      below, -s implies -J.
    -->
    <conditional name="option">
      <param name="option" type="select" label="Optional Parameters" help="" optional="true">
        <option value="default_parameters" selected="true">Default Parameters</option>
        <option value="modify_parameters">Modify Parameters</option>
      </param>
      <when value="modify_parameters">

        <param name="readFilter" label="filtering reads with mapping quality less than" type="integer" value="0" optional="true" />
        <param name="somaticFilter" label="filtering somatic snv output with somatic quality less than" type="integer" value="15" optional="true" />
        <param name="disablePriors" type="select" label="disable priors in the somatic calculation. Increases sensitivity for solid tumors" help="" optional="true">
          <option value="true" >true</option>
          <option value="false" selected="true">false</option>
        </param>
        <!-- Default matches the tool's documented -s default, [0.010000] (it was previously 0.10000, ten times larger). -->
        <param name="mutationPrior" label="prior probability of a somatic mutation" type="float" value="0.010000" optional="true" />

      </when>

    </conditional>

  </inputs>
  <outputs>
    <!-- "txt" is Galaxy's plain-text datatype extension; "text" is not a registered datatype. -->
    <data name="snp_output" format="txt" label="${tool.name} result on ${on_string}" />
  </outputs>
  <help>
|


**Reference**

  http://gmt.genome.wustl.edu/somatic-sniper/current/

-----

**What it does**

The purpose of this program is to identify single nucleotide positions that are different between tumor and normal
(or, in theory, any two bam files). It takes a tumor bam and a normal bam and compares the two to determine the
differences. It outputs a file in a format very similar to Samtools consensus format. It uses the genotype likelihood
model of MAQ (as implemented in Samtools) and then calculates the probability that the tumor and normal genotypes are
different. This probability is reported as a somatic score. The somatic score is the Phred-scaled probability (between 0 and 255)
that the Tumor and Normal genotypes are not different where 0 means there is no probability that the genotypes are different and
255 means there is a probability of 1 - 10^(255/-10) that the genotypes are different between tumor and normal. This is consistent
with how the SAM format reports such probabilities.

bam-somaticsniper [options] -f ref.fasta tumor.bam normal.bam snp_output_file

Bam files must contain LB tag in @RG line.
Picard tools can be used to add lines to BAM headers.

-----

**Required Parameters**

::

  -f FILE   REQUIRED reference sequence in the FASTA format

-----

**Options**

::

  -q INT    filtering reads with mapping quality less than INT [0]

  -Q INT    filtering somatic snv output with somatic quality less than INT [15]

  -p FLAG   disable priors in the somatic calculation. Increases sensitivity for solid tumors

  -J FLAG   Use prior probabilities accounting for the somatic mutation rate

  -s FLOAT  prior probability of a somatic mutation (implies -J) [0.010000]

  -T FLOAT  theta in maq consensus calling model (for -c/-g) [0.850000]

  -N INT    number of haplotypes in the sample (for -c/-g) [2]

  -r FLOAT  prior of a difference between two haplotypes (for -c/-g) [0.001000]

  -F STRING select output format [classic]
            Available formats:
              classic
              vcf
              bed

-----

**File Formats**

::

  Classic:

  Each line contains the following tab-separated values:

    1. Chromosome
    2. Position
    3. Reference base
    4. IUB genotype of tumor
    5. IUB genotype of normal
    6. Somatic Score
    7. Tumor Consensus quality
    8. Tumor variant allele quality
    9. Tumor mean mapping quality
    10. Normal Consensus quality
    11. Normal variant allele quality
    12. Normal mean mapping quality
    13. Depth in tumor (# of reads crossing the position)
    14. Depth in normal (# of reads crossing the position)
    15. Mean base quality of reads supporting reference in tumor
    16. Mean mapping quality of reads supporting reference in tumor
    17. Depth of reads supporting reference in tumor
    18. Mean base quality of reads supporting variant(s) in tumor
    19. Mean mapping quality of reads supporting variant(s) in tumor
    20. Depth of reads supporting variant(s) in tumor
    21. Mean base quality of reads supporting reference in normal
    22. Mean mapping quality of reads supporting reference in normal
    23. Depth of reads supporting reference in normal
    24. Mean base quality of reads supporting variant(s) in normal
    25. Mean mapping quality of reads supporting variant(s) in normal
    26. Depth of reads supporting variant(s) in normal



  </help>
</tool>
# somatic_sniper_wrapper.pl
#
# Galaxy wrapper around bam-somaticsniper.
#
# Every command-line argument has the form "KEY::value", where KEY is one of:
#
#   NORMAL  path of the normal-sample BAM file
#   TUMOR   path of the tumor-sample BAM file
#   OUTPUT  path the bam-somaticsniper result is written to
#   OPTION  one bam-somaticsniper option: a flag, optionally followed by
#           whitespace and a single value, e.g. "-q 0" or "-p" (repeatable)
#
# The two BAM files are symlinked into the current working directory as
# normal.bam and tumor.bam, each link is indexed with "samtools index", and
# finally bam-somaticsniper is run as
#
#   bam-somaticsniper <options> tumor.bam normal.bam <output>
#
# The script exits with the exit status of bam-somaticsniper.

use strict;
use warnings;
use File::Basename;
use Cwd;
use File::Path qw(make_path remove_tree);

die qq(
Bad number of inputs

) if !@ARGV;

# Run an external command without involving the shell (list-form system), so
# that arguments containing spaces or shell metacharacters are passed verbatim.
#
# Returns 0 on success. On failure a diagnostic is written to STDERR and a
# non-zero code is returned: the command's own exit status, 128 + N when it was
# killed by signal N, or 127 when it could not be started at all.
sub run_command {
    my @command = @_;

    # The indirect-object form guarantees that no shell is used, even for a
    # single-element command list.
    system { $command[0] } @command;
    return 0 if $? == 0;

    if ( $? == -1 ) {
        warn "Could not execute $command[0]: $!\n";
        return 127;
    }
    if ( $? & 127 ) {
        my $signal = $? & 127;
        warn "$command[0] was terminated by signal $signal\n";
        return 128 + $signal;
    }

    my $exit_code = $? >> 8;
    warn "$command[0] exited with status $exit_code\n";
    return $exit_code;
}

# Create the symbolic link $link pointing at $source, replacing a link of the
# same name if one already exists. Dies when the link cannot be created,
# because every later step reads the BAM through this link.
sub link_bam {
    my ( $source, $link ) = @_;

    if ( -l $link ) {
        unlink $link or die "Cannot remove existing link $link: $!\n";
    }
    symlink( $source, $link )
        or die "Cannot link $source to $link: $!\n";
    return;
}

my @options;
my $normal = "";
my $tumor  = "";
my $output = "";

foreach my $input (@ARGV) {
    # Split into at most two fields so that a value which itself contains
    # "::" is kept whole instead of being cut at the second separator.
    my ( $key, $value ) = split /::/, $input, 2;
    $key   = "" unless defined $key;
    $value = "" unless defined $value;

    if ( $key eq "NORMAL" ) {
        $normal = $value;
    }
    elsif ( $key eq "TUMOR" ) {
        $tumor = $value;
    }
    elsif ( $key eq "OUTPUT" ) {
        $output = $value;
    }
    elsif ( $key eq "OPTION" ) {
        # "<flag> <value>": only the first run of whitespace separates the
        # flag from its value, so a value such as a reference path may
        # contain spaces.
        my ( $flag, $argument ) = split ' ', $value, 2;
        push @options, $flag if defined $flag && length $flag;
        if ( defined $argument ) {
            $argument =~ s/\s+\z//;
            push @options, $argument if length $argument;
        }
    }
    else {
        die("Unknown Input: $input\n");
    }
}

# Without these three values no meaningful command line can be built.
die "Missing NORMAL input\n" unless length $normal;
die "Missing TUMOR input\n"  unless length $tumor;
die "Missing OUTPUT input\n" unless length $output;

my $working_dir = cwd();
my $normal_link = "$working_dir/normal.bam";
my $tumor_link  = "$working_dir/tumor.bam";

foreach my $pair ( [ $normal, $normal_link ], [ $tumor, $tumor_link ] ) {
    my ( $bam, $link ) = @{$pair};

    link_bam( $bam, $link );

    # A failed indexing step was never fatal in this wrapper; keep going, but
    # say so instead of ignoring the failure silently.
    run_command( 'samtools', 'index', $link ) == 0
        or warn "Continuing without an index for $link\n";
}

# Propagate the caller's exit status so that a failed run is detectable.
exit run_command( 'bam-somaticsniper', @options, $tumor_link, $normal_link,
    $output );
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.sample Fri Jul 12 15:21:36 2013 -0500 @@ -0,0 +1,8 @@ +<!-- Use the file tool_data_table_conf.xml.oldlocstyle if you don't want to update your loc files as changed in revision 4550:535d276c92bc--> +<tables> + <!-- Locations of all fasta files under genome directory --> + <table name="all_fasta" comment_char="#"> + <columns>value, dbkey, name, path</columns> + <file path="all_fasta.loc" /> + </table> +</tables>
--- a/tool_dependencies.xml Fri Jul 12 16:19:01 2013 -0400 +++ b/tool_dependencies.xml Fri Jul 12 15:21:36 2013 -0500 @@ -1,6 +1,6 @@ <?xml version="1.0"?> <tool_dependency> - <package name="somatic-sniper" version="unstable"> + <package name="somatic-sniper" version="1.0.0"> <install version="1.0"> <actions>