Mercurial > repos > fcaramia > varscan
changeset 2:51969e284317 draft default tip
Uploaded
author | fcaramia |
---|---|
date | Thu, 20 Jun 2013 00:00:22 -0400 |
parents | 893954763c0e |
children | |
files | tool_dependencies.xml varscan_mpileup.pl varscan_mpileup.xml varscan_somatic.pl varscan_somatic.xml |
diffstat | 5 files changed, 528 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
<?xml version="1.0"?>
<!--
  tool_dependencies.xml
  Galaxy tool-shed dependency recipe for the VarScan 2.3.5 jar used by the
  varscan_mpileup and varscan_somatic tool wrappers.
-->
<tool_dependency>
    <package name="VarScan" version="2.3.5">
        <install version="1.0">
            <actions>
                <!-- Fetch the released jar. -->
                <action type="download_by_url">http://downloads.sourceforge.net/project/varscan/VarScan.v2.3.5.jar</action>
                <!-- Keep the jar under the package's own install directory. -->
                <action type="move_file">
                    <source>VarScan.v2.3.5.jar</source>
                    <destination>$INSTALL_DIR/jars</destination>
                </action>
                <!-- The tool wrappers locate the jar through this variable. -->
                <action type="set_environment">
                    <environment_variable name="JAVA_JAR_PATH" action="set_to">$INSTALL_DIR/jars</environment_variable>
                </action>
            </actions>
        </install>
        <readme>
VarScan 2.3.5 is downloaded as a single jar file and stored in the "jars"
directory of this package's install directory. The JAVA_JAR_PATH environment
variable is set to that directory.

The VarScan tool wrappers start the program with
"java -jar JAVA_JAR_PATH/VarScan.v2.3.5.jar", so a Java runtime must be
available on the PATH of the job environment.
        </readme>
    </package>
</tool_dependency>
#!/usr/bin/perl
#
# varscan_mpileup.pl
#
# Galaxy wrapper around the VarScan mpileup2* commands.
#
# Every command-line argument has the form KEY::value, where KEY is one of:
#
#   COMMAND  shell command that starts VarScan (may reference environment
#            variables such as $JAVA_JAR_PATH, so it is run through the shell)
#   INPUT    path of the mpileup file
#   OPTION   one VarScan option, e.g. "--min-coverage 8" (may be repeated)
#   OUTPUT   path of the VCF file to create
#   LOG      path of the file that receives VarScan's standard error
#
# VarScan's standard output is captured in "<cwd>/temp" and then rewritten
# into OUTPUT by vs2vcf(), which normalises VarScan's "+XX"/"-XX" indel
# alleles.

use strict;
use warnings;
use Cwd;

# Run only when executed as a script, so the subs can be loaded and tested
# on their own.
main(@ARGV) unless caller;

# Parse the KEY::value arguments, run VarScan and convert its output.
# Dies on an unknown key, a missing mandatory value or a failed VarScan run.
sub main {
    my @args = @_;

    die qq(
Bad number of inputs

) if !@args;

    my $command = q{};
    my $file    = q{};
    my $output  = q{};
    my $log     = q{};
    my @options;

    my $working_dir = cwd();
    my $temp_vcf    = "$working_dir/temp";

    for my $input (@args) {
        # Limit of 2: a value that itself contains "::" must stay intact.
        my ($key, $value) = split /::/, $input, 2;
        $key   = q{} if !defined $key;
        $value = q{} if !defined $value;

        if ($key eq 'COMMAND') {
            $command = $value;
        }
        elsif ($key eq 'INPUT') {
            $file = $value;
        }
        elsif ($key eq 'OPTION') {
            push @options, $value if _usable_option($value);
        }
        elsif ($key eq 'OUTPUT') {
            $output = $value;
        }
        elsif ($key eq 'LOG') {
            $log = $value;
        }
        else {
            die("Unknown Input: $input\n");
        }
    }

    die "Missing COMMAND argument\n" if !length $command;
    die "Missing INPUT argument\n"   if !length $file;
    die "Missing OUTPUT argument\n"  if !length $output;

    # $command and the options are left unquoted on purpose: the command
    # relies on shell expansion and each option is "--name value". File
    # paths are quoted so that unusual characters cannot break the command.
    my @shell_parts = (
        $command,
        _shell_quote($file),
        @options,
        '1>' . _shell_quote($temp_vcf),
    );
    push @shell_parts, '2>' . _shell_quote($log) if length $log;

    _run_shell(join q{ }, @shell_parts);

    vs2vcf($temp_vcf, $output);
    return;
}

# True when an OPTION value should be passed on to VarScan.
# Drops empty values and "--name " with nothing after the name.
# NOTE(review): the latter presumably comes from an optional Galaxy field
# that was left blank - confirm against the tool XML.
sub _usable_option {
    my ($text) = @_;
    return 0 if $text !~ /\S/;
    return 0 if $text =~ /\A\s*--\S+\s+\z/;
    return 1;
}

# Wrap a string in single quotes for /bin/sh, escaping embedded quotes.
sub _shell_quote {
    my ($text) = @_;
    $text =~ s/'/'\\''/g;
    return "'$text'";
}

# Run a command line through the shell and die unless it exits with 0.
sub _run_shell {
    my ($shell_cmd) = @_;

    my $status = system($shell_cmd);
    return if $status == 0;

    die "Failed to start '$shell_cmd': $!\n" if $status == -1;
    die sprintf("Command '%s' died with signal %d\n", $shell_cmd, $status & 127)
        if $status & 127;
    die sprintf("Command '%s' exited with status %d\n", $shell_cmd, $status >> 8);
}

# Rewrite a VarScan "VCF" file so that indel alleles are plain base strings.
#
#   $input    path of the file written by VarScan
#   $output   path of the file to create
#   $chr_ord  optional array reference of chromosome names; when given,
#             records are written chromosome by chromosome in that order and
#             sorted numerically by position within each chromosome
#
# Lines starting with "#" are copied unchanged. In every other line column 4
# is taken as REF and column 5 as ALT:
#   ALT "-XX" (deletion)  -> REF becomes REF+XX, ALT becomes the old REF
#   ALT "+XX" (insertion) -> ALT becomes REF+XX
#
# In sorted mode, records that share a chromosome and position are all kept
# (in input order), and chromosomes that are missing from $chr_ord are
# written after the listed ones in order of first appearance, so no record
# is lost when the order list is empty or incomplete.
#
# Dies when a file cannot be opened or the output cannot be closed.
sub vs2vcf {
    my ($input, $output, $chr_ord) = @_;

    open my $in_fh,  '<', $input  or die "Can't open '$input': $!\n";
    open my $out_fh, '>', $output or die "Can't create '$output': $!\n";

    my %records_at;     # chromosome => { position => [ record lines ] }
    my @seen_chroms;    # chromosomes in order of first appearance

    while (my $line = <$in_fh>) {
        if ($line =~ /^#/) {
            print {$out_fh} $line;
            next;
        }
        chomp $line;

        my @flds = split /\t/, $line;

        # Only lines that actually have an ALT column can hold an indel.
        if (defined $flds[4]) {
            my ($ref, $alt) = @flds[3, 4];
            if ($alt =~ /^-/) {
                @flds[3, 4] = ($ref . substr($alt, 1), $ref);
            }
            elsif ($alt =~ /^\+/) {
                $flds[4] = $ref . substr($alt, 1);
            }
        }

        my $record = join("\t", @flds) . "\n";

        if (!defined $chr_ord) {
            print {$out_fh} $record;
            next;
        }

        # Sorted mode needs both a chromosome and a position.
        next if @flds < 2;

        my ($chrom, $pos) = @flds[0, 1];
        push @seen_chroms, $chrom if !exists $records_at{$chrom};
        push @{ $records_at{$chrom}{$pos} }, $record;
    }
    close $in_fh;

    if (defined $chr_ord) {
        my %listed = map { $_ => 1 } @{$chr_ord};
        my @order  = (@{$chr_ord}, grep { !$listed{$_} } @seen_chroms);

        for my $chrom (@order) {
            # delete() so a chromosome listed twice is written only once.
            my $by_pos = delete $records_at{$chrom} or next;
            for my $pos (sort { $a <=> $b } keys %{$by_pos}) {
                print {$out_fh} @{ $by_pos->{$pos} };
            }
        }
    }

    close $out_fh or die "Can't close '$output': $!\n";
    return;
}
<!--
  varscan_mpileup.xml
  Galaxy tool definition for the VarScan mpileup2snp / mpileup2indel /
  mpileup2cns commands. The command line hands every value to
  varscan_mpileup.pl as a KEY::value argument.
-->
<tool id="varscan_mpileup" name="VarScan mpileup" version="2.3.5">
  <description>
    mutation caller for targeted, exome, and whole-genome resequencing
  </description>
  <requirements>
    <requirement type="package" version="2.3.5">VarScan</requirement>
  </requirements>
  <command interpreter="perl">

    varscan_mpileup.pl
    "COMMAND::java -jar \$JAVA_JAR_PATH/VarScan.v2.3.5.jar $exe_command"
    "INPUT::$in_file"
    "OUTPUT::$output"
    "LOG::$log"
    "OPTION::--min-coverage $min_coverage"
    "OPTION::--min-reads2 $min_reads2"
    "OPTION::--min-avg-qual $min_avg_qual"
    "OPTION::--min-var-freq $min_var_freq"
    "OPTION::--min-freq-for-hom $min_freq_for_hom"
    "OPTION::--p-value $p_value"
    "OPTION::--strand-filter $strand_filter"
    "OPTION::--output-vcf 1"

    #if ($vcf_sample_list):
        "OPTION::--vcf-sample-list $vcf_sample_list"
    #end if
    "OPTION::--variants $variants"

  </command>

  <inputs>

    <param name="exe_command" type="select" label="Command" help="" optional="false">
      <option value="mpileup2snp" >mpileup2snp</option>
      <option value="mpileup2indel">mpileup2indel</option>
      <option value="mpileup2cns">mpileup2cns</option>
    </param>
    <param name="in_file" type="data" format="pileup" label="mpileup file" help="The SAMtools mpileup file" />
    <param name="min_coverage" type="integer" label="min-coverage" help="" optional="true" value="8"/>
    <param name="min_reads2" type="integer" label="min-reads2" help="" optional="true" value="2"/>
    <param name="min_avg_qual" type="integer" label="min-avg-qual" help="" optional="true" value="15"/>
    <param name="min_var_freq" type="float" label="min-var-freq" help="" optional="true" value="0.01"/>
    <param name="min_freq_for_hom" type="float" label="min-freq-for-hom" help="" optional="true" value="0.75"/>
    <param name="p_value" type="text" label="p-value" help="" optional="true" value="0.99"/>
    <param name="strand_filter" type="integer" label="strand-filter" help="" optional="true" value="1"/>
    <param name="vcf_sample_list" type="data" label="vcf-sample-list" format="txt" help="" optional="true" />
    <param name="variants" type="integer" label="variants" help="Set to 1 to report only variants" optional="true" value="1"/>

  </inputs>
  <outputs>
    <data type="data" format="vcf" name="output" label="${tool.name} result on ${on_string}"/>
    <data type="data" format="txt" name="log" label="${tool.name} result on ${on_string} (log) "/>
  </outputs>

  <help>

.. class:: infomark

**What it does**

::

  VarScan is a platform-independent mutation caller for targeted, exome, and whole-genome resequencing data generated on Illumina, SOLiD, Life/PGM, Roche/454, and similar instruments. The newest version, VarScan 2, is written in Java, so it runs on most operating systems. It can be used to detect different types of variation:

    Germline variants (SNPs and indels) in individual samples or pools of samples.
    Multi-sample variants (shared or private) in multi-sample datasets (with mpileup).
    Somatic mutations, LOH events, and germline variants in tumor-normal pairs.
    Somatic copy number alterations (CNAs) in tumor-normal exome data.


**Input**

::

  mpileup file - The SAMtools mpileup file


**Parameters**

::

  commands
    mpileup2snp      Identify SNPs from an mpileup file
    mpileup2indel    Identify indels from an mpileup file
    mpileup2cns      Call consensus and variants from an mpileup file

  min-coverage
    Minimum read depth at a position to make a call [8]

  min-reads2
    Minimum supporting reads at a position to call variants [2]

  min-avg-qual
    Minimum base quality at a position to count a read [15]

  min-var-freq
    Minimum variant allele frequency threshold [0.01]

  min-freq-for-hom
    Minimum frequency to call homozygote [0.75]

  p-value
    Default p-value threshold for calling variants [0.99]

  strand-filter
    Ignore variants with >90% support on one strand [1]

  output-vcf
    If set to 1, outputs in VCF format (this tool always sets it to 1)

  vcf-sample-list
    For VCF output, a list of sample names in order, one per line

  variants
    Report only variant (SNP/indel) positions [VarScan default: 0; this tool defaults to 1]



  </help>
</tool>
#!/usr/bin/perl
#
# varscan_somatic.pl
#
# Galaxy wrapper around "VarScan somatic".
#
# Every command-line argument has the form KEY::value, where KEY is one of:
#
#   COMMAND  shell command that starts VarScan (may reference environment
#            variables such as $JAVA_JAR_PATH, so it is run through the shell)
#   NORMAL   path of the normal-sample mpileup file
#   TUMOR    path of the tumor-sample mpileup file
#   OPTION   one VarScan option, e.g. "--min-coverage 8" (may be repeated)
#   OUTPUT   path of the VCF file to create
#
# The script reads VarScan's results from "output.snp.vcf" and
# "output.indel.vcf" in the current working directory, appends the indel
# records to the SNP file and rewrites the merged file into OUTPUT with
# vs2vcf(), ordered by chromosome and position.

use strict;
use warnings;
use Cwd;

# Run only when executed as a script, so the subs can be loaded and tested
# on their own.
main(@ARGV) unless caller;

# Parse the KEY::value arguments, run VarScan and build the merged VCF.
# Dies on an unknown key, a missing mandatory value or a failed VarScan run.
sub main {
    my @args = @_;

    die qq(
Bad number of inputs

) if !@args;

    my $command = q{};
    my $normal  = q{};
    my $tumor   = q{};
    my $output  = q{};
    my @options;

    my $working_dir = cwd();
    my $snp         = "$working_dir/output.snp.vcf";
    my $indels      = "$working_dir/output.indel.vcf";

    for my $input (@args) {
        # Limit of 2: a value that itself contains "::" must stay intact.
        my ($key, $value) = split /::/, $input, 2;
        $key   = q{} if !defined $key;
        $value = q{} if !defined $value;

        # One single chain: a COMMAND argument must not fall through to the
        # "Unknown Input" branch.
        if ($key eq 'COMMAND') {
            $command = $value;
        }
        elsif ($key eq 'NORMAL') {
            $normal = $value;
        }
        elsif ($key eq 'TUMOR') {
            $tumor = $value;
        }
        elsif ($key eq 'OPTION') {
            push @options, $value if _usable_option($value);
        }
        elsif ($key eq 'OUTPUT') {
            $output = $value;
        }
        else {
            die("Unknown Input: $input\n");
        }
    }

    die "Missing COMMAND argument\n" if !length $command;
    die "Missing NORMAL argument\n"  if !length $normal;
    die "Missing TUMOR argument\n"   if !length $tumor;
    die "Missing OUTPUT argument\n"  if !length $output;

    # $command and the options are left unquoted on purpose: the command
    # relies on shell expansion and each option is "--name value". File
    # paths are quoted so that unusual characters cannot break the command.
    _run_shell(join q{ },
        $command, _shell_quote($normal), _shell_quote($tumor), @options);

    _append_indel_records($indels, $snp);

    my @chr_ord = chromosome_order($tumor);

    vs2vcf($snp, $output, \@chr_ord);
    return;
}

# True when an OPTION value should be passed on to VarScan.
# Drops empty values and "--name " with nothing after the name.
# NOTE(review): the latter presumably comes from an optional Galaxy field
# that was left blank - confirm against the tool XML.
sub _usable_option {
    my ($text) = @_;
    return 0 if $text !~ /\S/;
    return 0 if $text =~ /\A\s*--\S+\s+\z/;
    return 1;
}

# Wrap a string in single quotes for /bin/sh, escaping embedded quotes.
sub _shell_quote {
    my ($text) = @_;
    $text =~ s/'/'\\''/g;
    return "'$text'";
}

# Run a command line through the shell and die unless it exits with 0.
sub _run_shell {
    my ($shell_cmd) = @_;

    my $status = system($shell_cmd);
    return if $status == 0;

    die "Failed to start '$shell_cmd': $!\n" if $status == -1;
    die sprintf("Command '%s' died with signal %d\n", $shell_cmd, $status & 127)
        if $status & 127;
    die sprintf("Command '%s' exited with status %d\n", $shell_cmd, $status >> 8);
}

# Append the data lines of the indel file to the SNP file.
# Lines starting with "#" and a "chrom position" column-header line are
# skipped. Does nothing when the indel file does not exist.
sub _append_indel_records {
    my ($indel_path, $snp_path) = @_;

    return if !-e $indel_path;

    open my $indel_fh, '<', $indel_path
        or die "Can't open '$indel_path': $!\n";
    open my $snp_fh, '>>', $snp_path
        or die "Can't append to '$snp_path': $!\n";

    while (my $line = <$indel_fh>) {
        next if $line =~ /^#/;
        next if $line =~ /^chrom[ \t]+position/;
        print {$snp_fh} $line;
    }

    close $indel_fh;
    close $snp_fh or die "Can't close '$snp_path': $!\n";
    return;
}

# Rewrite a VarScan "VCF" file so that indel alleles are plain base strings.
#
#   $input    path of the file written by VarScan
#   $output   path of the file to create
#   $chr_ord  optional array reference of chromosome names; when given,
#             records are written chromosome by chromosome in that order and
#             sorted numerically by position within each chromosome
#
# Lines starting with "#" are copied unchanged. In every other line column 4
# is taken as REF and column 5 as ALT:
#   ALT "-XX" (deletion)  -> REF becomes REF+XX, ALT becomes the old REF
#   ALT "+XX" (insertion) -> ALT becomes REF+XX
#
# In sorted mode, records that share a chromosome and position are all kept
# (in input order), and chromosomes that are missing from $chr_ord are
# written after the listed ones in order of first appearance, so no record
# is lost when the order list is empty or incomplete.
#
# Dies when a file cannot be opened or the output cannot be closed.
sub vs2vcf {
    my ($input, $output, $chr_ord) = @_;

    open my $in_fh,  '<', $input  or die "Can't open '$input': $!\n";
    open my $out_fh, '>', $output or die "Can't create '$output': $!\n";

    my %records_at;     # chromosome => { position => [ record lines ] }
    my @seen_chroms;    # chromosomes in order of first appearance

    while (my $line = <$in_fh>) {
        if ($line =~ /^#/) {
            print {$out_fh} $line;
            next;
        }
        chomp $line;

        my @flds = split /\t/, $line;

        # Only lines that actually have an ALT column can hold an indel.
        if (defined $flds[4]) {
            my ($ref, $alt) = @flds[3, 4];
            if ($alt =~ /^-/) {
                @flds[3, 4] = ($ref . substr($alt, 1), $ref);
            }
            elsif ($alt =~ /^\+/) {
                $flds[4] = $ref . substr($alt, 1);
            }
        }

        my $record = join("\t", @flds) . "\n";

        if (!defined $chr_ord) {
            print {$out_fh} $record;
            next;
        }

        # Sorted mode needs both a chromosome and a position.
        next if @flds < 2;

        my ($chrom, $pos) = @flds[0, 1];
        push @seen_chroms, $chrom if !exists $records_at{$chrom};
        push @{ $records_at{$chrom}{$pos} }, $record;
    }
    close $in_fh;

    if (defined $chr_ord) {
        my %listed = map { $_ => 1 } @{$chr_ord};
        my @order  = (@{$chr_ord}, grep { !$listed{$_} } @seen_chroms);

        for my $chrom (@order) {
            # delete() so a chromosome listed twice is written only once.
            my $by_pos = delete $records_at{$chrom} or next;
            for my $pos (sort { $a <=> $b } keys %{$by_pos}) {
                print {$out_fh} @{ $by_pos->{$pos} };
            }
        }
    }

    close $out_fh or die "Can't close '$output': $!\n";
    return;
}

# Return the sequence names found in the @SQ header lines that
# "samtools view -H $input" prints, in header order.
#
# Returns an empty list when samtools cannot be started or prints no @SQ
# line; vs2vcf() then falls back to the order in which chromosomes appear
# in the data.
# NOTE(review): the caller passes the tumor mpileup file here; samtools
# presumably cannot read a header from a pileup file, so the fallback is
# likely the normal case - confirm.
sub chromosome_order {
    my $input = shift;

    # List form: the path is passed to samtools directly, not via a shell.
    open my $sam_fh, '-|', 'samtools', 'view', '-H', $input
        or return;

    my @names;
    while (my $line = <$sam_fh>) {
        next if $line !~ /^\@SQ\t/;
        # SN may be followed by any number of other tags, not only LN.
        push @names, $1 if $line =~ /\tSN:([^\t\n]+)/;
    }
    close $sam_fh;    # a non-zero samtools exit status is not an error here

    return @names;
}
<!--
  varscan_somatic.xml
  Galaxy tool definition for "VarScan somatic". The command line hands every
  value to varscan_somatic.pl as a KEY::value argument.
-->
<tool id="varscan_somatic" name="VarScan Somatic" version="2.3.5">
  <description>
    somatic mutation caller for cancer genomics
  </description>
  <requirements>
    <requirement type="package" version="2.3.5">VarScan</requirement>
  </requirements>
  <command interpreter="perl">
    varscan_somatic.pl
    "COMMAND::java -jar \$JAVA_JAR_PATH/VarScan.v2.3.5.jar somatic"
    "NORMAL::$normal"
    "TUMOR::$tumor"
    "OUTPUT::$output"

    "OPTION::--min-coverage $min_coverage"
    "OPTION::--min-coverage-normal $min_coverage_normal"
    "OPTION::--min-coverage-tumor $min_coverage_tumor"

    "OPTION::--min-var-freq $min_var_freq"
    "OPTION::--min-freq-for-hom $min_freq_for_hom"

    "OPTION::--normal-purity $normal_purity"
    "OPTION::--tumor-purity $tumor_purity"

    "OPTION::--p-value $p_value"
    "OPTION::--somatic-p-value $somatic_p_value"

    "OPTION::--strand-filter $strand_filter"
    "OPTION::--validation $validation"
    "OPTION::--output-vcf 1"

  </command>

  <inputs>

    <param name="normal" type="data" format="pileup" label="normal mpileup file" help="The SAMtools mpileup file for normal sample" />
    <param name="tumor" type="data" format="pileup" label="tumor mpileup file" help="The SAMtools mpileup file for tumor sample" />

    <param name="min_coverage" type="integer" label="min-coverage" help="" optional="true" value="8"/>
    <param name="min_coverage_normal" type="integer" label="min-coverage-normal" help="" optional="true" value="8"/>
    <param name="min_coverage_tumor" type="integer" label="min-coverage-tumor" help="" optional="true" value="6"/>

    <param name="min_var_freq" type="float" label="min-var-freq" help="" optional="true" value="0.10"/>
    <param name="min_freq_for_hom" type="float" label="min-freq-for-hom" help="" optional="true" value="0.75"/>

    <param name="normal_purity" type="float" label="normal-purity" help="" optional="true" value="1.00"/>
    <param name="tumor_purity" type="float" label="tumor-purity" help="" optional="true" value="1.00"/>

    <param name="p_value" type="text" label="p-value" help="" optional="true" value="0.99"/>
    <param name="somatic_p_value" type="text" label="somatic-p-value" help="" optional="true" value="0.05"/>

    <param name="strand_filter" type="integer" label="strand-filter" help="" optional="true" value="1"/>
    <param name="validation" type="integer" label="validation" help="" optional="true" value="0"/>

  </inputs>
  <outputs>
    <data type="data" format="vcf" name="output" label="${tool.name} result on ${on_string}"/>
  </outputs>

  <help>

.. class:: infomark

**What it does**

::

  VarScan is a platform-independent mutation caller for targeted, exome, and whole-genome resequencing data generated on Illumina, SOLiD, Life/PGM, Roche/454, and similar instruments. The newest version, VarScan 2, is written in Java, so it runs on most operating systems. It can be used to detect different types of variation:

    Germline variants (SNPs and indels) in individual samples or pools of samples.
    Multi-sample variants (shared or private) in multi-sample datasets (with mpileup).
    Somatic mutations, LOH events, and germline variants in tumor-normal pairs.
    Somatic copy number alterations (CNAs) in tumor-normal exome data.


**Input**

::

  mpileup normal file - The SAMtools mpileup file for normal
  mpileup tumor file - The SAMtools mpileup file for tumor


**Parameters**

::

  min-coverage
    Minimum read depth at a position to make a call [8]

  min-coverage-normal
    Minimum coverage in normal to call somatic [8]

  min-coverage-tumor
    Minimum coverage in tumor to call somatic [6]

  min-var-freq
    Minimum variant frequency to call a heterozygote [0.10]

  min-freq-for-hom
    Minimum frequency to call homozygote [0.75]

  normal-purity
    Estimated purity (non-tumor content) of normal sample [1.00]

  tumor-purity
    Estimated purity (tumor content) of tumor sample [1.00]

  p-value
    Default p-value threshold for calling variants [0.99]

  somatic-p-value
    P-value threshold to call a somatic site [0.05]

  strand-filter
    If set to 1, removes variants with >90% strand bias

  validation
    If set to 1, outputs all compared positions even if non-variant

  output-vcf
    If set to 1, outputs in VCF format (this tool always sets it to 1)



  </help>
</tool>