# HG changeset patch
# User fcaramia
# Date 1371700822 14400
# Node ID 51969e284317cbea990064d86e37d04bd8143ea8
# Parent 893954763c0e5a3762ab37d87aa3567d7e870113
Uploaded
diff -r 893954763c0e -r 51969e284317 tool_dependencies.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml Thu Jun 20 00:00:22 2013 -0400
@@ -0,0 +1,19 @@
+
+
+
+
+
+ http://downloads.sourceforge.net/project/varscan/VarScan.v2.3.5.jar
+
+
+ $INSTALL_DIR/jars
+
+
+ $INSTALL_DIR/jars
+
+
+
+
+
+
+
diff -r 893954763c0e -r 51969e284317 varscan_mpileup.pl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/varscan_mpileup.pl Thu Jun 20 00:00:22 2013 -0400
@@ -0,0 +1,116 @@
+#!/usr/bin/perl
+
+use strict;
+use Cwd;
+
+die qq(
+Bad numbr of inputs
+
+) if(!@ARGV);
+
+# Wrapper around a VarScan mpileup2* command. Arguments look like
+# "KEY::value", KEY being COMMAND (command line to run), INPUT (file handed
+# to it), OPTION (extra switch, repeatable), OUTPUT (final VCF) or LOG (stderr).
+my $options ="";
+my $working_dir = cwd();
+my $temp_vcf = "$working_dir/temp";
+my %value_of = (COMMAND => "", INPUT => "", OUTPUT => "", LOG => "");
+foreach my $input (@ARGV)
+{
+    # Split on the first "::" only, so a value that itself contains "::"
+    # is kept whole instead of being cut at the second separator.
+    my ($key, $value) = split /::/, $input, 2;
+    $key = "" unless defined $key;
+    $value = "" unless defined $value;
+    if ($key eq "OPTION")
+    {
+        $options = "$options $value";
+    }
+    elsif (exists $value_of{$key})
+    {
+        $value_of{$key} = $value;
+    }
+    else
+    {
+        die("Unknown Input: $input\n");
+    }
+}
+
+# An empty value here would produce a malformed shell command below.
+for my $required (qw(COMMAND INPUT OUTPUT LOG))
+{
+    die("Missing ${required}:: argument\n") if $value_of{$required} eq "";
+}
+my ($command, $file, $output, $log) = @value_of{qw(COMMAND INPUT OUTPUT LOG)};
+
+# COMMAND is a whole command line, so it goes through the shell; stop on a
+# non-zero status rather than converting partial output.
+system("$command $file $options 1>$temp_vcf 2>$log") == 0
+    or die("Command failed (status $?), see $log\n");
+vs2vcf($temp_vcf, $output);
+
+
+# Rewrite VarScan-style indel alleles in a VCF into explicit REF/ALT strings.
+#
+# Arguments:
+#   $input   - path of the VCF to read
+#   $output  - path of the VCF to write (created or truncated)
+#   $chr_ord - optional array ref of chromosome names. When given, records
+#              are held in memory and written chromosome by chromosome in
+#              that order, sorted numerically by position; records whose
+#              chromosome is not in the list are not written.
+#
+# Lines starting with '#' are copied through as they are read. On every
+# other line, column 4 is taken as REF and column 5 as ALT:
+#   ALT "-XYZ" (deletion):  REF becomes REF.XYZ and ALT becomes the old REF
+#   ALT "+XYZ" (insertion): ALT becomes REF.XYZ
+# Dies when a file cannot be opened or the output cannot be closed.
+sub vs2vcf
+{
+    my $input = shift;
+    my $output = shift;
+    my $chr_ord = shift;
+    open(my $in, '<', $input) or die "Can't open $input': $!\n";
+    open(my $out, '>', $output) or die "Can't create $output': $!\n";
+    my %output;
+
+    while ( my $line = <$in> )
+    {
+        if ( $line =~ /^#/ )
+        {
+            print {$out} $line;
+            next;
+        }
+        chomp $line;
+
+        my @flds = split ( "\t", $line );
+        my $ref = $flds[3];
+        my $alt = $flds[4];
+        if ( $alt =~ /^\-/ )
+        {
+            ($flds[3], $flds[4]) = ($ref.substr($alt,1), $ref);
+        }
+        elsif ( $alt =~ /^\+/ )
+        {
+            $flds[4] = $ref.substr($alt,1);
+        }
+        my $record = join( "\t", @flds)."\n";
+        # Append instead of assigning: several records can share one
+        # chromosome/position and each of them has to be kept.
+        $output{$flds[0]}{$flds[1]} .= $record if defined $chr_ord;
+        print {$out} $record unless defined $chr_ord;
+    }
+    close($in);
+    if ( defined $chr_ord )
+    {
+        for my $chrom (@{ $chr_ord })
+        {
+            for my $pos (sort {$a<=>$b} keys %{ $output{$chrom} })
+            {
+                print {$out} $output{$chrom}{$pos};
+            }
+        }
+    }
+    close($out) or die "Can't close $output: $!\n";
+}
+
diff -r 893954763c0e -r 51969e284317 varscan_mpileup.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/varscan_mpileup.xml Thu Jun 20 00:00:22 2013 -0400
@@ -0,0 +1,124 @@
+
+
+ mutation caller for targeted, exome, and whole-genome resequencing
+
+
+ VarScan
+
+
+
+ varscan_mpileup.pl
+ "COMMAND::java -jar \$JAVA_JAR_PATH/VarScan.v2.3.5.jar $exe_command"
+ "INPUT::$in_file"
+ "OUTPUT::$output"
+ "LOG::$log"
+ "OPTION::--min-coverage $min_coverage"
+ "OPTION::--min-reads2 $min_reads2"
+ "OPTION::--min-avg-qual $min_avg_qual"
+ "OPTION::--min-var-freq $min_var_freq"
+ "OPTION::--min-freq-for-hom $min_freq_for_hom"
+ "OPTION::--p-value $p_value"
+ "OPTION::--strand-filter $strand_filter"
+ "OPTION::--output-vcf 1"
+
+ #if ($vcf_sample_list):
+ "OPTION::--vcf-sample-list $vcf_sample_list"
+ #end if
+ "OPTION::--variants $variants"
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+.. class:: infomark
+
+**What it does**
+
+::
+
+ VarScan is a platform-independent mutation caller for targeted, exome, and whole-genome resequencing data generated on Illumina, SOLiD, Life/PGM, Roche/454, and similar instruments. The newest version, VarScan 2, is written in Java, so it runs on most operating systems. It can be used to detect different types of variation:
+
+ Germline variants (SNPs and indels) in individual samples or pools of samples.
+ Multi-sample variants (shared or private) in multi-sample datasets (with mpileup).
+ Somatic mutations, LOH events, and germline variants in tumor-normal pairs.
+ Somatic copy number alterations (CNAs) in tumor-normal exome data.
+
+
+**Input**
+
+::
+
+ mpileup file - The SAMtools mpileup file
+
+
+**Parameters**
+
+::
+
+ commands
+ mpileup2snp Identify SNPs from an mpileup file
+ mpileup2indel Identify indels from an mpileup file
+ mpileup2cns Call consensus and variants from an mpileup file
+
+ min-coverage
+ Minimum read depth at a position to make a call [8]
+
+ min-reads2
+ Minimum supporting reads at a position to call variants [2]
+
+ min-avg-qual
+ Minimum base quality at a position to count a read [15]
+
+ min-var-freq
+ Minimum variant allele frequency threshold [0.01]
+
+ min-freq-for-hom
+ Minimum frequency to call homozygote [0.75]
+
+ p-value
+ Default p-value threshold for calling variants [99e-02]
+
+ strand-filter
+ Ignore variants with >90% support on one strand [1]
+
+ output-vcf
+ If set to 1, outputs in VCF format
+
+ vcf-sample-list
+ For VCF output, a list of sample names in order, one per line
+
+ variants
+ Report only variant (SNP/indel) positions [0]
+
+
+
+
+
+
diff -r 893954763c0e -r 51969e284317 varscan_somatic.pl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/varscan_somatic.pl Thu Jun 20 00:00:22 2013 -0400
@@ -0,0 +1,138 @@
+#!/usr/bin/perl
+
+
+use strict;
+use Cwd;
+
+die qq(
+Bad numbr of inputs
+
+) if(!@ARGV);
+
+# Wrapper around "VarScan somatic". Arguments look like "KEY::value", KEY
+# being COMMAND (command line to run), NORMAL and TUMOR (its two inputs, in
+# that order), OPTION (extra switch, repeatable) or OUTPUT (final VCF).
+my $options ="";
+my $working_dir = cwd();
+my $snp = "$working_dir/output.snp.vcf";
+my $indels = "$working_dir/output.indel.vcf";
+my %value_of = (COMMAND => "", NORMAL => "", TUMOR => "", OUTPUT => "");
+
+foreach my $input (@ARGV)
+{
+    # Split on the first "::" only, so a value that itself contains "::"
+    # is kept whole instead of being cut at the second separator.
+    my ($key, $value) = split /::/, $input, 2;
+    $key = "" unless defined $key;
+    $value = "" unless defined $value;
+    # All keys share one if/elsif chain, so a recognised key (COMMAND
+    # included) can never reach the final "else" and abort the script.
+    if ($key eq "OPTION")
+    {
+        $options = "$options $value";
+    }
+    elsif (exists $value_of{$key})
+    {
+        $value_of{$key} = $value;
+    }
+    else
+    {
+        die("Unknown Input: $input\n");
+    }
+}
+for my $required (qw(COMMAND NORMAL TUMOR OUTPUT))
+{
+    die("Missing ${required}:: argument\n") if $value_of{$required} eq "";
+}
+my ($command, $normal, $tumor, $output) = @value_of{qw(COMMAND NORMAL TUMOR OUTPUT)};
+
+system("$command $normal $tumor $options ") == 0
+    or die("Command failed (status $?)\n");
+# Append the indel records (minus header lines) to the SNP file. The status
+# is not checked: grep exits 1 when it selects no line at all.
+system("grep -v '^\#' $indels | grep -v '^chrom position' >> $snp");
+
+my @chr_ord = chromosome_order($tumor);
+vs2vcf($snp, $output,\@chr_ord);
+
+
+# Rewrite VarScan-style indel alleles in a VCF into explicit REF/ALT strings.
+#
+# Arguments:
+#   $input   - path of the VCF to read
+#   $output  - path of the VCF to write (created or truncated)
+#   $chr_ord - optional array ref of chromosome names. When given, records
+#              are held in memory and written chromosome by chromosome in
+#              that order, sorted numerically by position; records whose
+#              chromosome is not in the list are not written.
+#
+# Lines starting with '#' are copied through as they are read. On every
+# other line, column 4 is taken as REF and column 5 as ALT:
+#   ALT "-XYZ" (deletion):  REF becomes REF.XYZ and ALT becomes the old REF
+#   ALT "+XYZ" (insertion): ALT becomes REF.XYZ
+# Dies when a file cannot be opened or the output cannot be closed.
+sub vs2vcf
+{
+    my $input = shift;
+    my $output = shift;
+    my $chr_ord = shift;
+    open(my $in, '<', $input) or die "Can't open $input': $!\n";
+    open(my $out, '>', $output) or die "Can't create $output': $!\n";
+    my %output;
+
+    while ( my $line = <$in> )
+    {
+        if ( $line =~ /^#/ )
+        {
+            print {$out} $line;
+            next;
+        }
+        chomp $line;
+
+        my @flds = split ( "\t", $line );
+        my $ref = $flds[3];
+        my $alt = $flds[4];
+        if ( $alt =~ /^\-/ )
+        {
+            ($flds[3], $flds[4]) = ($ref.substr($alt,1), $ref);
+        }
+        elsif ( $alt =~ /^\+/ )
+        {
+            $flds[4] = $ref.substr($alt,1);
+        }
+        my $record = join( "\t", @flds)."\n";
+        # Append instead of assigning: several records can share one
+        # chromosome/position and each of them has to be kept.
+        $output{$flds[0]}{$flds[1]} .= $record if defined $chr_ord;
+        print {$out} $record unless defined $chr_ord;
+    }
+    close($in);
+    if ( defined $chr_ord )
+    {
+        for my $chrom (@{ $chr_ord })
+        {
+            for my $pos (sort {$a<=>$b} keys %{ $output{$chrom} })
+            {
+                print {$out} $output{$chrom}{$pos};
+            }
+        }
+    }
+    close($out) or die "Can't close $output: $!\n";
+}
+
+
+# Return the reference sequence names, in header order, taken from the @SQ
+# lines printed by `samtools view -H $input` (empty list if there are none).
+# SN is found by its tag, so @SQ lines with extra tags are still parsed.
+sub chromosome_order
+{
+    my $input = shift;
+    my $COMM = "samtools view -H $input | grep '^\@SQ'";
+    my @SQ = `$COMM`;
+    chomp @SQ;
+    # A line without an SN tag contributes nothing to the result.
+    my @names = map { $_ =~ /\tSN:([^\t]+)/ ? $1 : () } @SQ;
+    return(@names);
+}
+
+
diff -r 893954763c0e -r 51969e284317 varscan_somatic.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/varscan_somatic.xml Thu Jun 20 00:00:22 2013 -0400
@@ -0,0 +1,131 @@
+
+
+ somatic mutation caller for cancer genomics
+
+
+ VarScan
+
+
+ varscan_somatic.pl
+ "COMMAND::java -jar \$JAVA_JAR_PATH/VarScan.v2.3.5.jar somatic"
+ "NORMAL::$normal"
+ "TUMOR::$tumor"
+ "OUTPUT::$output"
+
+ "OPTION::--min-coverage $min_coverage"
+ "OPTION::--min-coverage-normal $min_coverage_normal"
+ "OPTION::--min-coverage-tumor $min_coverage_tumor"
+
+ "OPTION::--min-var-freq $min_var_freq"
+ "OPTION::--min-freq-for-hom $min_freq_for_hom"
+
+ "OPTION::--normal-purity $normal_purity"
+ "OPTION::--tumor-purity $tumor_purity"
+
+ "OPTION::--p-value $p_value"
+ "OPTION::--somatic-p-value $somatic_p_value"
+
+ "OPTION::--strand-filter $strand_filter"
+ "OPTION::--validation $validation"
+ "OPTION::--output-vcf 1"
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+.. class:: infomark
+
+**What it does**
+
+::
+
+ VarScan is a platform-independent mutation caller for targeted, exome, and whole-genome resequencing data generated on Illumina, SOLiD, Life/PGM, Roche/454, and similar instruments. The newest version, VarScan 2, is written in Java, so it runs on most operating systems. It can be used to detect different types of variation:
+
+ Germline variants (SNPs and indels) in individual samples or pools of samples.
+ Multi-sample variants (shared or private) in multi-sample datasets (with mpileup).
+ Somatic mutations, LOH events, and germline variants in tumor-normal pairs.
+ Somatic copy number alterations (CNAs) in tumor-normal exome data.
+
+
+**Input**
+
+::
+
+ mpileup normal file - The SAMtools mpileup file for normal
+ mpileup tumor file - The SAMtools mpileup file for tumor
+
+
+**Parameters**
+
+::
+
+ min-coverage
+ Minimum read depth at a position to make a call [8]
+
+ min-coverage-normal
+ Minimum coverage in normal to call somatic [8]
+
+ min-coverage-tumor
+ Minimum coverage in tumor to call somatic [6]
+
+ min-var-freq
+ Minimum variant frequency to call a heterozygote [0.10]
+
+ min-freq-for-hom
+ Minimum frequency to call homozygote [0.75]
+
+ normal-purity
+ Estimated purity (non-tumor content) of normal sample [1.00]
+
+ tumor-purity
+ Estimated purity (tumor content) of tumor sample [1.00]
+
+ p-value
+ Default p-value threshold for calling variants [0.99]
+
+ somatic-p-value
+ P-value threshold to call a somatic site [0.05]
+
+ strand-filter
+ If set to 1, removes variants with >90% strand bias
+
+ validation
+ If set to 1, outputs all compared positions even if non-variant
+
+ output-vcf
+ If set to 1, outputs in VCF format [Default]
+
+
+
+
+
+