changeset 2:51969e284317 draft default tip

Uploaded
author fcaramia
date Thu, 20 Jun 2013 00:00:22 -0400
parents 893954763c0e
children
files tool_dependencies.xml varscan_mpileup.pl varscan_mpileup.xml varscan_somatic.pl varscan_somatic.xml
diffstat 5 files changed, 528 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml	Thu Jun 20 00:00:22 2013 -0400
@@ -0,0 +1,19 @@
+<?xml version="1.0"?>
+<tool_dependency>
+    <package name="VarScan" version="2.3.5">
+        <install version="1.0">
+            <actions>
+                <action type="download_by_url">http://downloads.sourceforge.net/project/varscan/VarScan.v2.3.5.jar</action>
+                <action type="move_file">
+                    <source>VarScan.v2.3.5.jar</source>
+                    <destination>$INSTALL_DIR/jars</destination>
+                </action>
+                <action type="set_environment">
+                    <environment_variable name="JAVA_JAR_PATH" action="set_to">$INSTALL_DIR/jars</environment_variable>
+                </action>
+            </actions>
+        </install>
+        <readme>
+        </readme>
+    </package>
+</tool_dependency>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/varscan_mpileup.pl	Thu Jun 20 00:00:22 2013 -0400
@@ -0,0 +1,116 @@
+#!/usr/bin/perl
+
+use strict;
+use Cwd;
+
+# Driver: parse the "KEY::value" arguments supplied by the Galaxy wrapper, run
+# VarScan through the shell (dying if it fails), then call vs2vcf().
+die qq(
+Bad number of inputs
+
+) if(!@ARGV);
+
+my $options = "";     # accumulated OPTION values, separated by spaces
+my $file = "";        # INPUT value, placed right after the command
+my $command = "";     # COMMAND value, run through the shell
+my $output = "";      # OUTPUT value, the file vs2vcf() creates
+my $working_dir = cwd();
+my $temp_vcf = "$working_dir/temp";    # captures the command's stdout
+my $log = "";         # LOG value, receives the command's stderr
+
+foreach my $input (@ARGV) {
+	# A limit of 2 keeps any further "::" inside the value intact.
+	my ($key, $value) = split(/::/, $input, 2);
+	$value = "" unless defined $value;
+	if ($key eq "COMMAND") {
+		$command = $value;
+	}
+	elsif ($key eq "INPUT") {
+		$file = $value;
+	}
+	elsif ($key eq "OPTION") {
+		$options = "$options $value";
+	}
+	elsif ($key eq "OUTPUT") {
+		$output = $value;
+	}
+	elsif ($key eq "LOG") {
+		$log = $value;
+	}
+	else {
+		die("Unknown Input: $input\n");
+	}
+}
+
+# An empty value would yield a malformed shell command (e.g. a bare "2>").
+die "COMMAND, INPUT, OUTPUT and LOG are all required\n" if grep { $_ eq "" } ($command, $file, $output, $log);
+my $status = system("$command $file $options 1>$temp_vcf 2>$log");
+die "VarScan command failed (wait status $status); see $log\n" if $status != 0;
+
+vs2vcf($temp_vcf, $output);
+
+
+# Rewrite VarScan-style indel alleles in a tab-separated variant file.
+#
+# Arguments:
+#   $input   - path of the file to read
+#   $output  - path of the file to create (truncated if it exists)
+#   $chr_ord - optional array ref of chromosome names. When given, records are
+#              written chromosome by chromosome in that order, sorted
+#              numerically by position; chromosomes missing from the list
+#              follow in order of first appearance.
+#
+# Lines starting with '#' are copied to the output as soon as they are read.
+# On every other line, field 4 is taken as REF and field 5 as ALT:
+#   ALT "-XYZ" (deletion)  => REF becomes REF.XYZ and ALT becomes REF
+#   ALT "+XYZ" (insertion) => ALT becomes REF.XYZ
+# Dies when a file cannot be opened or the output cannot be flushed on close.
+sub vs2vcf {
+	my $input = shift;
+	my $output = shift;
+	my $chr_ord = shift;
+
+	# Three-argument open with lexical handles: the path is never parsed as
+	# an open mode and the handles are private to this call.
+	open(my $in, '<', $input) or die "Can't open '$input': $!\n";
+	open(my $out, '>', $output) or die "Can't create '$output': $!\n";
+
+	my %records;    # chromosome => position => [ output lines ]
+	my @seen;       # chromosomes in order of first appearance
+	while (my $line = <$in>) {
+		if ($line =~ /^#/) {
+			print {$out} $line;
+			next;
+		}
+		chomp $line;
+		my @flds = split("\t", $line);
+		my ($ref, $alt) = @flds[3, 4];
+		if (defined $alt && $alt =~ /^\-/) {
+			($flds[3], $flds[4]) = ($ref.substr($alt,1), $ref);
+		}
+		elsif (defined $alt && $alt =~ /^\+/) {
+			$flds[4] = $ref.substr($alt,1);
+		}
+		my $record = join("\t", @flds)."\n";
+		if (!defined $chr_ord) {
+			print {$out} $record;
+			next;
+		}
+		next unless @flds;    # blank line: nothing to sort
+		# A list per position keeps an SNP and an indel that share a site.
+		push @seen, $flds[0] unless exists $records{$flds[0]};
+		push @{ $records{$flds[0]}{$flds[1]} }, $record;
+	}
+	close($in);
+	if (defined $chr_ord) {
+		for my $chrom (@{ $chr_ord }, @seen) {
+			# delete() makes sure each chromosome is written only once.
+			my $by_pos = delete $records{$chrom} or next;
+			for my $pos (sort { $a <=> $b } keys %{ $by_pos }) {
+				print {$out} @{ $by_pos->{$pos} };
+			}
+		}
+	}
+	close($out) or die "Can't finish writing '$output': $!\n";
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/varscan_mpileup.xml	Thu Jun 20 00:00:22 2013 -0400
@@ -0,0 +1,124 @@
+<tool id="varscan_mpileup" name="VarScan mpileup" version="2.3.5">
+  <description>
+        mutation caller for targeted, exome, and whole-genome resequencing
+  </description>
+  <requirements>
+  	<requirement type="package" version="2.3.5">VarScan</requirement>
+  </requirements>
+  <command interpreter="perl">
+  	
+	varscan_mpileup.pl 
+	"COMMAND::java -jar \$JAVA_JAR_PATH/VarScan.v2.3.5.jar $exe_command" 
+  	"INPUT::$in_file"
+  	"OUTPUT::$output"
+  	"LOG::$log"
+	"OPTION::--min-coverage $min_coverage"
+	"OPTION::--min-reads2 $min_reads2"
+	"OPTION::--min-avg-qual $min_avg_qual"
+	"OPTION::--min-var-freq $min_var_freq"
+	"OPTION::--min-freq-for-hom $min_freq_for_hom"
+	"OPTION::--p-value $p_value"	
+	"OPTION::--strand-filter $strand_filter"	
+	"OPTION::--output-vcf 1"	
+	
+	#if ($vcf_sample_list):
+		"OPTION::--vcf-sample-list $vcf_sample_list"
+	#end if
+	"OPTION::--variants $variants"	
+	
+	
+  	
+  </command>
+
+  <inputs>
+  	
+	<param name="exe_command" type="select" label="Command" help="" optional="false">
+		<option value="mpileup2snp" >mpileup2snp</option>
+		<option value="mpileup2indel">mpileup2indel</option>
+		<option value="mpileup2cns">mpileup2cns</option>
+	</param>
+	<param name="in_file" type="data" format="pileup" label="mpileup file" help="The SAMtools mpileup file" />
+	<param name="min_coverage" type="integer" label="min-coverage" help="" optional="true" value="8"/>
+	<param name="min_reads2" type="integer" label="min-reads2" help="" optional="true" value="2"/>
+	<param name="min_avg_qual" type="integer" label="min-avg-qual" help="" optional="true" value="15"/>
+	<param name="min_var_freq" type="float" label="min-var-freq" help="" optional="true" value="0.01"/>
+	<param name="min_freq_for_hom" type="float" label="min-freq-for-hom" help="" optional="true" value="0.75"/>
+	<param name="p_value" type="text" label="p-value" help="" optional="true" value="0.99"/>
+	<param name="strand_filter" type="integer" label="strand-filter" help="" optional="true" value="1"/>
+	<param name="vcf_sample_list" type="data" label="vcf-sample-list" format="txt" help="" optional="true" />
+	<param name="variants" type="integer" label="variants" help="Set to 1 to report only variants" optional="true" value="1"/>
+	
+	
+  </inputs>
+  <outputs>
+  	<data type="data" format="vcf" name="output" label="${tool.name} result on ${on_string}"/>
+  	<data type="data" format="txt" name="log" label="${tool.name} result on ${on_string} (log) "/>
+  </outputs>
+  	
+  <help> 
+
+.. class:: infomark
+
+**What it does**
+
+::
+
+ VarScan is a platform-independent mutation caller for targeted, exome, and whole-genome resequencing data generated on Illumina, SOLiD, Life/PGM, Roche/454, and similar instruments. The newest version, VarScan 2, is written in Java, so it runs on most  operating systems. It can be used to detect different types of variation:
+
+    Germline variants (SNPs and indels) in individual samples or pools of samples.
+    Multi-sample variants (shared or private) in multi-sample datasets (with mpileup).
+    Somatic mutations, LOH events, and germline variants in tumor-normal pairs.
+    Somatic copy number alterations (CNAs) in tumor-normal exome data.
+
+
+**Input**
+
+::
+
+  mpileup file - The SAMtools mpileup file
+ 
+
+**Parameters**
+
+::
+
+  commands
+	mpileup2snp		Identify SNPs from an mpileup file
+	mpileup2indel		Identify indels from an mpileup file
+	mpileup2cns		Call consensus and variants from an mpileup file
+
+  min-coverage	
+  	Minimum read depth at a position to make a call [8]
+
+  min-reads2	
+  	Minimum supporting reads at a position to call variants [2]
+
+  min-avg-qual	
+  	Minimum base quality at a position to count a read [15]
+
+  min-var-freq	
+        Minimum variant allele frequency threshold [0.01]
+
+  min-freq-for-hom
+  	Minimum frequency to call homozygote [0.75]
+  
+  p-value
+  	Default p-value threshold for calling variants [99e-02]
+  
+  strand-filter
+  	Ignore variants with >90% support on one strand [1]
+  
+  output-vcf
+  	If set to 1, outputs in VCF format
+  
+  vcf-sample-list
+  	For VCF output, a list of sample names in order, one per line
+  
+  variants
+  	Report only variant (SNP/indel) positions [VarScan default 0; this form defaults to 1]
+
+
+  
+  </help>
+</tool>
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/varscan_somatic.pl	Thu Jun 20 00:00:22 2013 -0400
@@ -0,0 +1,138 @@
+#!/usr/bin/perl
+
+
+use strict;
+use Cwd;
+
+# Driver: parse the "KEY::value" arguments supplied by the Galaxy wrapper, run
+# the command through the shell, merge the SNP and indel files it leaves in the
+# working directory, and write the combined records with vs2vcf().
+die qq(
+Bad number of inputs
+
+) if(!@ARGV);
+
+my $options = "";
+my $normal = "";
+my $command = "";
+my $tumor = "";
+my $output = "";
+my $working_dir = cwd();
+# Presumably VarScan's default output names under --output-vcf -- confirm.
+my $snp = "$working_dir/output.snp.vcf";
+my $indels = "$working_dir/output.indel.vcf";
+
+foreach my $input (@ARGV) {
+	# A limit of 2 keeps any further "::" inside the value intact.
+	my ($key, $value) = split(/::/, $input, 2);
+	$value = "" unless defined $value;
+	# One elsif chain: a recognised key must never reach the final else.
+	if    ($key eq "COMMAND") { $command = $value; }
+	elsif ($key eq "NORMAL")  { $normal = $value; }
+	elsif ($key eq "TUMOR")   { $tumor = $value; }
+	elsif ($key eq "OPTION")  { $options = "$options $value"; }
+	elsif ($key eq "OUTPUT")  { $output = $value; }
+	else                      { die("Unknown Input: $input\n"); }
+}
+die "COMMAND, NORMAL, TUMOR and OUTPUT are all required\n" if grep { $_ eq "" } ($command, $normal, $tumor, $output);
+
+my $status = system("$command $normal $tumor $options ");
+die "VarScan command failed (wait status $status)\n" if $status != 0;
+
+# Append the indel records, minus '#' and "chrom position" header lines, to
+# the SNP file so that vs2vcf() reads a single stream of records.
+open(my $indel_in, '<', $indels) or die "Can't open '$indels': $!\n";
+open(my $snp_out, '>>', $snp) or die "Can't append to '$snp': $!\n";
+while (my $line = <$indel_in>) {
+	next if $line =~ /^#/ || $line =~ /^chrom position/;
+	print {$snp_out} $line;
+}
+close($indel_in);
+close($snp_out) or die "Can't finish writing '$snp': $!\n";
+
+# chromosome_order() asks samtools for the sequence names in $tumor's header.
+my @chr_ord = chromosome_order($tumor);
+
+vs2vcf($snp, $output,\@chr_ord);
+
+
+# Rewrite VarScan-style indel alleles in a tab-separated variant file.
+#
+# Arguments:
+#   $input   - path of the file to read
+#   $output  - path of the file to create (truncated if it exists)
+#   $chr_ord - optional array ref of chromosome names. When given, records are
+#              written chromosome by chromosome in that order, sorted
+#              numerically by position; chromosomes missing from the list
+#              follow in order of first appearance.
+#
+# Lines starting with '#' are copied to the output as soon as they are read.
+# On every other line, field 4 is taken as REF and field 5 as ALT:
+#   ALT "-XYZ" (deletion)  => REF becomes REF.XYZ and ALT becomes REF
+#   ALT "+XYZ" (insertion) => ALT becomes REF.XYZ
+# Dies when a file cannot be opened or the output cannot be flushed on close.
+sub vs2vcf {
+	my $input = shift;
+	my $output = shift;
+	my $chr_ord = shift;
+
+	# Three-argument open with lexical handles: the path is never parsed as
+	# an open mode and the handles are private to this call.
+	open(my $in, '<', $input) or die "Can't open '$input': $!\n";
+	open(my $out, '>', $output) or die "Can't create '$output': $!\n";
+
+	my %records;    # chromosome => position => [ output lines ]
+	my @seen;       # chromosomes in order of first appearance
+	while (my $line = <$in>) {
+		if ($line =~ /^#/) {
+			print {$out} $line;
+			next;
+		}
+		chomp $line;
+		my @flds = split("\t", $line);
+		my ($ref, $alt) = @flds[3, 4];
+		if (defined $alt && $alt =~ /^\-/) {
+			($flds[3], $flds[4]) = ($ref.substr($alt,1), $ref);
+		}
+		elsif (defined $alt && $alt =~ /^\+/) {
+			$flds[4] = $ref.substr($alt,1);
+		}
+		my $record = join("\t", @flds)."\n";
+		if (!defined $chr_ord) {
+			print {$out} $record;
+			next;
+		}
+		next unless @flds;    # blank line: nothing to sort
+		# A list per position keeps an SNP and an indel that share a site.
+		push @seen, $flds[0] unless exists $records{$flds[0]};
+		push @{ $records{$flds[0]}{$flds[1]} }, $record;
+	}
+	close($in);
+	if (defined $chr_ord) {
+		for my $chrom (@{ $chr_ord }, @seen) {
+			# delete() makes sure each chromosome is written only once.
+			my $by_pos = delete $records{$chrom} or next;
+			for my $pos (sort { $a <=> $b } keys %{ $by_pos }) {
+				print {$out} @{ $by_pos->{$pos} };
+			}
+		}
+	}
+	close($out) or die "Can't finish writing '$output': $!\n";
+}
+
+
+# Return the reference sequence names found in the @SQ header lines printed
+# by `samtools view -H $input`, in header order. The list is empty when
+# samtools cannot be started or prints no @SQ line carrying an SN tag.
+# NOTE(review): the caller passes the tumor pileup path here -- confirm that
+# samtools can actually read a header from that file.
+sub chromosome_order {
+	my $input = shift;
+	# List-form pipe open: $input is passed to samtools without shell parsing.
+	open(my $header, '-|', 'samtools', 'view', '-H', $input) or return;
+	my @names = map { /^\@SQ\t(?:.*?\t)?SN:([^\t\n]+)/ ? $1 : () } <$header>;
+	close($header);
+	return(@names);
+}
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/varscan_somatic.xml	Thu Jun 20 00:00:22 2013 -0400
@@ -0,0 +1,131 @@
+<tool id="varscan_somatic" name="VarScan Somatic" version="2.3.5">
+  <description>
+        somatic mutation caller for cancer genomics
+  </description>
+  <requirements>
+  	<requirement type="package" version="2.3.5">VarScan</requirement>
+  </requirements>
+  <command interpreter="perl">
+  	varscan_somatic.pl 
+  	"COMMAND::java -jar \$JAVA_JAR_PATH/VarScan.v2.3.5.jar somatic" 
+  	"NORMAL::$normal" 
+  	"TUMOR::$tumor"
+  	"OUTPUT::$output"
+  	
+	"OPTION::--min-coverage $min_coverage"
+	"OPTION::--min-coverage-normal $min_coverage_normal"
+	"OPTION::--min-coverage-tumor $min_coverage_tumor"
+	
+	"OPTION::--min-var-freq $min_var_freq"
+	"OPTION::--min-freq-for-hom $min_freq_for_hom"
+	
+	"OPTION::--normal-purity $normal_purity"	
+	"OPTION::--tumor-purity $tumor_purity"		
+	
+	"OPTION::--p-value $p_value"	
+	"OPTION::--somatic-p-value $somatic_p_value"	
+
+	"OPTION::--strand-filter $strand_filter"	
+	"OPTION::--validation $validation"	
+	"OPTION::--output-vcf 1"	
+	
+
+  	
+  </command>
+
+  <inputs>
+
+	<param name="normal" type="data" format="pileup" label="normal mpileup file" help="The SAMtools mpileup file for normal sample" />
+	<param name="tumor" type="data" format="pileup" label="tumor mpileup file" help="The SAMtools mpileup file for tumor sample" />
+
+	<param name="min_coverage" type="integer" label="min-coverage" help="" optional="true" value="8"/>
+	<param name="min_coverage_normal" type="integer" label="min-coverage-normal" help="" optional="true" value="8"/>
+	<param name="min_coverage_tumor" type="integer" label="min-coverage-tumor" help="" optional="true" value="6"/>
+	
+	<param name="min_var_freq" type="float" label="min-var-freq" help="" optional="true" value="0.10"/>
+	<param name="min_freq_for_hom" type="float" label="min-freq-for-hom" help="" optional="true" value="0.75"/>
+	
+	<param name="normal_purity" type="float" label="normal-purity" help="" optional="true" value="1.00"/>
+	<param name="tumor_purity" type="float" label="tumor-purity" help="" optional="true" value="1.00"/>
+	
+
+	<param name="p_value" type="text" label="p-value" help="" optional="true" value="0.99"/>
+	<param name="somatic_p_value" type="text" label="somatic-p-value" help="" optional="true" value="0.05"/>
+
+	<param name="strand_filter" type="integer" label="strand-filter" help="" optional="true" value="1"/>
+	<param name="validation" type="integer" label="validation" help="" optional="true" value="0"/>
+	
+  </inputs>
+  <outputs>
+  	<data type="data" format="vcf" name="output" label="${tool.name} result on ${on_string}"/>
+  </outputs>
+  	
+  <help> 
+
+.. class:: infomark
+
+**What it does**
+
+::
+
+ VarScan is a platform-independent mutation caller for targeted, exome, and whole-genome resequencing data generated on Illumina, SOLiD, Life/PGM, Roche/454, and similar instruments. The newest version, VarScan 2, is written in Java, so it runs on most  operating systems. It can be used to detect different types of variation:
+
+    Germline variants (SNPs and indels) in individual samples or pools of samples.
+    Multi-sample variants (shared or private) in multi-sample datasets (with mpileup).
+    Somatic mutations, LOH events, and germline variants in tumor-normal pairs.
+    Somatic copy number alterations (CNAs) in tumor-normal exome data.
+
+
+**Input**
+
+::
+
+  mpileup normal file - The SAMtools mpileup file for normal
+  mpileup tumor file - The SAMtools mpileup file for tumor
+ 
+
+**Parameters**
+
+::
+
+  min-coverage	
+  	Minimum read depth at a position to make a call [8]
+
+  min-coverage-normal	
+  	Minimum coverage in normal to call somatic [8]
+  	
+  min-coverage-tumor	
+  	Minimum coverage in tumor to call somatic [6]
+  	
+  min-var-freq 
+  	Minimum variant frequency to call a heterozygote [0.10]  	  	
+
+  min-freq-for-hom
+  	Minimum frequency to call homozygote [0.75]
+  	
+  normal-purity 
+  	Estimated purity (non-tumor content) of normal sample [1.00]
+  	
+  tumor-purity
+  	Estimated purity (tumor content) of tumor sample [1.00]
+  
+  p-value
+  	Default p-value threshold for calling variants [0.99]
+  	
+  somatic-p-value
+  	P-value threshold to call a somatic site [0.05]  	
+  
+  strand-filter
+  	If set to 1, removes variants with >90% strand bias
+  	
+  validation 
+  	If set to 1, outputs all compared positions even if non-variant
+  
+  output-vcf
+  	If set to 1, outputs in VCF format [Default]
+
+
+  
+  </help>
+</tool>
+