Mercurial > repos > geert-vandeweyer > varscan_wrapper
diff varscan/varscan_processSomatic.pl @ 4:572397bbe057 draft default tip
Added Xmx10G setting to both varscan_somatic wrappers
author | geert-vandeweyer |
---|---|
date | Wed, 26 Mar 2014 05:24:22 -0400 |
parents | 848f3dc54593 |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/varscan/varscan_processSomatic.pl Wed Mar 26 05:24:22 2014 -0400 @@ -0,0 +1,223 @@ +#!/usr/bin/perl + + +use strict; +use Cwd; + +die qq( +Bad numbr of inputs + +) if(!@ARGV); + +my $options =""; +my $command=""; +my $input=""; +#my $output=""; +my $working_dir = cwd(); +my $log = ''; +my %outfiles; +foreach my $arg (@ARGV) +{ + my @tmp = split "::", $arg; + if($tmp[0] eq "COMMAND") + { + $command = $tmp[1]; + } + elsif($tmp[0] eq "INPUT") + { + $input = $tmp[1]; + } + elsif($tmp[0] eq "OPTION") + { + my @p = split(/\s+/,$tmp[1]); + $options = "$options ${tmp[1]}"; + } + elsif($tmp[0] eq "OUTPUT") + { + my @p = split(/\s+/,$tmp[1]); + $p[0] = substr($p[0],2); + $outfiles{$p[0]} = $p[1]; + } + + elsif($tmp[0] eq "LOG") + { + $log = $tmp[1]; + } + else + { + die("Unknown Input: $input\n"); + } +} +system("ln -s $input $working_dir/in.dat"); + + + +## RUN +$command = "$command $working_dir/in.dat $options > $log 2>&1"; +system($command); + +## if tabular files are kept, write them to galaxy-datafile +if ($outfiles{'loh'} ne 'None') { + ## + system("cp '$working_dir/in.dat.LOH' '".$outfiles{'loh'}."'"); + system("cp '$working_dir/in.dat.LOH.hc' '".$outfiles{'loh'}."'"); + system("cp '$working_dir/in.dat.Germline' '".$outfiles{'germ'}."'"); + system("cp '$working_dir/in.dat.Germline.hc' '".$outfiles{'germ_hc'}."'"); + system("cp '$working_dir/in.dat.Somatic' '".$outfiles{'som'}."'"); + system("cp '$working_dir/in.dat.Somatic.hc' '".$outfiles{'som_hc'}."'"); +} +## if vcf files are kept, generate them, and write to galaxy-datafile +if ($outfiles{'loh_hc_vcf'} ne 'None') { + tab2vcf($working_dir,'in.dat.LOH.hc',$outfiles{'loh_hc_vcf'}); + tab2vcf($working_dir,'in.dat.Germline.hc',$outfiles{'germ_hc_vcf'}); + tab2vcf($working_dir,'in.dat.Somatic.hc',$outfiles{'som_hc_vcf'}); +} +exit; + +sub tab2vcf +{ + my $wd = shift; + my $in = shift; + my $out = shift; + open OUT, ">$out"; + print OUT "##fileformat=VCFv4.1\n"; + print OUT "##source=processSomatic\n"; + print OUT '##INFO=<ID=DP,Number=1,Type=Integer,Description="Total depth of quality bases">'."\n"; + print OUT '##INFO=<ID=SS,Number=1,Type=String,Description="Somatic status of variant (Germline,Somatic,LOH)">'."\n"; + print OUT '##INFO=<ID=VPV,Number=1,Type=Float,Description="Variant P-value">'."\n"; + print OUT '##INFO=<ID=SPV,Number=1,Type=Float,Description="Somatic Variant P-value">'."\n"; + print OUT '##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">'."\n"; + print OUT '##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">'."\n"; + print OUT '##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">'."\n"; + print OUT '##FORMAT=<ID=RD,Number=1,Type=Integer,Description="Depth of reference-supporting bases (reads1)">'."\n"; + print OUT '##FORMAT=<ID=AD,Number=1,Type=Integer,Description="Depth of variant-supporting bases (reads2)">'."\n"; + print OUT '##FORMAT=<ID=FREQ,Number=1,Type=String,Description="Variant allele frequency">'."\n"; + print OUT '##FORMAT=<ID=DP4,Number=1,Type=String,Description="Strand read counts: ref/fwd, ref/rev, var/fwd, var/rev">'."\n"; + print OUT '#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NORMAL TUMOR'."\n"; + open IN, "$wd/$in"; + my $head = <IN>; + while (<IN>) { + chomp; + my @p = split(/\t/,$_); + my $td = $p[4] + $p[5] + $p[8] + $p[9]; + my $vpv = sprintf('%.3E',$p[13]); + my $spv = sprintf('%.3E',$p[14]); + my $info = "DP=$td;SS=$p[12];VPV=$vpv;SPV=$spv"; + ## gt normal string + my $gtnormal = ''; + if ("$p[7]" eq "$p[2]") { + $gtnormal = "0/0"; + } + elsif ("$p[7]" eq "$p[3]") { + $gtnormal = "1/1"; + } + else { + $gtnormal = "0/1"; + } + my $nsum = $p[4] + $p[5]; + $gtnormal .= ":.:$nsum:$p[4]:$p[5]:$p[6]:$p[19],$p[20],$p[21],$p[22]"; + ## gt tumor string + my $gttumor = ''; + if ("$p[11]" eq "$p[2]") { + $gttumor = "0/0"; + } + elsif ("$p[11]" eq "$p[3]") { + $gttumor = "1/1"; + } + else { + $gttumor = "0/1"; + } + my $tsum = $p[8] + $p[9]; + $gttumor .= ":.:$tsum:$p[8]:$p[9]:$p[10]:$p[15],$p[16],$p[17],$p[18]"; + + ## outline + my $line = "$p[0]\t$p[1]\t.\t$p[2]\t$p[3]\t.\tPASS\t$info\tGT:GQ:DP:RD:AD:FREQ:DP4\t$gtnormal\t$gttumor\n"; + print OUT $line; + } + close IN; + close OUT; +} +sub vs2vcf +{ + + # + # G l o b a l v a r i a b l e s + # + my $version = '0.1'; + + # + # Read in file + # + my $input = shift; + my $output = shift; + my $chr_ord = shift; + open(IN, $input) or die "Can't open $input': $!\n"; + open(OUT, ">$output") or die "Can't create $output': $!\n"; + my %output; + + while ( <IN> ) + { + if ( /^#/ ) + { + print OUT; + next; + } + chomp; + my $line = $_; + + my @flds = split ( "\t", $line ); + my $ref = $flds[3]; + my $alt = $flds[4]; + # + # Deletion of bases + # + if ( $alt =~ /^\-/ ) + { + ($flds[3], $flds[4]) = ($ref.substr($alt,1), $ref); + } + + # + # Insertion of bases + # + if ( $alt =~ /^\+/ ) + { + $flds[4] = $ref.substr($alt,1); + } + ## insert dot for reference positions. + if ($flds[4] eq '') { + $flds[4] = '.'; + } + print OUT join( "\t", @flds),"\n" unless defined $chr_ord; + $output{$flds[0]}{$flds[1]} = join( "\t", @flds)."\n" if defined $chr_ord; + } + close(IN); + # if chromosome order given return in sorted order + if(defined $chr_ord) + { + for my $chrom (@{ $chr_ord }) + { + for my $pos (sort {$a<=>$b} keys %{ $output{$chrom} }) + { + print OUT $output{$chrom}{$pos}; + } + } + } + close(OUT); +} + + +sub chromosome_order +{ + my $input = shift; + # calculate flagstats + my $COMM = "samtools view -H $input | grep '^\@SQ'"; + my @SQ = `$COMM`; + chomp @SQ; + for(my $i = 0; $i <= $#SQ; $i++) + { + $SQ[$i] =~ s/^\@SQ\tSN:(.*?)\tLN:\d+$/$1/; + } + return(@SQ); +} + +