Mercurial > repos > plus91-technologies-pvt-ltd > softsearch
diff 2.4/src/SoftSearch_Filter.pl @ 18:1163c16cb3c0 draft
Uploaded
author | plus91-technologies-pvt-ltd |
---|---|
date | Mon, 02 Jun 2014 07:35:53 -0400 |
parents | e3609c8714fb |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/2.4/src/SoftSearch_Filter.pl Mon Jun 02 07:35:53 2014 -0400 @@ -0,0 +1,137 @@ +#!/usr/bin/perl -s +open (FILE,"$ARGV[0]")||usage();#die "Not using the right Parameters!\n\n"; +use Getopt::Long; +#Declare variables +my ($lsc,$minDist,$skip,$nSC,$nRP,$isize,$answer); +GetOptions( + 'dist:s' => \$minDist, #minimum distance between events + 'lsc:i' => \$lsc, #minimum somatic score + 'nsc:i' => \$nsc, #minimum depth of coverage in normal + 'nRP:i' => \$nRP, #minimum number of times it can be seen in tumor + 'isize:i' => \$isize, + 'sv:s' => \$sv, #whether or not to skip small deletions + 'q:s' => \$answer, #useful for plotting histograms + 'skip:s' => \$skip + ); +if(defined($lsc)){$lsc=$lsc} else {$lsc=0}; +if(defined($nsc)){$nsc=$nsc} else {$nsc=0}; +if(defined($nRP)){$nRP=$nRP} else {$nRP=0}; +if(defined($minDist)){$minDist=$minDist} else {$minDist=0}; +if(!$isize){$isize=0}; +if(!$uRP){$uRP=0}; + +if($answer eq "yes"){$answer=$answer} else {$answer="no"}; + +if ($answer eq "yes"){ +open(lsc,">lsc.out")||die; +open(nsc,">nsc.out")||die; +open(nRP,">nRP.out")||die; +} + + +#Remove hits if they are within $minDist +$chr="chr1";$pos=0; +while (<FILE>){ + if ($_=~/^#/){ + print; + next + }; + if ($skip){next if $_=~/$skip/} + @_=split(/\t/,$_); + #Get ISIZE from INFO field + my @info=split(/;/,$_[7]); + my $k = 0; + my $v = 0; + my $infoHash; + for (my $i=0;$i<=@info;$i++){ + my @tmp=split(/=/,$info[$i]); + $k=shift(@tmp); + $v=shift(@tmp); + $infoHash{$k}=$v; + } + + #Get the value of TYPE to find out how many reads support the event + my $counts = {CTX => 0, DEL => 0, INS => 0, INV => 0, TDUP => 0, NOV_INS => 0, lSC => 0, nSC => 0,uRP =>0,sDEL => 0,levD_local=>0,distl_levD => 0 }; + #Get Complete Hash + #@_[8] is format + #@_[9] is values + my @format=split(/:/, $_[8]); + my @sample=split(/:/,$_[9]); + my %hash; + @hash{@format}=@sample; + #Subset has to get proper type of variants + my $max_val = 0; + my $max_type = "NA"; + + #Get TYPEOF HASH + my %type; + %type = %hash ; + delete $type{'lSC'}; + delete $type{'nSC'}; + delete $type{'uRP'}; + delete $type{'levD_local'}; + delete $type{'distl_levD'}; + + while (my ($key,$val)=each(%type)){ + if($val > $max_val){$max_val=$val;$max_type=$key} + } + + +####################################################################################################### + #Start applying filters + + #Remove hits if they are within $minDist + $chrom=$_[0];$position=$_[1]; + + #next if chroms are same and distance is less than X + $difference=abs($pos-$position); + if(($chrom eq $chr)&&($difference < $minDist)){ + $pos=$position;$chr=$chrom;; + next} + $pos=$position;$chr=$chrom; + $EVENT_SIZE=$infoHash{'ISIZE'}; + $EVENT_TYPE=$max_type; + $EVENT_SUPPORT=$max_val; + $length_of_softClips=$hash{'lSC'}; + $number_of_softclips=$hash{'nSC'}; + $number_of_unmated=$hash{'uRP'}; + + ######################################################################## + #Print if all fileds are ok + next if($EVENT_SIZE < $isize); + next if($EVENT_SUPPORT < $nRP); + next if($length_of_softClips < $lsc); + next if($number_of_softclips < $nsc); + next if($number_of_unmated < $uRP); + next if (($sv)&&($EVENT_TYPE=~/sDEL/)); + print; + + + if ($answer eq "yes"){ + print lsc $length_of_softClips."\n"; + print nsc $number_of_softclips."\n"; + print nRP $EVENT_SUPPORT."\n"; + } +} + + +sub usage{ +print "\nUsage: Soft_SearchFilter.pl <VCF>\n + -dist #minimum distance between events [0] + -lsc #minimum length soft-clip [0] + -nsc #minimum number of soft-clip [0] + -nRP #minimum number of discordant read pairs [0] + -isize #minimum size [0] + -sv #skip small deletions [no|yes] + -skip #pipe-delimited list of strings to skip (e.g. chrM|chY|chrGL) + \n" +} + +#R +# lsc<-read.table("lsc.out") +# nsc<-read.table("nsc.out") +# nRP<-read.table("nRP.out") +# par(mfrow=c(2,2)) +# hist(lsc$V1,breaks=100) +# hist(nsc$V1,breaks=100) +# hist(nRP$V1,breaks=100)