Mercurial > repos > plus91-technologies-pvt-ltd > softsearch
comparison 2.4/src/SoftSearch_Filter.pl @ 13:e3609c8714fb draft
Uploaded
author | plus91-technologies-pvt-ltd |
---|---|
date | Fri, 30 May 2014 03:37:55 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
12:980fce54e60a | 13:e3609c8714fb |
---|---|
1 #!/usr/bin/perl -s | |
2 open (FILE,"$ARGV[0]")||usage();#die "Not using the right Parameters!\n\n"; | |
3 use Getopt::Long; | |
4 #Declare variables | |
5 my ($lsc,$minDist,$skip,$nSC,$nRP,$isize,$answer); | |
6 GetOptions( | |
7 'dist:s' => \$minDist, #minimum distance between events | |
8 'lsc:i' => \$lsc, #minimum somatic score | |
9 'nsc:i' => \$nsc, #minimum depth of coverage in normal | |
10 'nRP:i' => \$nRP, #minimum number of times it can be seen in tumor | |
11 'isize:i' => \$isize, | |
12 'sv:s' => \$sv, #whether or not to skip small deletions | |
13 'q:s' => \$answer, #useful for plotting histograms | |
14 'skip:s' => \$skip | |
15 ); | |
16 if(defined($lsc)){$lsc=$lsc} else {$lsc=0}; | |
17 if(defined($nsc)){$nsc=$nsc} else {$nsc=0}; | |
18 if(defined($nRP)){$nRP=$nRP} else {$nRP=0}; | |
19 if(defined($minDist)){$minDist=$minDist} else {$minDist=0}; | |
20 if(!$isize){$isize=0}; | |
21 if(!$uRP){$uRP=0}; | |
22 | |
23 if($answer eq "yes"){$answer=$answer} else {$answer="no"}; | |
24 | |
25 if ($answer eq "yes"){ | |
26 open(lsc,">lsc.out")||die; | |
27 open(nsc,">nsc.out")||die; | |
28 open(nRP,">nRP.out")||die; | |
29 } | |
30 | |
31 | |
32 #Remove hits if they are within $minDist | |
33 $chr="chr1";$pos=0; | |
34 while (<FILE>){ | |
35 if ($_=~/^#/){ | |
36 print; | |
37 next | |
38 }; | |
39 if ($skip){next if $_=~/$skip/} | |
40 @_=split(/\t/,$_); | |
41 #Get ISIZE from INFO field | |
42 my @info=split(/;/,$_[7]); | |
43 my $k = 0; | |
44 my $v = 0; | |
45 my $infoHash; | |
46 for (my $i=0;$i<=@info;$i++){ | |
47 my @tmp=split(/=/,$info[$i]); | |
48 $k=shift(@tmp); | |
49 $v=shift(@tmp); | |
50 $infoHash{$k}=$v; | |
51 } | |
52 | |
53 #Get the value of TYPE to find out how many reads support the event | |
54 my $counts = {CTX => 0, DEL => 0, INS => 0, INV => 0, TDUP => 0, NOV_INS => 0, lSC => 0, nSC => 0,uRP =>0,sDEL => 0,levD_local=>0,distl_levD => 0 }; | |
55 #Get Complete Hash | |
56 #@_[8] is format | |
57 #@_[9] is values | |
58 my @format=split(/:/, $_[8]); | |
59 my @sample=split(/:/,$_[9]); | |
60 my %hash; | |
61 @hash{@format}=@sample; | |
62 #Subset has to get proper type of variants | |
63 my $max_val = 0; | |
64 my $max_type = "NA"; | |
65 | |
66 #Get TYPEOF HASH | |
67 my %type; | |
68 %type = %hash ; | |
69 delete $type{'lSC'}; | |
70 delete $type{'nSC'}; | |
71 delete $type{'uRP'}; | |
72 delete $type{'levD_local'}; | |
73 delete $type{'distl_levD'}; | |
74 | |
75 while (my ($key,$val)=each(%type)){ | |
76 if($val > $max_val){$max_val=$val;$max_type=$key} | |
77 } | |
78 | |
79 | |
80 ####################################################################################################### | |
81 #Start applying filters | |
82 | |
83 #Remove hits if they are within $minDist | |
84 $chrom=$_[0];$position=$_[1]; | |
85 | |
86 #next if chroms are same and distance is less than X | |
87 $difference=abs($pos-$position); | |
88 if(($chrom eq $chr)&&($difference < $minDist)){ | |
89 $pos=$position;$chr=$chrom;; | |
90 next} | |
91 $pos=$position;$chr=$chrom; | |
92 $EVENT_SIZE=$infoHash{'ISIZE'}; | |
93 $EVENT_TYPE=$max_type; | |
94 $EVENT_SUPPORT=$max_val; | |
95 $length_of_softClips=$hash{'lSC'}; | |
96 $number_of_softclips=$hash{'nSC'}; | |
97 $number_of_unmated=$hash{'uRP'}; | |
98 | |
99 ######################################################################## | |
100 #Print if all fileds are ok | |
101 next if($EVENT_SIZE < $isize); | |
102 next if($EVENT_SUPPORT < $nRP); | |
103 next if($length_of_softClips < $lsc); | |
104 next if($number_of_softclips < $nsc); | |
105 next if($number_of_unmated < $uRP); | |
106 next if (($sv)&&($EVENT_TYPE=~/sDEL/)); | |
107 print; | |
108 | |
109 | |
110 if ($answer eq "yes"){ | |
111 print lsc $length_of_softClips."\n"; | |
112 print nsc $number_of_softclips."\n"; | |
113 print nRP $EVENT_SUPPORT."\n"; | |
114 } | |
115 } | |
116 | |
117 | |
118 sub usage{ | |
119 print "\nUsage: Soft_SearchFilter.pl <VCF>\n | |
120 -dist #minimum distance between events [0] | |
121 -lsc #minimum length soft-clip [0] | |
122 -nsc #minimum number of soft-clip [0] | |
123 -nRP #minimum number of discordant read pairs [0] | |
124 -isize #minimum size [0] | |
125 -sv #skip small deletions [no|yes] | |
126 -skip #pipe-delimited list of strings to skip (e.g. chrM|chY|chrGL) | |
127 \n" | |
128 } | |
129 | |
130 #R | |
131 # lsc<-read.table("lsc.out") | |
132 # nsc<-read.table("nsc.out") | |
133 # nRP<-read.table("nRP.out") | |
134 # par(mfrow=c(2,2)) | |
135 # hist(lsc$V1,breaks=100) | |
136 # hist(nsc$V1,breaks=100) | |
137 # hist(nRP$V1,breaks=100) |