13
|
1 #!/usr/bin/perl -s
|
|
2 open (FILE,"$ARGV[0]")||usage();#die "Not using the right Parameters!\n\n";
|
|
3 use Getopt::Long;
|
|
4 #Declare variables
|
|
5 my ($lsc,$minDist,$skip,$nSC,$nRP,$isize,$answer);
|
|
6 GetOptions(
|
|
7 'dist:s' => \$minDist, #minimum distance between events
|
|
8 'lsc:i' => \$lsc, #minimum somatic score
|
|
9 'nsc:i' => \$nsc, #minimum depth of coverage in normal
|
|
10 'nRP:i' => \$nRP, #minimum number of times it can be seen in tumor
|
|
11 'isize:i' => \$isize,
|
|
12 'sv:s' => \$sv, #whether or not to skip small deletions
|
|
13 'q:s' => \$answer, #useful for plotting histograms
|
|
14 'skip:s' => \$skip
|
|
15 );
|
|
16 if(defined($lsc)){$lsc=$lsc} else {$lsc=0};
|
|
17 if(defined($nsc)){$nsc=$nsc} else {$nsc=0};
|
|
18 if(defined($nRP)){$nRP=$nRP} else {$nRP=0};
|
|
19 if(defined($minDist)){$minDist=$minDist} else {$minDist=0};
|
|
20 if(!$isize){$isize=0};
|
|
21 if(!$uRP){$uRP=0};
|
|
22
|
|
23 if($answer eq "yes"){$answer=$answer} else {$answer="no"};
|
|
24
|
|
25 if ($answer eq "yes"){
|
|
26 open(lsc,">lsc.out")||die;
|
|
27 open(nsc,">nsc.out")||die;
|
|
28 open(nRP,">nRP.out")||die;
|
|
29 }
|
|
30
|
|
31
|
|
32 #Remove hits if they are within $minDist
|
|
33 $chr="chr1";$pos=0;
|
|
34 while (<FILE>){
|
|
35 if ($_=~/^#/){
|
|
36 print;
|
|
37 next
|
|
38 };
|
|
39 if ($skip){next if $_=~/$skip/}
|
|
40 @_=split(/\t/,$_);
|
|
41 #Get ISIZE from INFO field
|
|
42 my @info=split(/;/,$_[7]);
|
|
43 my $k = 0;
|
|
44 my $v = 0;
|
|
45 my $infoHash;
|
|
46 for (my $i=0;$i<=@info;$i++){
|
|
47 my @tmp=split(/=/,$info[$i]);
|
|
48 $k=shift(@tmp);
|
|
49 $v=shift(@tmp);
|
|
50 $infoHash{$k}=$v;
|
|
51 }
|
|
52
|
|
53 #Get the value of TYPE to find out how many reads support the event
|
|
54 my $counts = {CTX => 0, DEL => 0, INS => 0, INV => 0, TDUP => 0, NOV_INS => 0, lSC => 0, nSC => 0,uRP =>0,sDEL => 0,levD_local=>0,distl_levD => 0 };
|
|
55 #Get Complete Hash
|
|
56 #@_[8] is format
|
|
57 #@_[9] is values
|
|
58 my @format=split(/:/, $_[8]);
|
|
59 my @sample=split(/:/,$_[9]);
|
|
60 my %hash;
|
|
61 @hash{@format}=@sample;
|
|
62 #Subset has to get proper type of variants
|
|
63 my $max_val = 0;
|
|
64 my $max_type = "NA";
|
|
65
|
|
66 #Get TYPEOF HASH
|
|
67 my %type;
|
|
68 %type = %hash ;
|
|
69 delete $type{'lSC'};
|
|
70 delete $type{'nSC'};
|
|
71 delete $type{'uRP'};
|
|
72 delete $type{'levD_local'};
|
|
73 delete $type{'distl_levD'};
|
|
74
|
|
75 while (my ($key,$val)=each(%type)){
|
|
76 if($val > $max_val){$max_val=$val;$max_type=$key}
|
|
77 }
|
|
78
|
|
79
|
|
80 #######################################################################################################
|
|
81 #Start applying filters
|
|
82
|
|
83 #Remove hits if they are within $minDist
|
|
84 $chrom=$_[0];$position=$_[1];
|
|
85
|
|
86 #next if chroms are same and distance is less than X
|
|
87 $difference=abs($pos-$position);
|
|
88 if(($chrom eq $chr)&&($difference < $minDist)){
|
|
89 $pos=$position;$chr=$chrom;;
|
|
90 next}
|
|
91 $pos=$position;$chr=$chrom;
|
|
92 $EVENT_SIZE=$infoHash{'ISIZE'};
|
|
93 $EVENT_TYPE=$max_type;
|
|
94 $EVENT_SUPPORT=$max_val;
|
|
95 $length_of_softClips=$hash{'lSC'};
|
|
96 $number_of_softclips=$hash{'nSC'};
|
|
97 $number_of_unmated=$hash{'uRP'};
|
|
98
|
|
99 ########################################################################
|
|
100 #Print if all fileds are ok
|
|
101 next if($EVENT_SIZE < $isize);
|
|
102 next if($EVENT_SUPPORT < $nRP);
|
|
103 next if($length_of_softClips < $lsc);
|
|
104 next if($number_of_softclips < $nsc);
|
|
105 next if($number_of_unmated < $uRP);
|
|
106 next if (($sv)&&($EVENT_TYPE=~/sDEL/));
|
|
107 print;
|
|
108
|
|
109
|
|
110 if ($answer eq "yes"){
|
|
111 print lsc $length_of_softClips."\n";
|
|
112 print nsc $number_of_softclips."\n";
|
|
113 print nRP $EVENT_SUPPORT."\n";
|
|
114 }
|
|
115 }
|
|
116
|
|
117
|
|
118 sub usage{
|
|
119 print "\nUsage: Soft_SearchFilter.pl <VCF>\n
|
|
120 -dist #minimum distance between events [0]
|
|
121 -lsc #minimum length soft-clip [0]
|
|
122 -nsc #minimum number of soft-clip [0]
|
|
123 -nRP #minimum number of discordant read pairs [0]
|
|
124 -isize #minimum size [0]
|
|
125 -sv #skip small deletions [no|yes]
|
|
126 -skip #pipe-delimited list of strings to skip (e.g. chrM|chY|chrGL)
|
|
127 \n"
|
|
128 }
|
|
129
|
|
130 #R
|
|
131 # lsc<-read.table("lsc.out")
|
|
132 # nsc<-read.table("nsc.out")
|
|
133 # nRP<-read.table("nRP.out")
|
|
134 # par(mfrow=c(2,2))
|
|
135 # hist(lsc$V1,breaks=100)
|
|
136 # hist(nsc$V1,breaks=100)
|
|
137 # hist(nRP$V1,breaks=100)
|