annotate 2.4/script/blat_parse.pl @ 18:1163c16cb3c0 draft

Uploaded
author plus91-technologies-pvt-ltd
date Mon, 02 Jun 2014 07:35:53 -0400
parents e3609c8714fb
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
13
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
1 #####################################################################################################################################################
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
2 #Purpose: To parse blat psl file
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
3 #Date: 07-30-2013
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
4 #####################################################################################################################################################
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
5 use Getopt::Long;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
6 use Cwd;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
#reading input arguments
#
# Top-level driver of blat_parse.pl:
#   1. read the command-line options,
#   2. copy every PSL line that starts with a digit (i.e. drop the header
#      lines) into <temp dir>/Temp_out.txt,
#   3. sort that file on column 10 and then numerically on column 18,
#   4. walk the sorted file, collect consecutive lines that share the same
#      value in column 10 (index 9) and hand each group to processing().
#
# Options:
#   -b | --BLAT_OUT <file>   BLAT PSL file (required)
#   -f | --FASTA    <file>   contig FASTA file (required)
#   --temp [dir]             directory for the intermediate and result files;
#                            the current working directory is used when the
#                            value is missing or is not an existing directory
#
# NOTE: $blat_out, $dirtemp, $infast, $samtools, $prev_contig_name and @temp
# intentionally keep their original names and file-level visibility because
# the processing() subroutine further down reads some of them directly.
Getopt::Long::GetOptions(
    'b|BLAT_OUT=s' => \$blat_out,
    'temp:s'       => \$dirtemp,
    'f|FASTA=s'    => \$infast,
);

# Options that were not supplied become empty strings, then all whitespace
# is stripped from every value (same clean-up the script always applied).
for my $option (\$blat_out, \$dirtemp, \$infast) {
    $$option = '' unless defined $$option;
    $$option =~ s/\s+//g;
}

# Locate samtools through the shell; an empty result means "not found".
$samtools = `which samtools`;
$samtools = '' unless defined $samtools;
$samtools =~ s/\s+//g;

if ($blat_out eq "" || $infast eq "") {
    die "Try: perl blat_parse.pl -b <PSL FILE> -f <Contigs.fa>\n-temp temporary file directory\n\n";
}
if (!(-e $samtools)) {
    die "samtools must be in your path\n";
}
if (!(-e $infast)) {
    die "input fasta file doesn't exit\n";
}

# Fall back to the current directory when no usable temp directory was given.
unless (-d $dirtemp) {
    $dirtemp = getcwd;
}

my $filtered_path = "$dirtemp/Temp_out.txt";
my $sorted_path   = "$dirtemp/Temp_out1.txt";
my $result_path   = "$dirtemp/File1_out.txt";

#opening the blat output file
# Three-argument open with lexical handles: the file name can never be
# interpreted as an open mode or a pipe.
open(my $psl_in, '<', $blat_out) or die "no file found $blat_out\n";
open(my $filtered_out, '>', $filtered_path) or die "not able to write the file \n";

#parsing through the file: keep alignment rows, report skipped header lines
while (my $psl_line = <$psl_in>) {
    if ($psl_line =~ m/^\d/) {
        print {$filtered_out} $psl_line;
    }
    else {
        print "ignoring headers $.\n";
    }
}
close($psl_in);
# Buffered write errors only surface at close time, so check it.
close($filtered_out) or die "not able to write the file \n";

# Sort on column 10, then numerically on column 18. The list form of system()
# bypasses the shell, so a temp directory containing spaces or shell
# metacharacters cannot break (or inject into) the command.
system('sort', '-k10,10', '-k18,18n', '-o', $sorted_path, $filtered_path) == 0
    or die "sort failed for $filtered_path (status $?)\n";
rename($sorted_path, $filtered_path)
    or die "not able to replace $filtered_path with $sorted_path: $!\n";

# Start with an empty result file; processing() appends to it.
open(my $result_reset, '>', $result_path) or die "not able to write the file \n";
close($result_reset) or die "not able to write the file \n";

open(my $sorted_in, '<', $filtered_path) or die "no file found Temp_out.txt\n";

$prev_contig_name = "";
my @temp;
#parsing through the sorted file, one group of same-named hits at a time
while (my $hit_line = <$sorted_in>) {
    chomp($hit_line);

    # Split into a named array. The original bare `split "\t";` relied on the
    # implicit split into @_, which modern Perl no longer performs, so the
    # contig name in $_[9] was always undefined and grouping never happened.
    my @fields = split /\t/, $hit_line;
    my $contig_name = defined $fields[9] ? $fields[9] : '';

    if ($contig_name ne $prev_contig_name) {
        # A new contig starts: flush the hits collected for the previous one.
        if ($prev_contig_name ne "") {
            processing(@temp);
        }
        @temp = ();
    }
    push(@temp, $hit_line);
    $prev_contig_name = $contig_name;
}
#processing last record
processing(@temp);
close($sorted_in);
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
92
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
93
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
94
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
95
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
96 ##################SUBROUTINES######################
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
97 #actual processing of each record in the temp array(same query name objects)
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
98
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
99 sub processing {
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
100 open(WRBUFF,">>$dirtemp/File1_out.txt") or die "not able to write the file \n";
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
101 open(BAD_CONTIG,">>$dirtemp/bad_contig.out.txt") or die "not able to write the file \n";
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
102
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
103 @temp = @_;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
104 #if number of hits for a contig is one
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
105 if(@temp == 1)
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
106 {
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
107 $i=0;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
108 #define blocksizes array
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
109 @row=split("\t",$temp[$i]);
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
110 $row[18] =~ s/,$//g;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
111 @blockSizes=split(',',$row[18]);
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
112 #defining var
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
113 $qSize=$row[10];
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
114 $qStart=$row[11];
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
115 $qStop=$row[12];
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
116 $tstart=$row[15];
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
117 $tstop=$row[16];
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
118 $Strand=$row[8];
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
119 $coverage = $row[9];
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
120 $coverage =~ s/\w+_//g;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
121 #calculate match val
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
122 if(($qSize-($qStop-$qStart)) ==0)
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
123 {
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
124 $flag=1;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
125 #these are non-informative
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
126 if (@blockSizes ==1)
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
127 {
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
128 print "ignoring one of the event $row[9] $i as the event is non informative \n";
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
129 print BAD_CONTIG "$row[9]\n";
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
130 }
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
131 #Ignoring when number of blocks are more than two
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
132 if(@blockSizes > 2)
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
133 {
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
134 print "ignoring event $row[9] $. AS BLOCK SIZE is greater than 2\n";
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
135 }
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
136 #if number of blocks is equal to 2
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
137 if(@blockSizes == 2)
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
138 {
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
139 $temp1=$tstart+$blockSizes[0]+1;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
140 $temp2=$tstop-$blockSizes[1]-1;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
141
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
142 print WRBUFF "$row[9]\t$row[13]\t$temp1\t$Strand\t$row[13]\t$temp2\t$Strand\t$coverage\n";
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
143 }
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
144 $i=@temp;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
145 }
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
146 #later part missing
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
147 elsif($qStart ==0)
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
148 {
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
149 $temp1=$tstart+$blockSizes[0]+1;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
150 $infast_chr=$infast;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
151 $infast_chr=~ s/\.fa//g;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
152 $infast_chr_start=$qStop+1;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
153 $infast_chr_stop=$qSize;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
154 $sys="$samtools faidx $infast $infast_chr:$infast_chr_start-$infast_chr_stop";
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
155
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
156 $sys = `$sys`;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
157 chomp($sys);
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
158 @sys=split("\n",$sys);
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
159 $INSERTION="";
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
160 for($i=1;$i<@sys;$i++)
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
161 {
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
162 $INSERTION=$INSERTION.$sys[$i];
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
163 }
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
164 $INSERTION_LENGTH=length($INSERTION);
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
165 $temp1=$tstart+$blockSizes[0]+1;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
166 print WRBUFF "$row[9]\t$row[13]\t$temp1\t$Strand\tUNKNOWN\tUNKNOWN\t$Strand\t$coverage\t$INSERTION\t$INSERTION_LENGTH\n";
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
167
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
168 }
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
169 #initial part missing
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
170 elsif($qStop == $qSize)
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
171 {
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
172 $temp1=$tstart;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
173 $infast_chr=$infast;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
174 $infast_chr=~ s/\.fa//g;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
175 $infast_chr_start=0;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
176 $infast_chr_stop=$qStart;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
177 $sys ="$samtools faidx $infast $infast_chr:$infast_chr_start-$infast_chr_stop";
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
178 #die "$sys\n";
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
179 $sys = `$sys`;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
180 #die "$sys\n";
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
181 chomp($sys);
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
182 @sys=split("\n",$sys);
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
183 $INSERTION="";
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
184 for( $i=1;$i<@sys;$i++)
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
185 {
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
186 $INSERTION=$INSERTION.$sys[$i];
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
187 }
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
188 $INSERTION_LENGTH=length($INSERTION);
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
189 $temp1=$tstart+1;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
190 print WRBUFF "$row[9]\tUNKNOWN\tUNKNOWN\t$Strand\t$row[13]\t$temp1\t$Strand\t$coverage\n";
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
191
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
192 }
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
193 else
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
194 {
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
195 print "ignoring one of the event $row[9] $i as the event is non informative \n";
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
196 }
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
197
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
198 }
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
199 #if number of hits for a contig is greater than one
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
200 else
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
201 {
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
202 #this flag is used to see if perfect hit not found (match val =0)
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
203 $flag1 = 0;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
204 for(my $i=0;$i<@temp;$i++)
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
205 {
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
206
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
207 #define blocksizes array
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
208 @row=split("\t",$temp[$i]);
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
209 $row[18] =~ s/,$//g;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
210 @blockSizes=split(',',$row[18]);
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
211 #defining var
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
212 $qSize=$row[10];
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
213 $qStart=$row[11];
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
214 $qStop=$row[12];
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
215 $tstart=$row[15];
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
216 $tstop=$row[16];
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
217 $Strand=$row[8];
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
218 $coverage = $row[9];
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
219 $coverage =~ s/\w+_//g;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
220 #calculate match val
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
221 if(($qSize-($qStop-$qStart)) ==0)
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
222 {
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
223 $flag1=1;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
224 #these are non-informative
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
225 if (@blockSizes ==1)
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
226 {
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
227 print "ignoring one of the event $row[9] $i as the event is non informative \n";
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
228 print BAD_CONTIG "$row[9]\n";
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
229 }
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
230 #Ignoring when number of blocks are more than two
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
231 if(@blockSizes > 2)
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
232 {
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
233 print "ignoring event $row[9] $. AS BLOCK SIZE is greater than 2\n";
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
234 }
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
235 if(@blockSizes == 2)
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
236 {
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
237 $temp1=$tstart+$blockSizes[0]+1;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
238 $temp2=$tstop-$blockSizes[1]-1;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
239
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
240 print WRBUFF "$row[9]\t$row[13]\t$temp1\t$Strand\t$row[13]\t$temp2\t$Strand\t$coverage\n";
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
241 }
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
242 $i=@temp;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
243 }
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
244 }
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
245 #as flag value not changed proceed to see next step
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
246 if($flag1 == 0)
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
247 {
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
248 undef(@initial);
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
249 my @initial;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
250 for(my $i=0;$i<@temp;$i++)
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
251 {
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
252 @row=split("\t",$temp[$i]);
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
253 #print "@row\n";
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
254 unshift(@initial,[@row]);
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
255 }
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
256 #sorting the hits by qstart (ascending), then by qend (descending)
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
257 @initial = sort {$a->[11] <=> $b->[11] || $b->[12] <=> $a->[12]} @initial;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
258 #print "$row[9]\t@initial\n";
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
259 #if($row[9] eq "NODE_5_length_149_cov_12.395973")
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
260 #{
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
261 # for($i=0;$i<@initial;$i++)
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
262 # {
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
263 # print "@{$initial[$i]}\n";
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
264 # }
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
265 #}
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
266 $start = "";
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
267 $stop = "";
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
268 $start_len=0;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
269 $stop_len=0;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
270 #this super flag is used to skip processing of the remaining unnecessary hits
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
271 $super_flag = 0;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
272 for($i=0;$i<@initial && $super_flag == 0;$i++)
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
273 {
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
274 $flag = 0;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
275 #print "@{$initial[$i]}\n";
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
276 $initial[$i][18] =~ s/,$//g;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
277 @blockSizes1=split(',',$initial[$i][18]);
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
278 #defining var
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
279 $qSize1=$initial[$i][10];
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
280 $qStart1=$initial[$i][11];
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
281 $qStop1=$initial[$i][12];
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
282 $tstart1=$initial[$i][15];
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
283 $tstop1=$initial[$i][16];
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
284 $Strand1=$initial[$i][8];
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
285 $Chr1 = $initial[$i][13];
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
286 $coverage1 = $initial[$i][9];
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
287 $coverage1 =~ s/\w+_//g;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
288 #die "$qSize1\t$qStart1\t$qStop1\t$tstart1\t$tstop1\t$Strand1\t$Chr1\t$coverage1\n";
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
289 #if a hit qstart = 0 then set flag =1
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
290 if($qStart1 == 0)
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
291 {
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
292 $flag =1;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
293 }
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
294 #if a hit qstop = qsize then set flag =2
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
295 if($qStop1 == $qSize1)
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
296 {
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
297 $flag =2;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
298 }
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
299 #if($row[9] eq "NODE_5_length_149_cov_12.395973")
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
300 #{
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
301 # print "$flag \n";
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
302 #}
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
303 if(@blockSizes1 == 1)
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
304 {
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
305 if($flag == 1 )
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
306 {
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
307 for($j=0;$j<@initial;$j++)
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
308 {
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
309 #both hits should not be the same
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
310 if($i != $j)
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
311 {
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
312 #print "@{$initial[$i]}\n";
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
313 $initial[$j][18] =~ s/,$//g;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
314 @blockSizes2=split(',',$initial[$j][18]);
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
315 #defining var
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
316 $qSize2=$initial[$j][10];
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
317 $qStart2=$initial[$j][11];
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
318 $qStop2=$initial[$j][12];
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
319 $tstart2=$initial[$j][15];
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
320 $tstop2=$initial[$j][16];
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
321 $Strand2=$initial[$j][8];
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
322 $coverage2 = $initial[$j][9];
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
323 $Chr2 = $initial[$j][13];
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
324 $coverage2 =~ s/\w+_//g;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
325 #making sure both hits are not overlapping
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
326 if($qStart2 > $qStart1)
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
327 { #allowing +-2 bases, as this hit is the immediately following contiguous hit
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
328 if($qStop1 >= $qStart2 -2 && $qStop1 <= $qStart2 +2 )
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
329 {
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
330 #perfect match
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
331 if($qStop2 == $qSize2)
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
332 {
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
333 if($Strand1 eq "+")
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
334 {
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
335 $tmp1 = $tstart1+$blockSizes1[0]+1;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
336 $tmp2 = $tstart2+$blockSizes2[0];
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
337 print WRBUFF "$initial[$i][9]\t$Chr1\t$tmp1\t$Strand1\t$Chr2\t$tmp2\t$Strand2\t$coverage1\n";
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
338 }
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
339 else
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
340 {
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
341 $tmp1 = $tstart1+1;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
342 $tmp2 = $tstart2+1;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
343 print WRBUFF "$initial[$i][9]\t$Chr1\t$tmp1\t$Strand1\t$Chr2\t$tmp2\t$Strand2\t$coverage1\n";
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
344
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
345 }
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
346 $super_flag = 1;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
347 $j = @initial+1;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
348 }
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
349 #some part is missing after the second hit
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
350 else
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
351 {
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
352 $tmp1 = $tstart1+$blockSizes1[0];
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
353 $tmp2 = $tstart2+$blockSizes2[0];
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
354 $INSERTION="";
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
355 $infast_chr=$infast;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
356 $infast_chr=~ s/\.fa//g;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
357 $infast_chr_start=$qStop1+1;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
358 $infast_chr_stop=$qStart2-1;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
359 $sys ="$samtools faidx $infast $infast_chr:$infast_chr_start-$infast_chr_stop";
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
360 #die "$sys\n";
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
361 $sys = `$sys`;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
362 #die "$sys\n";
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
363 chomp($sys);
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
364 @sys=split("\n",$sys);
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
365 for( $i=1;$i<@sys;$i++)
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
366 {
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
367 $INSERTION=$INSERTION.$sys[$i];
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
368 }
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
369 $INSERTION_LENGTH=length($INSERTION);
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
370 print WRBUFF "$initial[$i][9]\t$Chr1\t$tmp1\t$Strand1\t$Chr2\t$tmp2\t$Strand2\t$coverage1\n";
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
371 $super_flag = 1;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
372 $j = @initial+1;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
373 }
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
374
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
375 }
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
376 #if there is an insertion between the two hits
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
377 elsif($qStop2 == $qSize2)
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
378 {
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
379 $tmp1 = $tstart1+$blockSizes1[0];
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
380 $tmp2 = $tstart2+$blockSizes2[0];
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
381 $INSERTION="";
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
382 $infast_chr=$infast;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
383 $infast_chr=~ s/\.fa//g;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
384 $infast_chr_start=$qStop2+1;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
385 $infast_chr_stop=$qSize;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
386 $sys ="$samtools faidx $infast $infast_chr:$infast_chr_start-$infast_chr_stop";
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
387 #die "$sys\n";
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
388 $sys = `$sys`;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
389 #die "$sys\n";
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
390 chomp($sys);
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
391 @sys=split("\n",$sys);
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
392 for( $i=1;$i<@sys;$i++)
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
393 {
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
394 $INSERTION=$INSERTION.$sys[$i];
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
395 }
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
396 $INSERTION_LENGTH=length($INSERTION);
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
397 print WRBUFF "$initial[$i][9]\t$Chr1\t$tmp1\t$Strand1\t$Chr2\t$tmp2\t$Strand2\t$coverage1\n";
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
398 $super_flag = 1;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
399 $j = @initial+1;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
400 }
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
401
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
402 }
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
403
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
404 }
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
405 }
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
406 #if none worked with other reads then only process that read
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
407 if($j == @initial)
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
408 {
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
409 #die "success\n";
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
410 $temp1=$tstart1+$blockSizes1[0]+1;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
411 #print WRBUFF "$Chr1\t$temp1\t$Strand1\tUNKNOWN\tUNKNOWN\t$Strand\t$coverage\n";
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
412 $infast_chr=$infast;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
413 $infast_chr=~ s/\.fa//g;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
414 $infast_chr_start=$qStop1+1;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
415 $infast_chr_stop=$qSize1;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
416 $sys ="$samtools faidx $infast $infast_chr:$infast_chr_start-$infast_chr_stop";
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
417 #die "$sys\n";
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
418 $sys = `$sys`;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
419 #die "$sys\n";
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
420 chomp($sys);
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
421 @sys=split("\n",$sys);
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
422 $INSERTION="";
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
423 for( $i=1;$i<@sys;$i++)
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
424 {
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
425 $INSERTION=$INSERTION.$sys[$i];
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
426 }
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
427 $INSERTION_LENGTH=length($INSERTION);
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
428 print WRBUFF "$initial[$i][9]\t$Chr1\t$temp1\t$Strand1\tUNKNOWN\tUNKNOWN\t$Strand1\t$coverage1\n";
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
429 $super_flag = 1;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
430 }
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
431 }
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
432 #if query end is matched to query size
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
433 elsif($flag == 2)
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
434 {
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
435 #going through other hits
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
436 for($j=0;$j<@initial;$j++)
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
437 {
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
438 #hits should not be same
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
439 if($i != $j && $qStop2)
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
440 {
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
441 #print "@{$initial[$i]}\n";
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
442 $initial[$j][18] =~ s/,$//g;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
443 @blockSizes2=split(',',$initial[$j][18]);
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
444 #defining var
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
445 $qSize2=$initial[$j][10];
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
446 $qStart2=$initial[$j][11];
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
447 $qStop2=$initial[$j][12];
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
448 $tstart2=$initial[$j][15];
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
449 $tstop2=$initial[$j][16];
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
450 $Strand2=$initial[$j][8];
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
451 $coverage2 = $initial[$j][9];
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
452 $Chr2 = $initial[$j][13];
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
453 $coverage2 =~ s/\w+_//g;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
454 #if
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
455 if($qStop2 < $qStop1)
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
456 {
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
457 if($qStart1 >= $qStop2 -2 && $qStart1 <= $qStop2 +2 )
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
458 {
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
459 #die "$qStart1 <= $qStop2 \n";
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
460 $tmp1 = $tstart1+$blockSizes1[0];
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
461 $tmp2 = $tstart2+$blockSizes2[0];
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
462 $INSERTION="";
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
463 $infast_chr=$infast;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
464 $infast_chr=~ s/\.fa//g;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
465 $infast_chr_start=0;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
466 $infast_chr_stop=$qStart1-1;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
467 $sys ="$samtools faidx $infast $infast_chr:$infast_chr_start-$infast_chr_stop";
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
468 #die "test $sys\n";
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
469 $sys = `$sys`;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
470 #die "$sys\n";
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
471 chomp($sys);
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
472 @sys=split("\n",$sys);
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
473 for( $i=1;$i<@sys;$i++)
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
474 {
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
475 $INSERTION=$INSERTION.$sys[$i];
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
476 }
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
477 $INSERTION_LENGTH=length($INSERTION);
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
478 print WRBUFF "$initial[$i][9]\t$Chr2\t$tmp2\t$Strand2\t$Chr1\t$tmp1\t$Strand1\t$coverage1\n";
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
479 $super_flag = 1;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
480 $j = @initial+1;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
481
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
482 }
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
483
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
484 }
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
485 }
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
486 }
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
487 if($j == @initial)
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
488 {
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
489 $infast_chr=$infast;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
490 $infast_chr=~ s/\.fa//g;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
491 $infast_chr_start=0;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
492 $infast_chr_stop=$qStart1;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
493 $sys ="$samtools faidx $infast $infast_chr:$infast_chr_start-$infast_chr_stop";
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
494 #die "test $sys\n";
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
495 $sys = `$sys`;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
496 #die "$sys\n";
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
497 chomp($sys);
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
498 @sys=split("\n",$sys);
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
499 $INSERTION="";
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
500 for( $i=1;$i<@sys;$i++)
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
501 {
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
502 $INSERTION=$INSERTION.$sys[$i];
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
503 }
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
504 $INSERTION_LENGTH=length($INSERTION);
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
505 $tmp = $tstart1+1;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
506 print WRBUFF "$initial[$i][9]\tUNKNOWN\tUNKNOWN\t$Strand1\t$Chr1\t$tmp\t$Strand1\t$coverage1\n";
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
507 $super_flag = 1;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
508 }
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
509 }
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
510 }
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
511 elsif(@blockSizes == 2)
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
512 {
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
513 $temp1=$tstart1+$blockSizes[0]+1;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
514 $temp2=$tstop1-$blockSizes[1]-1;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
515 print WRBUFF "$initial[$i][9]\t$Chr1\t$temp1\t$Strand1\t$Chr1\t$temp2\t$Strand1\t$coverage1\n";
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
516
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
517 }
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
518 }
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
519 }
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
520
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
521 }
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
522 close(WRBUFF);
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
523
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
524 undef(@temp);
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
525 }
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
526