annotate genephys/MergeBlastResults.pl @ 3:8dfa09868059 draft

Uploaded
author mcharles
date Fri, 24 Oct 2014 05:54:20 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
3
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
1 #!/usr/bin/perl
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
2 #V1.0.3 header added
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
3 #V1.0.2 removed the final sort (very heavy load) and replaced it with an additional hash level
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
4 #V1.0.1 added log, option parameters
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
5 use strict;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
6 use warnings;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
7 use Getopt::Long;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
8
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
# ---------------------------------------------------------------------------
# Command-line parameters and their defaults.
# ---------------------------------------------------------------------------
my $inputblast;    # --input_blasttab_file  : tab-separated BLAST hits to read
my $outputjoin;    # --output_joinmatch_file: merged result table to write
my $log_file;      # --log_file             : log / statistics file to write

# When two HSPs overlap, the shorter one is discarded if the overlap exceeds
# this fraction of its length ...
my $MAX_OVERLAP_FRACTION = 0.5;
# ... and is also longer than this many positions.
my $MAX_OVERLAP_LENGTH_IGNORED = 3;
# Compared against "ON" to dump the intermediate match tables to the log.
# NOTE(review): no command-line option sets it in this part of the script.
my $VERBOSE = "OFF";
# NOTE(review): not referenced in the visible part of the script.
my $ALLOWED_GAP_FRACTION_FOR_MERGING = 0.3;
# Free text written after "##" on the first line of the output file.
my $HEADER = "";

GetOptions(
    "input_blasttab_file=s"        => \$inputblast,
    "output_joinmatch_file=s"      => \$outputjoin,
    "log_file=s"                   => \$log_file,
    "header=s"                     => \$HEADER,
    "max_overlap_fraction=f"       => \$MAX_OVERLAP_FRACTION,
    "max_overlap_length_ignored=i" => \$MAX_OVERLAP_LENGTH_IGNORED,
) or die("Error in command line arguments\n");

# Fail early, with a readable message, when a mandatory file option is missing
# (previously this only surfaced as "uninitialized value" warnings followed by
# a "Can't open" error, and for the output file only after the whole input had
# been parsed).
for my $required (
    [ input_blasttab_file   => $inputblast ],
    [ output_joinmatch_file => $outputjoin ],
    [ log_file              => $log_file ],
) {
    my ($option, $value) = @{$required};
    die("Missing required option --$option\n")
        unless defined $value && length $value;
}

# Three-argument open: the file name is never interpreted as a mode or a pipe.
# The bareword handles IB and LF are kept because the rest of the script
# reads from / prints to them by name.
open(IB, '<', $inputblast) or die("Can't open $inputblast: $!\n");
open(LF, '>', $log_file)   or die("Can't open $log_file: $!\n");
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
29
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
30
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
# Parsed hits: query id -> "query##subject##orientation" -> list of match hashes.
my %match_by_query = ();

# Query ids in order of first appearance, and the set of ids already recorded.
my (@query_keys, %querys);

# Run-wide counters: lines read, HSPs dropped as included in another HSP,
# HSPs dropped because of a too-large overlap.
my ($stats_nb_match, $stats_included, $stats_large_overlapping) = (0, 0, 0);

# Query-coverage histogram: one bucket per 10% slice, every bucket starts at 0.
my %stats_query_coverage = map { ( $_ => 0 ) } (
    "0-10%",  "10-20%", "20-30%", "30-40%", "40-50%",
    "50-60%", "60-70%", "70-80%", "80-90%", "90-100%",
);

# NOTE(review): not referenced in the visible part of the script.
my $current_query = "";
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
# ---------------------------------------------------------------------------
# Read the tab-separated BLAST input (handle IB) and index every hit in
# %match_by_query: query id -> "query##subject##orientation" -> [ match, ... ].
#
# Expected columns (10): query, subject id, query start, query end,
# subject start, subject end, similarity, query length, subject length, subject.
# ---------------------------------------------------------------------------
while (my $ligne = <IB>) {
    # Split before chomping so that an empty last column still counts as a field.
    my @fields = split(/\t/, $ligne);
    if (@fields != 10) {
        # A malformed input is fatal: report the offending line and exit with a
        # non-zero status (the previous exit(0) signalled success to the caller).
        die("Invalid blasttab format, must have 10 columns (input line $.)\n");
    }
    chomp($fields[9]);
    $stats_nb_match++;

    my ($query, $subject_id, $query_from, $query_to, $subject_from, $subject_to,
        $similarity, $query_length, $subject_length, $subject) = @fields;

    # Remember each query id once, in order of first appearance.
    if (!$querys{$query}) {
        push(@query_keys, $query);
        $querys{$query} = 1;
    }

    # Normalise coordinates so that start <= end; a reversed pair on either
    # the query or the subject side marks the hit as "-".
    # NOTE(review): a hit reversed on BOTH sides is also reported as "-";
    # confirm this is intended.
    my $orientation = "+";
    if ($query_from > $query_to) {
        ($query_from, $query_to) = ($query_to, $query_from);
        $orientation = "-";
    }
    if ($subject_from > $subject_to) {
        ($subject_from, $subject_to) = ($subject_to, $subject_from);
        $orientation = "-";
    }

    my %match = (
        "Query"          => $query,
        "Subject_id"     => $subject_id,
        "Orientation"    => $orientation,
        "Query_start"    => $query_from,
        "Query_end"      => $query_to,
        "Subject_start"  => $subject_from,
        "Subject_end"    => $subject_to,
        "Similarity"     => $similarity,
        "Query_length"   => $query_length,
        "Subject_length" => $subject_length,
        "Subject"        => $subject,
        "Ligne"          => $ligne,      # raw input line, newline included
    );

    # Hits are grouped per query, subject and orientation.
    my $key = $query . "##" . $subject . "##" . $orientation;

    if ($subject_length == 0) {
        print LF "Match 0",$ligne,"\n",$subject_length,"\n";
    }

    # Append in place. The nested hash and array are created on first use, so
    # there is no need to copy the whole per-query structure for every input
    # line (which made loading quadratic in the number of hits per query).
    push(@{ $match_by_query{$query}{$key} }, \%match);
}

close(IB);
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
116
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
117 #print LF "NB query : $#query_keys\n";
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
118 #foreach my $querykey (sort @query_keys){
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
119 # my %current_match_by_query_and_subject = %{$match_by_query{$querykey}};
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
120 # foreach my $key (sort {$a cmp $b} keys %current_match_by_query_and_subject){
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
121 # my @current_match_table = sort sortbyquerycoord @{$current_match_by_query_and_subject{$key}};
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
122 # for (my $i=0;$i<=$#current_match_table;$i++){
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
123 # my %current_match = %{$current_match_table[$i]};
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
124 # print LF $current_match{"Ligne"}."\n";
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
125 # }
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
126 # }
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
127 #}
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
128 #exit(0);
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
129
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
# Open the result file and write its two header lines: the free-text header
# prefixed with "##", then the tab-separated column names.
defined $outputjoin or die("Missing output file name (--output_joinmatch_file)\n");
# Three-argument open: the file name is never interpreted as a mode or a pipe.
# The bareword handle OJ is kept because the rest of the script prints to it.
open(OJ, '>', $outputjoin) or die("Can't open $outputjoin: $!\n");
print OJ "##",$HEADER,"\n";
print OJ "#Query\tSubject_Id\torientation\tQuery_coverage\tSubject_coverage\tIdentity\tmin_query\tmax_query\tmin_subject\tmax_subject\tNBmatch\tq_length\tsub_length\tsubject\n";
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
133
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
134 foreach my $querykey (sort @query_keys){
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
135 my %current_match_by_query_and_subject = %{$match_by_query{$querykey}};
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
136 my @match_joined; #la table qui contient les matchs (hash) projeté de chaque subject pour une query
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
137 foreach my $key (sort {$a cmp $b} keys %current_match_by_query_and_subject){
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
138 my @current_match_table = sort sortbyquerycoord @{$current_match_by_query_and_subject{$key}};
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
139 my @duplicate;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
140 my @overlap;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
141 for (my $i=0;$i<=$#current_match_table;$i++){
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
142 push (@duplicate,0);
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
143 }
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
144 if ($VERBOSE eq "ON"){
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
145 print LF "\nTable Match ($#current_match_table)\t$key\n";
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
146 for (my $i=0;$i<=$#current_match_table;$i++){
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
147 my %match=%{$current_match_table[$i]};
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
148 print LF $match{"Query"},"\t",$match{"Subject_id"},"\t",$match{"Orientation"},"\t",$match{"Query_start"},"\t",$match{"Query_end"},"\t";
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
149 print LF $match{"Subject_start"},"\t",$match{"Subject_end"},"\t",$match{"Subject_length"},"\t",$match{"Similarity"},"\n";
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
150 }
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
151 }
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
152
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
153 #Scan d'inclusion strict
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
154 for (my $i=0;$i<=$#current_match_table;$i++){
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
155 my %match1=%{$current_match_table[$i]};
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
156 for (my $j=0;$j<=$#current_match_table;$j++){
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
157 if (($j != $i)&&($duplicate[$j]==0)){ # On scan dans les deux sens, pas seulement $j = $i+1 a cause du last;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
158 my %match2=%{$current_match_table[$j]};
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
159 # Inclus Subject
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
160 if (($match1{"Subject_start"}>=$match2{"Subject_start"})&&($match1{"Subject_end"}<=$match2{"Subject_end"}))
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
161 {
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
162 $duplicate[$i]=1;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
163 last;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
164 }
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
165 # Inclus Query
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
166 elsif (($match1{"Query_start"}>=$match2{"Query_start"})&&($match1{"Query_end"}<=$match2{"Query_end"}))
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
167 {
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
168 $duplicate[$i]=2;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
169 last;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
170 }
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
171
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
172 }
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
173 }
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
174 }
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
175
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
176 my @current_match_table_filtered;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
177 for (my $i=0;$i<=$#current_match_table;$i++){
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
178 if ($duplicate[$i] == 0){
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
179 push (@current_match_table_filtered,$current_match_table[$i]);
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
180 }
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
181 else{
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
182 $stats_included++;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
183 }
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
184 }
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
185
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
186 if ($#current_match_table > $#current_match_table_filtered){
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
187 @current_match_table = @current_match_table_filtered;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
188 if ($VERBOSE eq "ON"){
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
189 print LF "Table Match filtered 1 ($#current_match_table)\n";
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
190 for (my $i=0;$i<=$#current_match_table;$i++){
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
191 my %match=%{$current_match_table[$i]};
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
192 print LF $match{"Query"},"\t",$match{"Subject_id"},"\t",$match{"Orientation"},"\t",$match{"Query_start"},"\t",$match{"Query_end"},"\t";
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
193 print LF $match{"Subject_start"},"\t",$match{"Subject_end"},"\t",$match{"Subject_length"},"\t",$match{"Similarity"},"\n";
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
194 }
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
195 }
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
196 }
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
197
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
198 undef @duplicate;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
199 for (my $i=0;$i<=$#current_match_table;$i++){
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
200 push (@duplicate,0);
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
201 }
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
202 ########
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
203
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
204 ###Scan d'overlap trop important
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
205 # D'abord subject
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
206 for (my $i=0;$i<=$#current_match_table-1;$i++){
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
207 my %match1=%{$current_match_table[$i]};
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
208 for (my $j=$i+1;$j<=$#current_match_table;$j++){
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
209 my %match2=%{$current_match_table[$j]};
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
210 if (($match1{"Subject_start"}<=$match2{"Subject_start"})&&($match1{"Subject_end"}>=$match2{"Subject_start"})&&($match1{"Subject_end"}<=$match2{"Subject_end"}))
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
211 {
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
212 my $overlap_length = $match1{"Subject_end"} - $match2{"Subject_start"}+1;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
213 my $length1 = $match1{"Subject_end"} - $match1{"Subject_start"} +1;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
214 my $length2 = $match2{"Subject_end"} - $match2{"Subject_start"} +1;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
215
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
216 if (($length1 >= $length2)&&($overlap_length > $length2 * $MAX_OVERLAP_FRACTION)&&($overlap_length > $MAX_OVERLAP_LENGTH_IGNORED)){
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
217 $duplicate[$j]=1;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
218 }
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
219 elsif (($length2 >= $length1)&&($overlap_length > $length1 * $MAX_OVERLAP_FRACTION)&&($overlap_length > $MAX_OVERLAP_LENGTH_IGNORED)){
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
220 $duplicate[$i]=1;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
221 }
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
222 else {
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
223 }
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
224 }
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
225 elsif (($match2{"Subject_start"}<=$match1{"Subject_start"})&&($match2{"Subject_end"}>=$match1{"Subject_start"})&&($match2{"Subject_end"}<=$match1{"Subject_end"})) #Recherche d'inclusion dans les deux sens car els match sont classé par query coord, par par subject coord
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
226 {
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
227 my $overlap_length = $match2{"Subject_end"} - $match1{"Subject_start"}+1;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
228 my $length1 = $match1{"Subject_end"} - $match1{"Subject_start"} +1;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
229 my $length2 = $match2{"Subject_end"} - $match2{"Subject_start"} +1;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
230
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
231 if (($length1 >= $length2)&&($overlap_length > $length2 * $MAX_OVERLAP_FRACTION)&&($overlap_length > $MAX_OVERLAP_LENGTH_IGNORED)){
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
232 $duplicate[$j]=1;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
233 }
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
234 elsif (($length2 >= $length1)&&($overlap_length > $length1 * $MAX_OVERLAP_FRACTION)&&($overlap_length > $MAX_OVERLAP_LENGTH_IGNORED)){
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
235 $duplicate[$i]=1;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
236 }
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
237 else {
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
238 }
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
239 }
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
240 }
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
241 }
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
242
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
243 undef @current_match_table_filtered;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
244 for (my $i=0;$i<=$#current_match_table;$i++){
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
245 if ($duplicate[$i] == 0){
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
246 push (@current_match_table_filtered,$current_match_table[$i]);
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
247 }
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
248 else {
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
249 $stats_large_overlapping++;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
250 }
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
251 }
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
252
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
253 if ($#current_match_table > $#current_match_table_filtered){
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
254 @current_match_table = @current_match_table_filtered;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
255 if ($VERBOSE eq "ON"){
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
256 print LF "Table Match filtered 2 ($#current_match_table)\n";
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
257 for (my $i=0;$i<=$#current_match_table;$i++){
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
258 my %match=%{$current_match_table[$i]};
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
259 print LF $match{"Query"},"\t",$match{"Subject_id"},"\t",$match{"Orientation"},"\t",$match{"Query_start"},"\t",$match{"Query_end"},"\t";
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
260 print LF $match{"Subject_start"},"\t",$match{"Subject_end"},"\t",$match{"Subject_length"},"\t",$match{"Similarity"},"\n";
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
261 }
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
262 }
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
263
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
264 }
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
265
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
266 undef @duplicate;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
267 for (my $i=0;$i<=$#current_match_table;$i++){
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
268 push (@duplicate,0);
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
269 }
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
270
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
271 # Ensuite Query (Subject puis Query pour evitez des deletions complementaires query update qui enleverait toutes les entrées)
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
272
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
273 for (my $i=0;$i<=$#current_match_table-1;$i++){
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
274 my %match1=%{$current_match_table[$i]};
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
275 for (my $j=$i+1;$j<=$#current_match_table;$j++){
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
276 my %match2=%{$current_match_table[$j]};
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
277 if (($match1{"Query_start"}<=$match2{"Query_start"})&&($match1{"Query_end"}>=$match2{"Query_start"})&&($match1{"Query_end"}<=$match2{"Query_end"}))
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
278 {
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
279 my $overlap_length = $match1{"Query_end"} - $match2{"Query_start"}+1;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
280 my $length1 = $match1{"Query_end"} - $match1{"Query_start"} +1;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
281 my $length2 = $match2{"Query_end"} - $match2{"Query_start"} +1;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
282
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
283 if (($length1 >= $length2)&&($overlap_length > $length2 * $MAX_OVERLAP_FRACTION)&&($overlap_length > $MAX_OVERLAP_LENGTH_IGNORED)){
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
284 $duplicate[$j]=1;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
285 }
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
286 elsif (($length2 >= $length1)&&($overlap_length > $length1 * $MAX_OVERLAP_FRACTION)&&($overlap_length > $MAX_OVERLAP_LENGTH_IGNORED)){
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
287 $duplicate[$i]=1;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
288 }
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
289 else {
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
290 }
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
291 }
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
292 #un seul sans les query sont classés
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
293 }
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
294 }
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
295
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
296 undef @current_match_table_filtered;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
297 for (my $i=0;$i<=$#current_match_table;$i++){
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
298 if ($duplicate[$i] == 0){
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
299 push (@current_match_table_filtered,$current_match_table[$i]);
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
300 }
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
301 else {
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
302 $stats_large_overlapping++;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
303 }
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
304 }
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
305
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
306 if ($#current_match_table > $#current_match_table_filtered){
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
307 @current_match_table = @current_match_table_filtered;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
308 if ($VERBOSE eq "ON"){
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
309 print LF "Table Match filtered 3 ($#current_match_table)\n";
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
310 for (my $i=0;$i<=$#current_match_table;$i++){
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
311 my %match=%{$current_match_table[$i]};
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
312 print LF $match{"Query"},"\t",$match{"Subject_id"},"\t",$match{"Orientation"},"\t",$match{"Query_start"},"\t",$match{"Query_end"},"\t";
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
313 print LF $match{"Subject_start"},"\t",$match{"Subject_end"},"\t",$match{"Subject_length"},"\t",$match{"Similarity"},"\n";
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
314 }
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
315 }
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
316 }
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
317
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
318
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
319
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
320 #Fusion des Hsp et Calcul des nouvelles metriques
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
321
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
322 my $overlap_length = 0;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
323 my $Subject_coverage;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
324 my $Query_coverage;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
325 my $Identity;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
326 my $nb_match = 0;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
327
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
328
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
329 my $nb_covered_subject=0;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
330 my $nb_covered_query=0;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
331 my %match=%{$current_match_table[0]};
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
332 my $q_length = $match{"Query_length"};
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
333 my $sub_length = $match{"Subject_length"};
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
334 my $subject = $match{"Subject"};
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
335 my $min_query = $match{"Query_start"};
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
336 my $max_query = $match{"Query_end"};
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
337 my $min_subject = $match{"Subject_start"};
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
338 my $max_subject = $match{"Subject_end"};
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
339 my $orientation = $match{"Orientation"};
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
340 my $Query = $match{"Query"};
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
341 my $Subject_Id = $match{"Subject_id"};
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
342 my $Subject = $match{"Subject"};
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
343
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
344
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
345 for (my $i=0;$i<=$#current_match_table;$i++){
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
346 my %match1=%{$current_match_table[$i]};
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
347 $nb_covered_subject += $match1{"Subject_end"} - $match1{"Subject_start"} +1;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
348 $nb_covered_query += $match1{"Query_end"} - $match1{"Query_start"} +1;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
349
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
350 if ($match1{"Query_start"}<$min_query){
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
351 $min_query = $match1{"Query_start"};
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
352 }
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
353 if ($match1{"Query_end"}>$max_query){
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
354 $max_query = $match1{"Query_end"};
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
355 }
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
356 if ($match1{"Subject_start"}<$min_subject){
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
357 $min_subject = $match1{"Subject_start"};
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
358 }
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
359 if ($match1{"Subject_end"}>$max_subject){
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
360 $max_subject = $match1{"Subject_end"};
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
361 }
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
362
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
363 $nb_match += $match1{"Similarity"};
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
364
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
365 for (my $j=$i+1;$j<=$#current_match_table;$j++){
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
366 my $overlap_query=0;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
367 my $overlap_subject=0;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
368 my %match2=%{$current_match_table[$j]};
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
369
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
370 if (($match1{"Subject_start"}<=$match2{"Subject_start"})&&($match1{"Subject_end"}>=$match2{"Subject_start"})&&($match1{"Subject_end"}<=$match2{"Subject_end"}))
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
371 {
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
372 $overlap_subject = $match1{"Subject_end"} - $match2{"Subject_start"}+1;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
373 }
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
374 elsif (($match2{"Subject_start"}<=$match1{"Subject_start"})&&($match2{"Subject_end"}>=$match1{"Subject_start"})&&($match2{"Subject_end"}<=$match1{"Subject_end"}))
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
375 {
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
376 $overlap_subject = $match2{"Subject_end"} - $match1{"Subject_start"}+1;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
377 }
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
378
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
379 if (($match1{"Query_start"}<=$match2{"Query_start"})&&($match1{"Query_end"}>=$match2{"Query_start"})&&($match1{"Query_end"}<=$match2{"Query_end"}))
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
380 {
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
381 $overlap_query = $match1{"Query_end"} - $match2{"Query_start"} +1;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
382 }
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
383 elsif (($match2{"Query_start"}<=$match1{"Query_start"})&&($match2{"Query_end"}>=$match1{"Query_start"})&&($match2{"Query_end"}<=$match1{"Query_end"}))
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
384 {
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
385 $overlap_query = $match2{"Query_end"} - $match1{"Query_start"}+1;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
386 }
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
387
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
388 if ($overlap_query > $overlap_subject){
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
389 $overlap_length += $overlap_query;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
390 }
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
391 else {
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
392 $overlap_length += $overlap_subject;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
393 }
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
394
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
395 $nb_covered_subject -= $overlap_subject;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
396 $nb_covered_query-= $overlap_query;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
397 }
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
398
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
399 }
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
400 ### Cas des tblastx ou le nb match est en aa, et les nb_covered en base
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
401 if ($nb_match/$nb_covered_subject<0.34){
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
402 $nb_match = $nb_match *3;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
403 }
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
404 ####
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
405
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
406 $Identity = sprintf("%.2f",($nb_match-$overlap_length)*100/$nb_covered_subject);
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
407
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
408 $Subject_coverage = sprintf("%.2f",$nb_covered_subject*100/$sub_length);
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
409 $Query_coverage = sprintf("%.2f",$nb_covered_query*100/$q_length);
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
410
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
# Tally this joined match into the coverage histogram that is written to the
# log at the end of the run.
# $Subject_coverage is a percentage (covered * 100 / length, formatted with
# "%.2f"), so the bucket boundaries are 10, 20, ... 90.  The former
# 0.1 ... 0.9 boundaries sent virtually every match to the "90-100%" bucket.
# NOTE(review): the histogram is named and logged as "query coverage" but is
# fed the subject coverage -- confirm which of the two is intended.
if    ($Subject_coverage < 10) { $stats_query_coverage{"0-10%"}++; }
elsif ($Subject_coverage < 20) { $stats_query_coverage{"10-20%"}++; }
elsif ($Subject_coverage < 30) { $stats_query_coverage{"20-30%"}++; }
elsif ($Subject_coverage < 40) { $stats_query_coverage{"30-40%"}++; }
elsif ($Subject_coverage < 50) { $stats_query_coverage{"40-50%"}++; }
elsif ($Subject_coverage < 60) { $stats_query_coverage{"50-60%"}++; }
elsif ($Subject_coverage < 70) { $stats_query_coverage{"60-70%"}++; }
elsif ($Subject_coverage < 80) { $stats_query_coverage{"70-80%"}++; }
elsif ($Subject_coverage < 90) { $stats_query_coverage{"80-90%"}++; }
else                           { $stats_query_coverage{"90-100%"}++; }
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
421
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
422 if ($VERBOSE eq "ON"){
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
423 print LF "Final\n";
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
424 print LF $Query,"\t",$Subject_Id,"\t",$orientation,"\t",$min_query,"\t",$max_query,"\t",$min_subject,"\t",$max_subject,"\t",$sub_length,"\t";
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
425 print LF "NB:",$nb_match,"\t","O:",$overlap_length,"\t","CQ:",$nb_covered_query,"\t","CS:",$nb_covered_subject,"\t",$Query_coverage,"\t",$Subject_coverage,"\t",$Identity,"\n";
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
426
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
427 }
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
428
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
429 if ($subject=~/^(.*?)\s*$/){
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
430 $subject = $1;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
431 }
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
432 my %current_match_joined;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
433 $current_match_joined{"Query"}=$Query;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
434 $current_match_joined{"Query_start"}=$min_query;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
435 $current_match_joined{"Query_end"}=$max_query;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
436 $current_match_joined{"Query_length"}=$q_length;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
437 $current_match_joined{"QCoverage"} = $Query_coverage;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
438 $current_match_joined{"Subject_id"}=$Subject_Id;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
439 $current_match_joined{"Subject"}=$subject;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
440 $current_match_joined{"Subject_start"}=$min_subject;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
441 $current_match_joined{"Subject_end"}=$max_subject;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
442 $current_match_joined{"Subject_length"}=$sub_length;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
443 $current_match_joined{"SCoverage"} = $Subject_coverage;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
444 $current_match_joined{"Similarity"}=$Identity;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
445 my $NBmatch = $nb_match-$overlap_length;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
446 $current_match_joined{"Nbmatch"}=$NBmatch;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
447 $current_match_joined{"Display"}="$Query\t$Subject_Id\t$orientation\t$Query_coverage%\t$Subject_coverage%\t$Identity%\t$min_query\t$max_query\t$min_subject\t$max_subject\t$NBmatch\t$q_length\t$sub_length\t$subject";
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
448
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
449 push(@match_joined,\%current_match_joined);
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
450 #print OJ $match_joined{"Display"},"\n";
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
451 }
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
452 my @match_joined_sorted = sort sortbyrelevanceandsubject @match_joined;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
453 for (my $i=0;$i<=$#match_joined_sorted;$i++){
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
454 my %match = %{$match_joined_sorted[$i]};
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
455 print OJ $match{"Display"},"\n";
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
456 }
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
457 }
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
458
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
459
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
460 #my %all_match_joined_best;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
461
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
462 #foreach my $key (sort sortkey keys %all_match_joined){
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
463 # my %match = %{$all_match_joined{$key}};
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
464 # print OJ $match{"Display"},"\n";
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
465 #}
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
466
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
467 #close (OB);
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
# Close the joined-match output.  Buffered write errors only surface at
# close time, so report a failure instead of silently ignoring it.
close(OJ) or warn "Could not close the joined-match output file: $!\n";

# ---- Run summary written to the log file ----
# scalar(@query_keys) is the number of queries; the former $#query_keys is
# the index of the last element and therefore reported one query too few.
print LF "Nb query : ", scalar(@query_keys), "\n";
print LF "Nb match : $stats_nb_match\n";
print LF "Nb match filtered included / too large overlap : $stats_included / $stats_large_overlapping \n";

# Coverage histogram: one tab-separated row of bucket labels followed by one
# row with the matching counts, both in the same (string-sorted) key order.
print LF "Query coverage\n";
print LF "percent:\t";
for my $bucket (sort { $a cmp $b } keys %stats_query_coverage) {
    print LF $bucket, "\t";
}
print LF "\n number :\t";
for my $bucket (sort { $a cmp $b } keys %stats_query_coverage) {
    print LF $stats_query_coverage{$bucket}, "\t";
}
print LF "\n";

close(LF) or warn "Could not close the log file: $!\n";
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
487
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
488
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
489 # for (my $i=0;$i<=$#all_match_joined;$i++){
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
490 # my $match_joined = %{$all_match_joined[$i]};
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
491 # print $match_joined{"Query"},"\t",$match_joined{"Subject"},"\t",$match_joined{"Subject_id"},"\t",$match_joined{"Similarity"},"\t",$match_joined{"Query_length"},"\t",$match_joined{"Subject_length"},"\n";
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
492 # }
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
493
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
494
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
# Sort comparator, meant to be used as `sort mysort @list`.
# Orders hash references by ascending "Query_start" and breaks ties by
# ascending "Query_end" (both compared numerically).
# Reads the package variables $a and $b set by sort(); each must be a hash
# reference.  The fields are read through the references: copying both
# hashes on every comparison was needless work repeated for every pair.
# Returns a negative, zero or positive number, as sort() expects.
sub mysort {
    return $a->{"Query_start"} <=> $b->{"Query_start"}
        || $a->{"Query_end"}   <=> $b->{"Query_end"};
}
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
506
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
# Sort comparator, meant to be used as `sort sortbyquerycoord @matches`.
# Orders match hash references by their position on the query: ascending
# "Query_start", then ascending "Query_end" (both compared numerically).
# Reads the package variables $a and $b set by sort(); each must be a hash
# reference.  The fields are read through the references instead of copying
# both hashes on every comparison.
# Returns a negative, zero or positive number, as sort() expects.
sub sortbyquerycoord {
    return $a->{"Query_start"} <=> $b->{"Query_start"}
        || $a->{"Query_end"}   <=> $b->{"Query_end"};
}
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
518
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
# Sort comparator, meant to be used as
# `sort sortbyrelevanceandsubject @match_joined`.
# Puts the most relevant joined matches first:
#   1. "Nbmatch"   descending (numeric),
#   2. "QCoverage" descending (numeric),
#   3. "Subject"   ascending  (string) as the final tie-breaker.
# Reads the package variables $a and $b set by sort(); each must be a hash
# reference.  The fields are read through the references instead of copying
# both hashes on every comparison.
# Returns a negative, zero or positive number, as sort() expects.
sub sortbyrelevanceandsubject {
    return $b->{"Nbmatch"}   <=> $a->{"Nbmatch"}
        || $b->{"QCoverage"} <=> $a->{"QCoverage"}
        || $a->{"Subject"}   cmp $b->{"Subject"};
}
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
529
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
530
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
# Sort comparator for '#'-separated composite keys, meant to be used as
# `sort sortkey @keys`.
# Each key is split on '#'; the first and second fields are compared as
# strings and the third field numerically, all ascending.
# Reads the package variables $a and $b set by sort().
sub sortkey {
    # The list slices keep split's default behaviour (no implicit limit).
    my ($left_first,  $left_second,  $left_third)  = (split(/\#/, $a))[0 .. 2];
    my ($right_first, $right_second, $right_third) = (split(/\#/, $b))[0 .. 2];

    my $order = $left_first cmp $right_first;
    $order = $left_second cmp $right_second if !$order;
    $order = $left_third <=> $right_third   if !$order;
    return $order;
}