diff genephys/MergeBlastResults.pl @ 3:8dfa09868059 draft

Uploaded
author mcharles
date Fri, 24 Oct 2014 05:54:20 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genephys/MergeBlastResults.pl	Fri Oct 24 05:54:20 2014 -0400
@@ -0,0 +1,545 @@
+#!/usr/bin/perl
+#V1.0.3 header added
+#V1.0.2 suppressed the final sort (very heavyload) and replaced it by another level of hash
+#V1.0.1 added log, option parameters
+use strict;
+use warnings;
+use Getopt::Long;
+
+my $inputblast;
+my $outputjoin;
+my $log_file;
+my $MAX_OVERLAP_FRACTION = 0.5;
+my $MAX_OVERLAP_LENGTH_IGNORED = 3;
+my $VERBOSE = "OFF";
+my $ALLOWED_GAP_FRACTION_FOR_MERGING = 0.3;
+my $HEADER ="";
+
+GetOptions (
+"input_blasttab_file=s" => \$inputblast,
+"output_joinmatch_file=s" => \$outputjoin,
+"log_file=s" => \$log_file,
+"header=s" => \$HEADER,
+"max_overlap_fraction=f" => \$MAX_OVERLAP_FRACTION,
+"max_overlap_length_ignored=i" =>\$MAX_OVERLAP_LENGTH_IGNORED
+) or die("Error in command line arguments\n");
+
+open(IB, $inputblast) or die ("Can't open $inputblast \n");
+open (LF,">$log_file") or die("Can't open $log_file\n");
+
+
+my %match_by_query;
+
+my @query_keys;
+my %querys;
+
+
+my $stats_nb_match=0;
+my $stats_included=0;
+my $stats_large_overlapping=0;
+my %stats_query_coverage;
+$stats_query_coverage{"0-10%"}=0;
+$stats_query_coverage{"10-20%"}=0;
+$stats_query_coverage{"20-30%"}=0;
+$stats_query_coverage{"30-40%"}=0;
+$stats_query_coverage{"40-50%"}=0;
+$stats_query_coverage{"50-60%"}=0;
+$stats_query_coverage{"60-70%"}=0;
+$stats_query_coverage{"70-80%"}=0;
+$stats_query_coverage{"80-90%"}=0;
+$stats_query_coverage{"90-100%"}=0;
+
+my $current_query="";
+while (my $ligne = <IB>){
+	my @fields = split (/\t/,$ligne);
+	if ($#fields != 9){
+		print STDERR "Invalid blasttab format, must have 10 columns\n";
+		exit(0);
+	}
+	$stats_nb_match++;
+	my %match;
+	$match{"Query"}=$fields[0];
+	if (!$querys{$match{"Query"}}){
+		push(@query_keys,$match{"Query"});
+		$querys{$match{"Query"}} = 1;
+	}
+	$match{"Subject_id"}=$fields[1];
+	$match{"Orientation"}="+";
+	$match{"Query_start"}=$fields[2];
+	$match{"Query_end"}=$fields[3];
+	$match{"Subject_start"}=$fields[4];
+	$match{"Subject_end"}=$fields[5];
+			
+
+	if ($fields[2]>$fields[3]){
+		$match{"Query_start"}=$fields[3];
+		$match{"Query_end"}=$fields[2];
+		$match{"Orientation"}="-";
+		#print "- $ligne";
+	}
+	
+	if ($fields[4]>$fields[5]){
+		$match{"Subject_start"}=$fields[5];
+		$match{"Subject_end"}=$fields[4];
+		$match{"Orientation"}="-";
+		#print "- $ligne";
+	}
+	$match{"Similarity"}=$fields[6];
+	$match{"Query_length"}=$fields[7];
+	$match{"Subject_length"}=$fields[8];
+	chomp($fields[9]);
+	$match{"Subject"}=$fields[9];
+	
+	$match{"Ligne"}=$ligne;
+	
+	my $querykey = $match{"Query"};
+	my $key = $match{"Query"}."##".$match{"Subject"}."##".$match{"Orientation"};
+	if ($match{"Subject_length"}==0){
+		print LF "Match 0",$ligne,"\n",$match{"Subject_length"},"\n";
+	}	
+	my %match_by_query_and_subject;
+	my @match_table;
+
+	if ($match_by_query{$querykey}){
+		%match_by_query_and_subject = %{$match_by_query{$querykey}};
+	}
+	if ($match_by_query_and_subject{$key}){
+		@match_table=@{$match_by_query_and_subject{$key}};
+	}
+
+	push (@match_table,\%match);
+	$match_by_query_and_subject{$key} = \@match_table;
+	$match_by_query{$querykey}= \%match_by_query_and_subject;
+}
+
+close (IB);
+
+#print LF "NB query : $#query_keys\n";
+#foreach my $querykey (sort @query_keys){
+#	my %current_match_by_query_and_subject = %{$match_by_query{$querykey}};
+#	foreach my $key (sort {$a cmp $b} keys %current_match_by_query_and_subject){
+#		my @current_match_table = sort sortbyquerycoord @{$current_match_by_query_and_subject{$key}};
+#		for (my $i=0;$i<=$#current_match_table;$i++){
+#			my %current_match = %{$current_match_table[$i]};
+#			print LF $current_match{"Ligne"}."\n";
+#		}
+#	}
+#}
+#exit(0);
+
+open (OJ, ">$outputjoin") or die ("Can't open $outputjoin \n");
+print OJ "##",$HEADER,"\n";
+print OJ "#Query\tSubject_Id\torientation\tQuery_coverage\tSubject_coverage\tIdentity\tmin_query\tmax_query\tmin_subject\tmax_subject\tNBmatch\tq_length\tsub_length\tsubject\n";
+	
+foreach my $querykey (sort @query_keys){
+	my %current_match_by_query_and_subject = %{$match_by_query{$querykey}};
+	my @match_joined; #la table qui contient les matchs (hash) projeté de chaque subject pour une query
+	foreach my $key (sort {$a cmp $b} keys %current_match_by_query_and_subject){
+		my @current_match_table = sort sortbyquerycoord @{$current_match_by_query_and_subject{$key}};
+		my @duplicate;
+		my @overlap;
+		for (my $i=0;$i<=$#current_match_table;$i++){
+			push (@duplicate,0);
+		}
+		if ($VERBOSE eq "ON"){
+			print LF "\nTable Match ($#current_match_table)\t$key\n";
+			for (my $i=0;$i<=$#current_match_table;$i++){
+				my %match=%{$current_match_table[$i]};
+				print LF $match{"Query"},"\t",$match{"Subject_id"},"\t",$match{"Orientation"},"\t",$match{"Query_start"},"\t",$match{"Query_end"},"\t";
+				print LF $match{"Subject_start"},"\t",$match{"Subject_end"},"\t",$match{"Subject_length"},"\t",$match{"Similarity"},"\n";
+			}
+		}
+	
+		#Scan d'inclusion strict
+		for (my $i=0;$i<=$#current_match_table;$i++){
+			my %match1=%{$current_match_table[$i]};
+			for (my $j=0;$j<=$#current_match_table;$j++){
+				if (($j != $i)&&($duplicate[$j]==0)){ # On scan dans les deux sens, pas seulement $j = $i+1 a cause du last;
+					my %match2=%{$current_match_table[$j]};
+					# Inclus Subject
+					if (($match1{"Subject_start"}>=$match2{"Subject_start"})&&($match1{"Subject_end"}<=$match2{"Subject_end"}))
+					{
+						$duplicate[$i]=1;
+						last;
+					}
+					# Inclus Query
+					elsif (($match1{"Query_start"}>=$match2{"Query_start"})&&($match1{"Query_end"}<=$match2{"Query_end"}))
+					{
+						$duplicate[$i]=2;
+						last;
+					}
+
+				}
+			}
+		}
+	
+		my @current_match_table_filtered;
+		for (my $i=0;$i<=$#current_match_table;$i++){
+			if ($duplicate[$i] == 0){
+				push (@current_match_table_filtered,$current_match_table[$i]);
+			}
+			else{
+				$stats_included++;
+			}
+		}
+	
+		if ($#current_match_table > $#current_match_table_filtered){
+			@current_match_table = @current_match_table_filtered;
+			if ($VERBOSE eq "ON"){
+				print LF "Table Match filtered 1 ($#current_match_table)\n";
+				for (my $i=0;$i<=$#current_match_table;$i++){
+					my %match=%{$current_match_table[$i]};
+					print LF $match{"Query"},"\t",$match{"Subject_id"},"\t",$match{"Orientation"},"\t",$match{"Query_start"},"\t",$match{"Query_end"},"\t";
+					print LF $match{"Subject_start"},"\t",$match{"Subject_end"},"\t",$match{"Subject_length"},"\t",$match{"Similarity"},"\n";
+				}
+			}
+		}
+	
+		undef @duplicate;
+		for (my $i=0;$i<=$#current_match_table;$i++){
+			push (@duplicate,0);
+		}
+		########
+	
+		###Scan d'overlap trop important
+		# D'abord subject
+		for (my $i=0;$i<=$#current_match_table-1;$i++){
+			my %match1=%{$current_match_table[$i]};
+			for (my $j=$i+1;$j<=$#current_match_table;$j++){
+				my %match2=%{$current_match_table[$j]};
+				if (($match1{"Subject_start"}<=$match2{"Subject_start"})&&($match1{"Subject_end"}>=$match2{"Subject_start"})&&($match1{"Subject_end"}<=$match2{"Subject_end"}))
+				 {
+					my $overlap_length = $match1{"Subject_end"} - $match2{"Subject_start"}+1;
+					my $length1 = $match1{"Subject_end"} - $match1{"Subject_start"} +1;
+					my $length2 = $match2{"Subject_end"} - $match2{"Subject_start"} +1;
+				
+					if (($length1 >= $length2)&&($overlap_length > $length2 * $MAX_OVERLAP_FRACTION)&&($overlap_length > $MAX_OVERLAP_LENGTH_IGNORED)){
+						$duplicate[$j]=1;
+					}
+					elsif (($length2 >= $length1)&&($overlap_length > $length1 * $MAX_OVERLAP_FRACTION)&&($overlap_length > $MAX_OVERLAP_LENGTH_IGNORED)){
+						$duplicate[$i]=1;
+					}
+					else {
+					}
+				}
+				elsif (($match2{"Subject_start"}<=$match1{"Subject_start"})&&($match2{"Subject_end"}>=$match1{"Subject_start"})&&($match2{"Subject_end"}<=$match1{"Subject_end"})) #Recherche d'inclusion dans les deux sens car els match sont classé par query coord, par par subject coord
+				{
+					my $overlap_length = $match2{"Subject_end"} - $match1{"Subject_start"}+1;
+					my $length1 = $match1{"Subject_end"} - $match1{"Subject_start"} +1;
+					my $length2 = $match2{"Subject_end"} - $match2{"Subject_start"} +1;
+				
+					if (($length1 >= $length2)&&($overlap_length > $length2 * $MAX_OVERLAP_FRACTION)&&($overlap_length > $MAX_OVERLAP_LENGTH_IGNORED)){
+						$duplicate[$j]=1;
+					}
+					elsif (($length2 >= $length1)&&($overlap_length > $length1 * $MAX_OVERLAP_FRACTION)&&($overlap_length > $MAX_OVERLAP_LENGTH_IGNORED)){
+						$duplicate[$i]=1;
+					}
+					else {
+					}	
+				}
+			}
+		}
+		
+		undef @current_match_table_filtered;
+		for (my $i=0;$i<=$#current_match_table;$i++){
+			if ($duplicate[$i] == 0){
+				push (@current_match_table_filtered,$current_match_table[$i]);
+			}
+			else {
+				$stats_large_overlapping++;
+			}
+		}
+	
+		if ($#current_match_table > $#current_match_table_filtered){
+			@current_match_table = @current_match_table_filtered;
+			if ($VERBOSE eq "ON"){
+				print LF "Table Match filtered 2 ($#current_match_table)\n";
+				for (my $i=0;$i<=$#current_match_table;$i++){
+					my %match=%{$current_match_table[$i]};
+					print LF $match{"Query"},"\t",$match{"Subject_id"},"\t",$match{"Orientation"},"\t",$match{"Query_start"},"\t",$match{"Query_end"},"\t";
+					print LF $match{"Subject_start"},"\t",$match{"Subject_end"},"\t",$match{"Subject_length"},"\t",$match{"Similarity"},"\n";
+				}
+			}	
+
+		}
+	
+		undef @duplicate;
+		for (my $i=0;$i<=$#current_match_table;$i++){
+			push (@duplicate,0);
+		}
+	
+		# Ensuite Query (Subject puis Query pour evitez des deletions complementaires query update qui enleverait toutes les entrées)
+			
+		for (my $i=0;$i<=$#current_match_table-1;$i++){
+			my %match1=%{$current_match_table[$i]};
+			for (my $j=$i+1;$j<=$#current_match_table;$j++){
+				my %match2=%{$current_match_table[$j]};			
+				if (($match1{"Query_start"}<=$match2{"Query_start"})&&($match1{"Query_end"}>=$match2{"Query_start"})&&($match1{"Query_end"}<=$match2{"Query_end"}))
+				{
+					my $overlap_length = $match1{"Query_end"} - $match2{"Query_start"}+1;
+					my $length1 = $match1{"Query_end"} - $match1{"Query_start"} +1;
+					my $length2 = $match2{"Query_end"} - $match2{"Query_start"} +1;
+				
+					if (($length1 >= $length2)&&($overlap_length > $length2 * $MAX_OVERLAP_FRACTION)&&($overlap_length > $MAX_OVERLAP_LENGTH_IGNORED)){
+						$duplicate[$j]=1;
+					}
+					elsif (($length2 >= $length1)&&($overlap_length > $length1 * $MAX_OVERLAP_FRACTION)&&($overlap_length > $MAX_OVERLAP_LENGTH_IGNORED)){
+						$duplicate[$i]=1;
+					}
+					else {
+					}
+				}
+				#un seul sans les query sont classés
+			}
+		}
+			
+		undef @current_match_table_filtered;
+		for (my $i=0;$i<=$#current_match_table;$i++){
+			if ($duplicate[$i] == 0){
+				push (@current_match_table_filtered,$current_match_table[$i]);
+			}
+			else {
+				$stats_large_overlapping++;
+			}
+		}
+	
+		if ($#current_match_table > $#current_match_table_filtered){
+			@current_match_table = @current_match_table_filtered;
+			if ($VERBOSE eq "ON"){
+				print LF "Table Match filtered 3 ($#current_match_table)\n";
+				for (my $i=0;$i<=$#current_match_table;$i++){
+					my %match=%{$current_match_table[$i]};
+					print LF $match{"Query"},"\t",$match{"Subject_id"},"\t",$match{"Orientation"},"\t",$match{"Query_start"},"\t",$match{"Query_end"},"\t";
+					print LF $match{"Subject_start"},"\t",$match{"Subject_end"},"\t",$match{"Subject_length"},"\t",$match{"Similarity"},"\n";
+				}
+			}
+		}	
+	
+
+	
+		#Fusion des Hsp et Calcul des nouvelles metriques
+	
+		my $overlap_length = 0;
+		my $Subject_coverage;
+		my $Query_coverage;
+		my $Identity;
+		my $nb_match = 0;
+	
+
+		my $nb_covered_subject=0;
+		my $nb_covered_query=0;
+		my %match=%{$current_match_table[0]};
+		my $q_length = $match{"Query_length"};
+		my $sub_length = $match{"Subject_length"};
+		my $subject = $match{"Subject"};
+		my $min_query = $match{"Query_start"};
+		my $max_query = $match{"Query_end"};
+		my $min_subject = $match{"Subject_start"};
+		my $max_subject = $match{"Subject_end"};
+		my $orientation = $match{"Orientation"};
+		my $Query = $match{"Query"};
+		my $Subject_Id = $match{"Subject_id"};
+		my $Subject = $match{"Subject"};
+	
+	
+		for (my $i=0;$i<=$#current_match_table;$i++){
+			my %match1=%{$current_match_table[$i]};
+			$nb_covered_subject += $match1{"Subject_end"} - $match1{"Subject_start"} +1;
+			$nb_covered_query += $match1{"Query_end"} - $match1{"Query_start"} +1;
+			
+			if ($match1{"Query_start"}<$min_query){
+				$min_query = $match1{"Query_start"};
+			}
+			if ($match1{"Query_end"}>$max_query){
+				$max_query = $match1{"Query_end"};
+			}
+			if ($match1{"Subject_start"}<$min_subject){
+				$min_subject = $match1{"Subject_start"};
+			}
+			if ($match1{"Subject_end"}>$max_subject){
+				$max_subject = $match1{"Subject_end"};
+			}
+		
+			$nb_match += $match1{"Similarity"};
+		
+			for (my $j=$i+1;$j<=$#current_match_table;$j++){
+				my $overlap_query=0;
+				my $overlap_subject=0;
+				my %match2=%{$current_match_table[$j]};
+
+				if (($match1{"Subject_start"}<=$match2{"Subject_start"})&&($match1{"Subject_end"}>=$match2{"Subject_start"})&&($match1{"Subject_end"}<=$match2{"Subject_end"}))
+				{
+					$overlap_subject = $match1{"Subject_end"} - $match2{"Subject_start"}+1;
+				}
+				elsif (($match2{"Subject_start"}<=$match1{"Subject_start"})&&($match2{"Subject_end"}>=$match1{"Subject_start"})&&($match2{"Subject_end"}<=$match1{"Subject_end"}))
+				{
+					$overlap_subject = $match2{"Subject_end"} - $match1{"Subject_start"}+1;
+				}
+			
+				if (($match1{"Query_start"}<=$match2{"Query_start"})&&($match1{"Query_end"}>=$match2{"Query_start"})&&($match1{"Query_end"}<=$match2{"Query_end"}))
+				{
+					$overlap_query = $match1{"Query_end"} - $match2{"Query_start"} +1;
+				}
+				elsif (($match2{"Query_start"}<=$match1{"Query_start"})&&($match2{"Query_end"}>=$match1{"Query_start"})&&($match2{"Query_end"}<=$match1{"Query_end"}))
+				{
+					$overlap_query = $match2{"Query_end"} - $match1{"Query_start"}+1;
+				}
+			
+				if ($overlap_query > $overlap_subject){
+					$overlap_length += $overlap_query;
+				}
+				else {
+					$overlap_length += $overlap_subject;
+				}
+			
+				$nb_covered_subject -= $overlap_subject;
+				$nb_covered_query-= $overlap_query;
+			}
+		
+		}
+		### Cas des tblastx ou le nb match est en aa, et les nb_covered en base
+		if ($nb_match/$nb_covered_subject<0.34){
+			$nb_match = $nb_match *3;
+		}
+		####
+	
+		$Identity = sprintf("%.2f",($nb_match-$overlap_length)*100/$nb_covered_subject);
+
+		$Subject_coverage = sprintf("%.2f",$nb_covered_subject*100/$sub_length);
+		$Query_coverage = sprintf("%.2f",$nb_covered_query*100/$q_length);		
+
+		if ($Subject_coverage<0.1){$stats_query_coverage{"0-10%"}++;}
+		elsif($Subject_coverage<0.2){$stats_query_coverage{"10-20%"}++;}
+		elsif($Subject_coverage<0.3){$stats_query_coverage{"20-30%"}++;}
+		elsif($Subject_coverage<0.4){$stats_query_coverage{"30-40%"}++;}
+		elsif($Subject_coverage<0.5){$stats_query_coverage{"40-50%"}++;}
+		elsif($Subject_coverage<0.6){$stats_query_coverage{"50-60%"}++;}
+		elsif($Subject_coverage<0.7){$stats_query_coverage{"60-70%"}++;}
+		elsif($Subject_coverage<0.8){$stats_query_coverage{"70-80%"}++;}
+		elsif($Subject_coverage<0.9){$stats_query_coverage{"80-90%"}++;}
+		else{$stats_query_coverage{"90-100%"}++;}
+
+		if ($VERBOSE eq "ON"){
+			print LF "Final\n";
+			print LF $Query,"\t",$Subject_Id,"\t",$orientation,"\t",$min_query,"\t",$max_query,"\t",$min_subject,"\t",$max_subject,"\t",$sub_length,"\t";
+			print LF "NB:",$nb_match,"\t","O:",$overlap_length,"\t","CQ:",$nb_covered_query,"\t","CS:",$nb_covered_subject,"\t",$Query_coverage,"\t",$Subject_coverage,"\t",$Identity,"\n";
+
+		}
+
+		if ($subject=~/^(.*?)\s*$/){
+			$subject = $1;
+		}
+		my %current_match_joined;
+		$current_match_joined{"Query"}=$Query;
+		$current_match_joined{"Query_start"}=$min_query;
+		$current_match_joined{"Query_end"}=$max_query;
+		$current_match_joined{"Query_length"}=$q_length;
+		$current_match_joined{"QCoverage"} = $Query_coverage;
+		$current_match_joined{"Subject_id"}=$Subject_Id;
+		$current_match_joined{"Subject"}=$subject;
+		$current_match_joined{"Subject_start"}=$min_subject;
+		$current_match_joined{"Subject_end"}=$max_subject;
+		$current_match_joined{"Subject_length"}=$sub_length;
+		$current_match_joined{"SCoverage"} = $Subject_coverage;
+		$current_match_joined{"Similarity"}=$Identity;
+		my $NBmatch = $nb_match-$overlap_length;
+		$current_match_joined{"Nbmatch"}=$NBmatch;
+		$current_match_joined{"Display"}="$Query\t$Subject_Id\t$orientation\t$Query_coverage%\t$Subject_coverage%\t$Identity%\t$min_query\t$max_query\t$min_subject\t$max_subject\t$NBmatch\t$q_length\t$sub_length\t$subject";
+
+		push(@match_joined,\%current_match_joined);
+		#print OJ $match_joined{"Display"},"\n";
+	}
+	my @match_joined_sorted = sort sortbyrelevanceandsubject @match_joined;
+	for (my $i=0;$i<=$#match_joined_sorted;$i++){
+		my %match = %{$match_joined_sorted[$i]};
+		print OJ $match{"Display"},"\n";
+	}
+}
+
+
+#my %all_match_joined_best;
+
+#foreach my $key (sort sortkey keys %all_match_joined){
+#	my %match = %{$all_match_joined{$key}};
+#	print OJ $match{"Display"},"\n";
+#}
+
+#close (OB);
+close (OJ);
+
+
+print LF "Nb query : $#query_keys\n";
+print LF "Nb match : $stats_nb_match\n";
+print LF "Nb match filtered included / too large overlap : $stats_included / $stats_large_overlapping \n";
+print LF "Query coverage\n";
+print LF "percent:\t";
+foreach my $key (sort {$a cmp $b} keys %stats_query_coverage) {
+	print LF $key,"\t";
+}
+print LF "\n number :\t";
+foreach my $key (sort {$a cmp $b} keys %stats_query_coverage) {
+	print LF $stats_query_coverage{$key},"\t";
+}
+print LF "\n";
+
+
+close (LF);
+
+
+# for (my $i=0;$i<=$#all_match_joined;$i++){
+	# my $match_joined = %{$all_match_joined[$i]};
+	# print $match_joined{"Query"},"\t",$match_joined{"Subject"},"\t",$match_joined{"Subject_id"},"\t",$match_joined{"Similarity"},"\t",$match_joined{"Query_length"},"\t",$match_joined{"Subject_length"},"\n";
+# }
+
+
+sub mysort{
+	my %matcha=%{$a};
+	my %matchb=%{$b};
+	
+	#print "TEST : ",$matcha{"Query_start"}, " / ", $matchb{"Query_start"},"\n";
+	
+	$matcha{"Query_start"} <=> $matchb{"Query_start"}
+	||
+	$matcha{"Query_end"} <=> $matchb{"Query_end"}
+	
+}
+
+sub sortbyquerycoord{
+	my %matcha=%{$a};
+	my %matchb=%{$b};
+	
+	#print "TEST : ",$matcha{"Query_start"}, " / ", $matchb{"Query_start"},"\n";
+	
+	$matcha{"Query_start"} <=> $matchb{"Query_start"}
+	||
+	$matcha{"Query_end"} <=> $matchb{"Query_end"}
+	
+}
+
+sub sortbyrelevanceandsubject{
+	my %matcha=%{$a};
+	my %matchb=%{$b};
+	
+	$matchb{"Nbmatch"} <=> $matcha{"Nbmatch"}
+	||
+	$matchb{"QCoverage"} <=> $matcha{"QCoverage"}
+	||
+	$matcha{"Subject"} cmp $matchb{"Subject"}
+}
+
+
+sub sortkey {
+	my @fieldsa = split (/\#/,$a);
+	my @fieldsb = split (/\#/,$b);
+	
+	#print "$a\n$b\n";
+	#print $fieldsa[0]," cmp ",$fieldsb[0],"\n";
+	#exit(0);
+
+	$fieldsa[0] cmp $fieldsb[0]
+	||
+	$fieldsa[1] cmp $fieldsb[1]
+	||
+	$fieldsa[2] <=> $fieldsb[2]
+
+}