diff rapsodyn/ParseBlastForUniqueMatch.pl @ 7:3f7b0788a1c4 draft

Uploaded
author mcharles
date Tue, 07 Oct 2014 10:34:34 -0400
parents 442a7c88b886
children 0a6c1cfe4dc8
line wrap: on
line diff
--- a/rapsodyn/ParseBlastForUniqueMatch.pl	Mon Sep 29 03:02:16 2014 -0400
+++ b/rapsodyn/ParseBlastForUniqueMatch.pl	Tue Oct 07 10:34:34 2014 -0400
@@ -1,13 +1,26 @@
 #!/usr/bin/perl
+#V1.0.1 added log, option parameters
 use strict;
 use warnings;
+use Getopt::Long;
 
 
-my $input_variant_file = $ARGV[0];
-my $input_blast_file = $ARGV[1];
-my $window_length = $ARGV[2];
-my $nb_mismatch_max = $ARGV[3];
+my $input_variant_file;
+my $input_blast_file;
+my $log_file;
+my $WINDOW_LENGTH= 50;
+my $NB_MISMATCH_MAX = 3;
 
+GetOptions (
+"input_variant_file=s" => \$input_variant_file,
+"input_blast_file=s" => \$input_blast_file,
+"window_length=s" => \$WINDOW_LENGTH,
+"log_file=s" => \$log_file,
+"nb_mismatch_max=s" => \$NB_MISMATCH_MAX
+) or die("Error in command line arguments\n");
+
+my $nb_variant_checked=0;
+my $nb_variant_selected=0;
 my %hash_name;
 
 open(INB, $input_blast_file) or die ("Can't open $input_blast_file\n");
@@ -71,7 +84,7 @@
 	my $nb_gap_query=0;
 	
 	if (length($query_aln) == length($subject_aln)){
-		if (length($query_aln)<$window_length-$nb_mismatch_max){
+		if (length($query_aln)<$WINDOW_LENGTH-$NB_MISMATCH_MAX){
 		}
 		else {
 			my @q = split(//,$query_aln);
@@ -79,27 +92,27 @@
 			for (my $i=0;$i<=$#q;$i++){
 				my $global_idx = $query_start-1+$i-$nb_gap_query;
 				if ($q[$i] eq "-"){
-					if ($global_idx < $window_length){
+					if ($global_idx < $WINDOW_LENGTH){
 						$compt_mismatch_5p++;
 					}
-					elsif ($global_idx > $window_length){
+					elsif ($global_idx > $WINDOW_LENGTH){
 						$compt_mismatch_3p++;
 					}
 					$nb_gap_query++; #On compte les gap dans la query pour les soustraire de l'index global
 				}
 				else {
 					if ($q[$i] ne $s[$i]){
-						if ($global_idx < $window_length){
+						if ($global_idx < $WINDOW_LENGTH){
 							$compt_mismatch_5p++;
 						}
-						elsif ($global_idx > $window_length){
+						elsif ($global_idx > $WINDOW_LENGTH){
 							$compt_mismatch_3p++;
 						}
 					}
 				}
 			}
 			$compt_mismatch_5p += $query_start-1;
-			$compt_mismatch_3p += $window_length *2 + 1 - $query_stop;
+			$compt_mismatch_3p += $WINDOW_LENGTH *2 + 1 - $query_stop;
 			
 			# for (my $i=0;$i<$window_length;$i++){
 				# if ($tbl_q_aln[$i] eq "#"){
@@ -122,7 +135,7 @@
 				# else {
 				# }
 			# }
-			if (($compt_mismatch_5p <= $nb_mismatch_max)||($compt_mismatch_3p <= $nb_mismatch_max)){
+			if (($compt_mismatch_5p <= $NB_MISMATCH_MAX)||($compt_mismatch_3p <= $NB_MISMATCH_MAX)){
 				$hash_name{$name}++;
 			}
 			
@@ -151,6 +164,7 @@
 open(INV, $input_variant_file) or die ("Can't open $input_variant_file\n");
 
 while (my $ligne = <INV>) {
+	$nb_variant_checked++;
 	
 	my @champs = split (/\s+/,$ligne);
 	my $header = $champs[0]."_".$champs[1];
@@ -158,6 +172,7 @@
 	if ($hash_name{$header}){
 		if ($hash_name{$header}==1){
 			print $ligne;
+			$nb_variant_selected++;
 		}
 	}
 	else {
@@ -169,6 +184,12 @@
 
 close(INV);
 
+open (LF,">$log_file") or die("Can't open $log_file\n");
+print LF "\n####\t Blast filtering \n";
+print LF "Variant checked  :\t$nb_variant_checked\n";
+print LF "Variant selected :\t$nb_variant_selected\n";
+close (LF);
+
 
 # foreach my $key (sort keys %hash_name){
 	# print $key,"\t",$hash_name{$key},"\n";