diff rapsodyn/PrepareFastqLight.pl @ 15:56d328bce3a7 draft default tip

Uploaded
author mcharles
date Thu, 29 Jan 2015 08:54:06 -0500
parents 0a6c1cfe4dc8
children
line wrap: on
line diff
--- a/rapsodyn/PrepareFastqLight.pl	Mon Jan 26 18:10:52 2015 -0500
+++ b/rapsodyn/PrepareFastqLight.pl	Thu Jan 29 08:54:06 2015 -0500
@@ -1,4 +1,5 @@
 #!/usr/bin/perl
+#v1.1.1 new check on read synchro
 #v1.1.0 manage empty files
 #v1.0.4 bug correction, last read not considered
 #v1.0.3 support rapsodyn header (.... 1:...  / .... 2:...)
@@ -132,17 +133,12 @@
 	my $ligne2_r2 =<READ2>;
 	my $ligne3_r2 =<READ2>;
 	my $ligne4_r2 =<READ2>;
-	# chomp($ligne1_r1);
-	# chomp($ligne2_r1);
-	# chomp($ligne3_r1);
-	# chomp($ligne4_r1);
-	# chomp($ligne2_r1);
 	
 	$compt++;
 	$nb_read1++;
 	$nb_read2++;
 	
-#@ 1 sec
+
 	if ((!$ligne1_r1)||(!$ligne2_r1)||(!$ligne3_r1)||(!$ligne4_r1)||(!$ligne1_r2)||(!$ligne2_r2)||(!$ligne3_r2)||(!$ligne4_r2)){
 		if ($VERBOSE eq "ON"){
 			print "Error in file format";
@@ -173,7 +169,7 @@
 		}
 		$error2++;
 	}
-#@ 1 - 2 sec
+
 	else {
 		
 		my $length_seq1 = length(chomp($ligne2_r1));
@@ -190,26 +186,38 @@
 		my $repheader1="";
 		my $repheader2="";
 
-		
-		if ($ligne1_r1 =~/^\@(.*?)[\s\/]/){
+		my @tbl_header1;
+		my @tbl_header2;
+		if ($ligne1_r1 =~/^\@(.*?)\s*$/){
 			$header1 = $1;
+			@tbl_header1 = split(//,$header1);
 		}
 		
-		if ($ligne3_r1 =~/^\+(.*?)[\s\/]/){
+                if ($ligne3_r1 =~/^\+(.*?)\s*$/){
                         $repheader1 = $1;
                	}
 
-		if ($ligne1_r2 =~/^\@(.*?)[\s\/]/){
+                if ($ligne1_r2 =~/^\@(.*?)\s*$/){
                         $header2 = $1;
+                        @tbl_header2 = split(//,$header2);
                	}
 
-		if ($ligne3_r2 =~/^\+(.*?)[\s\/]/){
+                if ($ligne3_r2 =~/^\+(.*?)\s*$/){
                         $repheader2 = $1;
                	}
-#@ 2 sec
+		my $diffheader=0;
+		if ($#tbl_header1 == $#tbl_header2){
+			for (my $i=0;$i<=$#tbl_header1;$i++){
+				if ($tbl_header1[$i] ne $tbl_header2[$i]){
+					$diffheader++;
+				}
+			}
+		}
 		
-		### Verification de la coherence sequence /qualité   @ 1 sec
-		if (($TYPE eq "illumina")&&((!$header1)||(!$header2)||(!$repheader1)||(!$repheader2))){
+		
+		
+		### Verification de la coherence sequence /qualité 
+		if ((!$header1)||(!$header2)){
 			if ($VERBOSE eq "ON"){
 				print "Error in header : empty\n";
 				print $ligne1_r1;
@@ -224,24 +232,9 @@
 			}
 			$error3++;
 		}
-		elsif (($TYPE eq "sanger")&&((!$header1)||(!$header2))){
+		elsif ((($repheader1)&&($header1 ne $repheader1))||(($repheader2)&&($header2 ne $repheader2))){
 			if ($VERBOSE eq "ON"){
-				print "Error in header ref : empty\n";
-				print $ligne1_r1;
-				print $ligne2_r1;
-				print $ligne3_r1;
-				print $ligne4_r1;
-				print $ligne1_r2;
-				print $ligne2_r2;
-				print $ligne3_r2;
-				print $ligne4_r2;
-				print "\n";
-			}
-			$error3++;
-		}
-		elsif (($TYPE eq "illumina")&&(($header1 ne $repheader1)||($header2 ne $repheader2)||($header1 ne $header2))){
-			if ($VERBOSE eq "ON"){
-				print "Error in header : different\n";
+				print "Error : difference in header and header repeat\n";
 				print $ligne1_r1;
 				print $ligne2_r1;
 				print $ligne3_r1;
@@ -254,9 +247,24 @@
 			}
 			$error4++;
 		}
-		elsif (($TYPE eq "sanger")&&($header1 ne $header2)){
+		elsif ($#tbl_header1 != $#tbl_header2){
 			if ($VERBOSE eq "ON"){
-				print "Error in header : different\n";
+				print "Error : difference in header size between reads\n";
+				print $ligne1_r1;
+				print $ligne2_r1;
+				print $ligne3_r1;
+				print $ligne4_r1;
+				print $ligne1_r2;
+				print $ligne2_r2;
+				print $ligne3_r2;
+				print $ligne4_r2;
+				print "\n";
+			}
+			$error4++;
+		}
+		elsif ($diffheader > 1 ){ # More than ...1 and ...2 difference in read1 and read2 header
+			if ($VERBOSE eq "ON"){
+				print "Error can't establish synchro between reads, more than 1 difference between headers\n";
 				print $ligne1_r1;
 				print $ligne2_r1;
 				print $ligne3_r1;