diff rapsodyn/PrepareFastqLight.pl @ 8:d857538d9fea draft

Uploaded
author mcharles
date Fri, 10 Oct 2014 07:05:36 -0400
parents 3f7b0788a1c4
children 0a6c1cfe4dc8
line wrap: on
line diff
--- a/rapsodyn/PrepareFastqLight.pl	Tue Oct 07 10:34:34 2014 -0400
+++ b/rapsodyn/PrepareFastqLight.pl	Fri Oct 10 07:05:36 2014 -0400
@@ -1,4 +1,6 @@
 #!/usr/bin/perl
+#v1.0.3 support rapsodyn header (.... 1:...  / .... 2:...)
+#V1.0.2 added auto type detection
 #V1.0.1 added log, option parameters
 use strict;
 use warnings;
@@ -41,11 +43,12 @@
 
 my $nb_base_current_t=0;
 
-
 open(READ1, $read1_file) or die ("Can't open $read1_file\n");
 open(READ2, $read2_file) or die ("Can't open $read2_file\n");
 open(OUT1, ">$output1_file") or die ("Can't open $output1_file\n");
 open(OUT2, ">$output2_file") or die ("Can't open $output2_file\n");
+open (LF,">$log_file") or die("Can't open $log_file\n");
+
 
 my $error1=0;
 my $error2=0;
@@ -58,6 +61,59 @@
 my $error9=0;
 my $error10=0;
 
+my $auto_type="";
+my %qual;
+if ($TYPE eq "auto"){
+	my $compt=0;
+	open(DETECT, $read1_file) or die ("Can't open $read1_file\n"); 
+	while (my $ligne1_r1 =<DETECT>){
+		my $ligne2_r1 =<DETECT>;
+		my $ligne3_r1 =<DETECT>;
+		my $ligne4_r1 =<DETECT>;
+		$compt++;
+		if ($ligne4_r1 =~ /^(.*)\s*$/i){
+			my $qual = $1;
+			my @q = split(//,$qual);
+			for (my $i=0;$i<=$#q;$i++){
+				my $num = ord($q[$i]);
+				if ($qual{$num}){
+					$qual{$num}++;
+				}
+				else {
+					$qual{$num} = 1;
+				}
+				#range sanger / illumina 1.8+ 	: 33->94
+				#range illumina 1.3->1.7 	: 64->105
+				if ($num > 94){$auto_type = "illumina";last;}
+				if ($num < 64){$auto_type = "sanger";last;}
+			}
+		}
+		else {
+			print STDERR "Error in format detection : quality not recognized\n$ligne4_r1";
+			exit(0);
+		}
+	
+		if ($auto_type ne ""){
+			last;
+		}
+
+	}
+	close (DETECT);
+	if ($auto_type eq ""){
+		print STDERR "Error in format detection : type not recognized parsing read1\n";
+		foreach my $key (sort {$a <=> $b} keys %qual){
+			print "$key\t:\t",$qual{$key},"\n";
+		}
+		exit(0);
+	}
+	else {
+		$TYPE = $auto_type;
+	}
+}
+
+
+
+
 while (my $ligne1_r1 =<READ1>){
 	my $ligne2_r1 =<READ1>;
 	my $ligne3_r1 =<READ1>;
@@ -119,19 +175,19 @@
 		my $repheader2="";
 
 		
-		if ($ligne1_r1 =~/^\@(.*?)\#/){
+		if ($ligne1_r1 =~/^\@(.*?)[\s\/]/){
 			$header1 = $1;
 		}
 		
-		if ($ligne3_r1 =~/^\+(.*?)\#/){
+		if ($ligne3_r1 =~/^\+(.*?)[\s\/]/){
                         $repheader1 = $1;
                	}
 
-		if ($ligne1_r2 =~/^\@(.*?)\#/){
+		if ($ligne1_r2 =~/^\@(.*?)[\s\/]/){
                         $header2 = $1;
                	}
 
-		if ($ligne3_r2 =~/^\+(.*?)\#/){
+		if ($ligne3_r2 =~/^\+(.*?)[\s\/]/){
                         $repheader2 = $1;
                	}
 #@ 2 sec
@@ -298,8 +354,8 @@
 close (OUT1);
 close (OUT2);
 
-open (LF,">$log_file") or die("Can't open $log_file\n");
 print LF "\n####\t Fastq preparation \n";
+print LF "Fastq format : $TYPE\n";
 print LF "## Before preparation\n";
 print LF "#Read1 :\t$nb_read1\t#Base :\t$nb_base_read1\n";
 print LF "#Read2 :\t$nb_read2\t#Base :\t$nb_base_read2\n";