Mercurial > repos > mcharles > rapsosnp
diff rapsodyn/PrepareFastqLight.pl @ 7:3f7b0788a1c4 draft
Uploaded
author | mcharles |
---|---|
date | Tue, 07 Oct 2014 10:34:34 -0400 |
parents | b0cbb9d21aa9 |
children | d857538d9fea |
line wrap: on
line diff
--- a/rapsodyn/PrepareFastqLight.pl Mon Sep 29 03:02:16 2014 -0400 +++ b/rapsodyn/PrepareFastqLight.pl Tue Oct 07 10:34:34 2014 -0400 @@ -1,28 +1,51 @@ #!/usr/bin/perl +#V1.0.1 added log, option parameters use strict; use warnings; - -my $read1 = $ARGV[0]; -my $read2 = $ARGV[1]; +use Getopt::Long; -my $output1 = $ARGV[2]; -my $output2 = $ARGV[3]; +my $read1_file; +my $read2_file; +my $log_file; +my $output1_file; +my $output2_file; -my $TYPE = $ARGV[4]; -my $MIN_LENGTH = $ARGV[5]; -my $MIN_QUALITY = $ARGV[6]; +my $TYPE="sanger"; +my $MIN_LENGTH=30; +my $MIN_QUALITY=30; + +my $VERBOSE = "OFF"; -my $VERBOSE = $ARGV[7]; +GetOptions ( +"read1_file=s" => \$read1_file, +"read2_file=s" => \$read2_file, +"log_file=s" => \$log_file, +"output1_file=s" => \$output1_file, +"output2_file=s" => \$output2_file, +"type=s" => \$TYPE, +"min_length=i" => \$MIN_LENGTH, +"min_quality=i" => \$MIN_QUALITY, +"verbose=s" => \$VERBOSE +) or die("Error in command line arguments\n"); -if (!$VERBOSE){ - $VERBOSE ="OFF"; -} -open(READ1, $read1) or die ("Can't open $read1\n"); -open(READ2, $read2) or die ("Can't open $read2\n"); -open(OUT1, ">$output1") or die ("Can't open $output1\n"); -open(OUT2, ">$output2") or die ("Can't open $output2\n"); +my $nb_read1=0; +my $nb_base_read1=0; +my $nb_read2=0; +my $nb_base_read2=0; +my $nb_read1_t=0; +my $nb_base_read1_t=0; +my $nb_read2_t=0; +my $nb_base_read2_t=0; + +my $nb_base_current_t=0; + + +open(READ1, $read1_file) or die ("Can't open $read1_file\n"); +open(READ2, $read2_file) or die ("Can't open $read2_file\n"); +open(OUT1, ">$output1_file") or die ("Can't open $output1_file\n"); +open(OUT2, ">$output2_file") or die ("Can't open $output2_file\n"); my $error1=0; my $error2=0; @@ -43,6 +66,9 @@ my $ligne2_r2 =<READ2>; my $ligne3_r2 =<READ2>; my $ligne4_r2 =<READ2>; + + $nb_read1++; + $nb_read2++; #@ 1 sec if ((!$ligne1_r1)||(!$ligne2_r1)||(!$ligne3_r1)||(!$ligne4_r1)||(!$ligne1_r2)||(!$ligne2_r2)||(!$ligne3_r2)||(!$ligne4_r2)){ @@ -91,6 +117,7 @@ my $header2=""; my $repheader1=""; my $repheader2=""; + if ($ligne1_r1 =~/^\@(.*?)\#/){ $header1 = $1; @@ -190,9 +217,11 @@ ### Parsing sequence & qualité if ($ligne2_r1 =~ /^([ATGCNX]+)\s*$/i){ $seq1 = $1; + $nb_base_read1 += length($seq1); } if ($ligne2_r2 =~ /^([ATGCNX]+)\s*$/i){ $seq2 = $1; + $nb_base_read2 += length($seq2); } if ($ligne4_r1 =~ /^(.*)\s*$/i){ $qual1 = $1; @@ -236,37 +265,49 @@ else { my $fastq_lines_r1=""; my $fastq_lines_r2=""; + my $nb_base_current_read1_t = 0; + my $nb_base_current_read2_t = 0; + $fastq_lines_r1 = &grooming_and_trimming($ligne1_r1,$seq1,$qual1); + $nb_base_current_read1_t = $nb_base_current_t; if ($fastq_lines_r1){ $fastq_lines_r2 = &grooming_and_trimming($ligne1_r2,$seq2,$qual2); + $nb_base_current_read2_t = $nb_base_current_t; } if ($fastq_lines_r2){ print OUT1 $fastq_lines_r1; print OUT2 $fastq_lines_r2; + + $nb_read1_t++; + $nb_read2_t++; + $nb_base_read1_t += $nb_base_current_read1_t; + $nb_base_read2_t += $nb_base_current_read2_t; + + } } } - # print OUT1 $ligne1_r1; - # print OUT1 $ligne2_r1; - # print OUT1 $ligne3_r1; - # print OUT1 $ligne4_r1; - # print OUT2 $ligne1_r2; - # print OUT2 $ligne2_r2; - # print OUT2 $ligne3_r2; - # print OUT2 $ligne4_r2; - + #@ 7 sec } } - - close (READ1); close (READ2); close (OUT1); close (OUT2); +open (LF,">$log_file") or die("Can't open $log_file\n"); +print LF "\n####\t Fastq preparation \n"; +print LF "## Before preparation\n"; +print LF "#Read1 :\t$nb_read1\t#Base :\t$nb_base_read1\n"; +print LF "#Read2 :\t$nb_read2\t#Base :\t$nb_base_read2\n"; +print LF "## After preparation\n"; +print LF "#Read1 :\t$nb_read1_t\t#Base :\t$nb_base_read1_t\n"; +print LF "#Read2 :\t$nb_read2_t\t#Base :\t$nb_base_read2_t\n"; +close (LF); + sub grooming_and_trimming{ my $header = shift; @@ -326,7 +367,13 @@ } my $lengthTrim = $stopTrim - $startTrim +1; - + + #if ($stats_length{$lengthTrim}){ + # $stats_length{$lengthTrim} = 1; + #} + #else { + # $stats_length{$lengthTrim}++; + #} my $fastq_lines=""; # if ($header =~ /GA8\-EAS671_0005\:3\:1\:1043\:4432/){ @@ -343,11 +390,23 @@ # print "\n"; # } + #for (my $i=$startTrim;$i<=$stopTrim;$i++){ + # if ($stats_quality{ord($q{$i])}){ + # $stats_quality{ord($q{$i])}=1; + # } + # else { + # $stats_quality{ord($q{$i])}++; + # } + #} + if ($lengthTrim >= $MIN_LENGTH){ $fastq_lines .= $header; - $fastq_lines .= substr($seq,$startTrim,$lengthTrim)."\n"; + my $new_seq = substr($seq,$startTrim,$lengthTrim); + $nb_base_current_t = length($new_seq); + $fastq_lines .= $new_seq."\n"; $fastq_lines .= "+\n"; - $fastq_lines .= substr($quality,$startTrim,$lengthTrim)."\n"; + my $new_q = substr($quality,$startTrim,$lengthTrim); + $fastq_lines .= $new_q."\n"; return $fastq_lines; }