Mercurial > repos > mcharles > rapsosnp
diff rapsodyn/PrepareFastqLight.pl @ 5:b0cbb9d21aa9 draft
Uploaded
author | mcharles |
---|---|
date | Mon, 22 Sep 2014 10:19:53 -0400 |
parents | 9074a5104cdd |
children | 3f7b0788a1c4 |
line wrap: on
line diff
--- a/rapsodyn/PrepareFastqLight.pl Wed Sep 17 04:20:08 2014 -0400 +++ b/rapsodyn/PrepareFastqLight.pl Mon Sep 22 10:19:53 2014 -0400 @@ -268,123 +268,93 @@ close (OUT2); - - sub grooming_and_trimming{ my $header = shift; my $seq = shift; my $quality = shift; my $quality_converted=""; + my $quality_ori=$quality; - my $startnoN = 0; - my $stopnoN = length($quality)-1; + my $lengthseq = length($seq); + my $startTrim = 0; + my $stopTrim = length($quality)-1; + my $startnoN = $startTrim; + my $stopnoN = $stopTrim; -#print "HEAD:\t$header"; -#print "SEQ:\t$seq\n"; my $chercheN = $seq; - my @bad_position; + my @bad_position_N; + my @bad_position_Q; my $current_index = index($chercheN,"N"); my $abs_index = $current_index; while ($current_index >=0){ - push (@bad_position,$abs_index); + push (@bad_position_N,$abs_index); if ($current_index<length($seq)){ $chercheN = substr($chercheN,$current_index+1); $current_index = index($chercheN,"N"); - $abs_index = $current_index + $bad_position[$#bad_position]+1; + $abs_index = $current_index + $bad_position_N[$#bad_position_N]+1; } else { last; } } + + my @q = split(//,$quality); + for (my $i=0;$i<=$#q;$i++){ + my $chr = $q[$i]; + my $num = ord($q[$i]); + if ($TYPE eq "illumina"){ + $num = $num - 31; # 31 comme la difference entre la plage sanger (33-> 93 / 0->60) et illumina (64->104 / 0->40) + $quality_converted .= chr($num); + } + + if ($num < $MIN_QUALITY + 33){ #33 comme le départ de la plage sanger + push(@bad_position_Q,$i); + } + } + if ($quality_converted){$quality = $quality_converted;} + my @bad_position = (@bad_position_N, @bad_position_Q); if ($#bad_position>=0){ - my %coord=%{&extract_longer_string_coordinates_from_bad_position($startnoN,$stopnoN,\@bad_position)}; - $startnoN = $coord{"start"}; - $stopnoN = $coord{"stop"}; - } - my $lengthnoN = $stopnoN - $startnoN + 1; - my $seqnoN = substr($seq,$startnoN,$lengthnoN); -# print "SEQnoN\t:$seqnoN\n"; -# for (my $i=0;$i<=$#bad_position;$i++){ -# print $bad_position[$i]."\t"; -# } -# print "\n"; - - if ($lengthnoN >= $MIN_LENGTH){ - my $startTrim = $startnoN; - my $stopTrim = $stopnoN; - - my $quality_converted=""; - #my @bad_position; + @bad_position = sort {$a <=> $b} @bad_position; + my %coord=%{&extract_longer_string_coordinates_from_bad_position(0,$stopTrim,\@bad_position)}; + $startTrim = $coord{"start"}; + $stopTrim = $coord{"stop"}; +#print "$startTrim .. $stopTrim\n"; - my @q = split(//,$quality); - #print "QUALITY\n"; - #print "$quality\n"; - for (my $i=0;$i<=$stopnoN;$i++){ - my $chr = $q[$i]; - my $num = ord($q[$i]); - if ($TYPE eq "illumina"){ - $num = $num -64+33; - $quality_converted .= chr($num); - } - - if ($num <$MIN_QUALITY + 64 - 33 ){ - push(@bad_position,$i+$startnoN); - } - } - if ($quality_converted){$quality = $quality_converted;} - #print "$quality\n"; - - - - if ($#bad_position>=0){ - @bad_position = sort {$a <=> $b} @bad_position; -# for (my $i=0;$i<=$#bad_position;$i++){ -# print $bad_position[$i]."\t"; -# } -# print "\n"; - my %coord=%{&extract_longer_string_coordinates_from_bad_position($startnoN,$stopnoN,\@bad_position)}; - $startTrim = $coord{"start"}; - $stopTrim = $coord{"stop"}; - #print "$startTrim .. $stopTrim\n"; - - } - my $lengthTrim = $stopTrim - $startTrim +1; - - - my $fastq_lines=""; - - if ($lengthTrim >= $MIN_LENGTH){ - $fastq_lines .= $header; - $fastq_lines .= substr($seq,$startTrim,$lengthTrim)."\n"; - $fastq_lines .= "+\n"; - $fastq_lines .= substr($quality,$startTrim,$lengthTrim)."\n"; -# print $fastq_lines; - return $fastq_lines; - - } - else { - return ""; - } - - + } + my $lengthTrim = $stopTrim - $startTrim +1; + + my $fastq_lines=""; + +# if ($header =~ /GA8\-EAS671_0005\:3\:1\:1043\:4432/){ +# print "HEAD:\t$header"; +# print "SEQ:\n$seq\n"; +# print "$quality_ori\n"; +# print "$quality\n"; +# for (my $i=0;$i<=$#bad_position;$i++){ +# print $bad_position[$i]."(".$q[$bad_position[$i]]." : ".ord($q[$bad_position[$i]]).")"."\t"; +# } +# print "\n"; +# print "$startTrim .. $stopTrim / $lengthTrim \n"; +# print $fastq_lines; +# print "\n"; +# } + + if ($lengthTrim >= $MIN_LENGTH){ + $fastq_lines .= $header; + $fastq_lines .= substr($seq,$startTrim,$lengthTrim)."\n"; + $fastq_lines .= "+\n"; + $fastq_lines .= substr($quality,$startTrim,$lengthTrim)."\n"; + return $fastq_lines; } else { + #print "Insufficient length after trimming\n"; return ""; } - - - # my @s = split(//,$seq); - # my $sanger_quality=""; - - - - - # return $sanger_quality; } sub extract_longer_string_coordinates_from_bad_position{