Mercurial > repos > mcharles > rapsosnp
comparison rapsodyn/PrepareFastqLight.pl @ 5:b0cbb9d21aa9 draft
Uploaded
author | mcharles |
---|---|
date | Mon, 22 Sep 2014 10:19:53 -0400 |
parents | 9074a5104cdd |
children | 3f7b0788a1c4 |
comparison
equal
deleted
inserted
replaced
4:9074a5104cdd | 5:b0cbb9d21aa9 |
---|---|
266 close (READ2); | 266 close (READ2); |
267 close (OUT1); | 267 close (OUT1); |
268 close (OUT2); | 268 close (OUT2); |
269 | 269 |
270 | 270 |
271 | |
272 | |
273 sub grooming_and_trimming{ | 271 sub grooming_and_trimming{ |
274 my $header = shift; | 272 my $header = shift; |
275 my $seq = shift; | 273 my $seq = shift; |
276 my $quality = shift; | 274 my $quality = shift; |
277 my $quality_converted=""; | 275 my $quality_converted=""; |
278 | 276 my $quality_ori=$quality; |
279 my $startnoN = 0; | 277 |
280 my $stopnoN = length($quality)-1; | 278 my $lengthseq = length($seq); |
281 | 279 my $startTrim = 0; |
282 #print "HEAD:\t$header"; | 280 my $stopTrim = length($quality)-1; |
283 #print "SEQ:\t$seq\n"; | 281 my $startnoN = $startTrim; |
282 my $stopnoN = $stopTrim; | |
283 | |
284 | 284 |
285 my $chercheN = $seq; | 285 my $chercheN = $seq; |
286 my @bad_position; | 286 my @bad_position_N; |
287 my @bad_position_Q; | |
287 my $current_index = index($chercheN,"N"); | 288 my $current_index = index($chercheN,"N"); |
288 my $abs_index = $current_index; | 289 my $abs_index = $current_index; |
289 while ($current_index >=0){ | 290 while ($current_index >=0){ |
290 push (@bad_position,$abs_index); | 291 push (@bad_position_N,$abs_index); |
291 | 292 |
292 if ($current_index<length($seq)){ | 293 if ($current_index<length($seq)){ |
293 $chercheN = substr($chercheN,$current_index+1); | 294 $chercheN = substr($chercheN,$current_index+1); |
294 $current_index = index($chercheN,"N"); | 295 $current_index = index($chercheN,"N"); |
295 $abs_index = $current_index + $bad_position[$#bad_position]+1; | 296 $abs_index = $current_index + $bad_position_N[$#bad_position_N]+1; |
296 } | 297 } |
297 else { | 298 else { |
298 last; | 299 last; |
299 } | 300 } |
300 } | 301 } |
301 | 302 |
303 my @q = split(//,$quality); | |
304 for (my $i=0;$i<=$#q;$i++){ | |
305 my $chr = $q[$i]; | |
306 my $num = ord($q[$i]); | |
307 if ($TYPE eq "illumina"){ | |
308 $num = $num - 31; # 31 comme la difference entre la plage sanger (33-> 93 / 0->60) et illumina (64->104 / 0->40) | |
309 $quality_converted .= chr($num); | |
310 } | |
311 | |
312 if ($num < $MIN_QUALITY + 33){ #33 comme le départ de la plage sanger | |
313 push(@bad_position_Q,$i); | |
314 } | |
315 } | |
316 if ($quality_converted){$quality = $quality_converted;} | |
317 | |
318 my @bad_position = (@bad_position_N, @bad_position_Q); | |
302 | 319 |
303 if ($#bad_position>=0){ | 320 if ($#bad_position>=0){ |
304 my %coord=%{&extract_longer_string_coordinates_from_bad_position($startnoN,$stopnoN,\@bad_position)}; | 321 @bad_position = sort {$a <=> $b} @bad_position; |
305 $startnoN = $coord{"start"}; | 322 my %coord=%{&extract_longer_string_coordinates_from_bad_position(0,$stopTrim,\@bad_position)}; |
306 $stopnoN = $coord{"stop"}; | 323 $startTrim = $coord{"start"}; |
307 } | 324 $stopTrim = $coord{"stop"}; |
308 my $lengthnoN = $stopnoN - $startnoN + 1; | 325 #print "$startTrim .. $stopTrim\n"; |
309 my $seqnoN = substr($seq,$startnoN,$lengthnoN); | 326 |
310 # print "SEQnoN\t:$seqnoN\n"; | 327 } |
311 # for (my $i=0;$i<=$#bad_position;$i++){ | 328 my $lengthTrim = $stopTrim - $startTrim +1; |
312 # print $bad_position[$i]."\t"; | 329 |
330 my $fastq_lines=""; | |
331 | |
332 # if ($header =~ /GA8\-EAS671_0005\:3\:1\:1043\:4432/){ | |
333 # print "HEAD:\t$header"; | |
334 # print "SEQ:\n$seq\n"; | |
335 # print "$quality_ori\n"; | |
336 # print "$quality\n"; | |
337 # for (my $i=0;$i<=$#bad_position;$i++){ | |
338 # print $bad_position[$i]."(".$q[$bad_position[$i]]." : ".ord($q[$bad_position[$i]]).")"."\t"; | |
339 # } | |
340 # print "\n"; | |
341 # print "$startTrim .. $stopTrim / $lengthTrim \n"; | |
342 # print $fastq_lines; | |
343 # print "\n"; | |
313 # } | 344 # } |
314 # print "\n"; | 345 |
315 | 346 if ($lengthTrim >= $MIN_LENGTH){ |
316 if ($lengthnoN >= $MIN_LENGTH){ | 347 $fastq_lines .= $header; |
317 my $startTrim = $startnoN; | 348 $fastq_lines .= substr($seq,$startTrim,$lengthTrim)."\n"; |
318 my $stopTrim = $stopnoN; | 349 $fastq_lines .= "+\n"; |
319 | 350 $fastq_lines .= substr($quality,$startTrim,$lengthTrim)."\n"; |
320 my $quality_converted=""; | 351 return $fastq_lines; |
321 #my @bad_position; | |
322 | |
323 my @q = split(//,$quality); | |
324 #print "QUALITY\n"; | |
325 #print "$quality\n"; | |
326 for (my $i=0;$i<=$stopnoN;$i++){ | |
327 my $chr = $q[$i]; | |
328 my $num = ord($q[$i]); | |
329 if ($TYPE eq "illumina"){ | |
330 $num = $num -64+33; | |
331 $quality_converted .= chr($num); | |
332 } | |
333 | |
334 if ($num <$MIN_QUALITY + 64 - 33 ){ | |
335 push(@bad_position,$i+$startnoN); | |
336 } | |
337 } | |
338 if ($quality_converted){$quality = $quality_converted;} | |
339 #print "$quality\n"; | |
340 | |
341 | |
342 | |
343 if ($#bad_position>=0){ | |
344 @bad_position = sort {$a <=> $b} @bad_position; | |
345 # for (my $i=0;$i<=$#bad_position;$i++){ | |
346 # print $bad_position[$i]."\t"; | |
347 # } | |
348 # print "\n"; | |
349 my %coord=%{&extract_longer_string_coordinates_from_bad_position($startnoN,$stopnoN,\@bad_position)}; | |
350 $startTrim = $coord{"start"}; | |
351 $stopTrim = $coord{"stop"}; | |
352 #print "$startTrim .. $stopTrim\n"; | |
353 | |
354 } | |
355 my $lengthTrim = $stopTrim - $startTrim +1; | |
356 | |
357 | |
358 my $fastq_lines=""; | |
359 | |
360 if ($lengthTrim >= $MIN_LENGTH){ | |
361 $fastq_lines .= $header; | |
362 $fastq_lines .= substr($seq,$startTrim,$lengthTrim)."\n"; | |
363 $fastq_lines .= "+\n"; | |
364 $fastq_lines .= substr($quality,$startTrim,$lengthTrim)."\n"; | |
365 # print $fastq_lines; | |
366 return $fastq_lines; | |
367 | |
368 } | |
369 else { | |
370 return ""; | |
371 } | |
372 | |
373 | |
374 | 352 |
375 } | 353 } |
376 else { | 354 else { |
355 #print "Insufficient length after trimming\n"; | |
377 return ""; | 356 return ""; |
378 } | 357 } |
379 | |
380 | |
381 # my @s = split(//,$seq); | |
382 # my $sanger_quality=""; | |
383 | |
384 | |
385 | |
386 | |
387 # return $sanger_quality; | |
388 } | 358 } |
389 | 359 |
390 sub extract_longer_string_coordinates_from_bad_position{ | 360 sub extract_longer_string_coordinates_from_bad_position{ |
391 my $start=shift; | 361 my $start=shift; |
392 my $stop =shift; | 362 my $stop =shift; |