Mercurial > repos > brasset_jensen > srnapipe
comparison bin/align.pm @ 20:8ea13dab3435 draft
Uploaded
author | brasset_jensen |
---|---|
date | Sun, 13 May 2018 15:40:18 -0400 |
parents | 1df6aaac800e |
children | 59c728d31244 |
comparison
equal
deleted
inserted
replaced
19:d5bd2bbd655d | 20:8ea13dab3435 |
---|---|
9 use lib $FindBin::Bin; | 9 use lib $FindBin::Bin; |
10 use Rcall qw ( histogram ); | 10 use Rcall qw ( histogram ); |
11 | 11 |
12 use Exporter; | 12 use Exporter; |
13 our @ISA = qw( Exporter ); | 13 our @ISA = qw( Exporter ); |
14 our @EXPORT = qw( &BWA_call &to_build &get_unique &sam_sorted_bam &get_hash_alignment &sam_to_bam_bg &sam_count &sam_count_mis &rpms_rpkm &get_fastq_seq &extract_sam ); | 14 our @EXPORT = qw( &rpms_rpkm_te &BWA_call &to_build &get_unique &sam_sorted_bam &get_hash_alignment &sam_to_bam_bg &sam_count &sam_count_mis &rpms_rpkm &get_fastq_seq &extract_sam ); |
15 | 15 |
16 sub to_build | 16 sub to_build |
17 { | 17 { |
18 my ( $toBuildTabP, $log, $newdir ) = @_; | 18 my ( $toBuildTabP, $log, $newdir ) = @_; |
19 | 19 |
282 if ($_ =~ /^\@[A-Za-z][A-Za-z](\t[A-Za-z][A-Za-z0-9]:[ -~]+)+$/ || $_ =~ /^\@CO\t.*/ ) | 282 if ($_ =~ /^\@[A-Za-z][A-Za-z](\t[A-Za-z][A-Za-z0-9]:[ -~]+)+$/ || $_ =~ /^\@CO\t.*/ ) |
283 { | 283 { |
284 if ($_ =~ /\@SQ\tSN:(.*)\tLN:(\d*)/) | 284 if ($_ =~ /\@SQ\tSN:(.*)\tLN:(\d*)/) |
285 { | 285 { |
286 $size{$1} = $2; | 286 $size{$1} = $2; |
287 $number{$1} = 0; | 287 $number{$1} = [0,0,0,0,0]; |
288 $numberNM{$1} = 0; | 288 $numberNM{$1} = [0,0,0,0,0]; |
289 $numberM{$1} = 0; | 289 $numberM{$1} = [0,0,0,0,0]; |
290 } | 290 } |
291 } | 291 } |
292 else | 292 else |
293 { | 293 { |
294 my @line = split (/\t/,$_); | 294 my @line = split (/\t/,$_); |
295 if ( $line[1] & 16 || $line[1] == 0 ) | 295 my @seq = split //, $line[9]; |
296 if ( $line[1] == 16 || $line[1] == 0 ) | |
296 { | 297 { |
297 $number{ $line[2] }++; | 298 $number{ $line[2] }->[0]++; |
299 if ($line[1] == 0) | |
300 { | |
301 $number{$line[2]}->[1]++; | |
302 $number{$line[2]}->[3]++ if $seq[0] eq 'T'; | |
303 } | |
304 else | |
305 { | |
306 $number{$line[2]}->[2]++ if $seq[9] eq 'A'; | |
307 } | |
298 if ($_ =~ /.*XM:i:(\d+).*/) | 308 if ($_ =~ /.*XM:i:(\d+).*/) |
299 { | 309 { |
300 if ( $1 == 0 ){ $numberNM{$line[2]}++; } else { $numberM{$line[2]}++; } | 310 if ( $1 == 0 ) |
311 { | |
312 $numberNM{$line[2]}->[0]++; | |
313 if ($line[1] == 0) | |
314 { | |
315 $numberNM{$line[2]}->[1]++; | |
316 $numberNM{$line[2]}->[3]++ if $seq[0] eq 'T'; | |
317 } | |
318 else | |
319 { | |
320 $numberNM{$line[2]}->[2]++; | |
321 $numberNM{$line[2]}->[4]++ if $seq[9] eq 'A'; | |
322 } | |
323 } | |
324 else | |
325 { | |
326 $numberM{$line[2]}->[0]++; | |
327 if ($line[1] == 0) | |
328 { | |
329 $numberM{$line[2]}->[1]++; | |
330 $numberM{$line[2]}->[3]++ if $seq[0] eq 'T'; | |
331 } | |
332 else | |
333 { | |
334 $numberM{$line[2]}->[2]++; | |
335 $numberN{$line[2]}->[4]++ if $seq[9] eq 'A'; | |
336 } | |
337 } | |
301 } | 338 } |
302 } | 339 } |
303 } | 340 } |
304 } | 341 } |
305 return (\%number, \%size, \%numberNM, \%numberM ); | 342 return (\%number, \%size, \%numberNM, \%numberM ); |
306 } | 343 } |
344 | |
345 sub rpms_rpkm_te | |
346 { | |
347 my ( $counthashR, $sizehashR, $mapped, $out_file, $piRNA_number, $miRNA_number, $bonafide_number ) =@_; | |
348 open(my $out, ">".$out_file) || die "cannot open normalized file $! \n"; | |
349 print $out "ID\treads counts\tRPKM\tsens reads counts\treverse reads counts\t% of sens 1U\t% of reverse 10A"; | |
350 print $out "\tper million of piRNAs" if ($piRNA_number != 0); | |
351 print $out "\tper million of miRNAs" if ($miRNA_number != 0); | |
352 print $out "\tper million of bonafide reads" if ($bonafide_number != 0); | |
353 print $out "\n"; | |
354 foreach my $k ( sort keys %{$counthashR} ) | |
355 { | |
356 my ($rpkm, $pirna, $mirna, $bonafide) = (0,0,0,0); | |
357 | |
358 $rpkm = ( $counthashR->{$k}->[0] * 1000000000) / ( $sizehashR->{$k} * $mapped) if ( $sizehashR->{$k} * $mapped) != 0 ; | |
359 print $out $k."\t".$counthashR->{$k}->[0]."\t"; printf $out "%.2f",$rpkm; | |
360 print $out "\t".$counthashR->{$k}->[1]."\t".$counthashR->{$k}->[2] ; | |
361 my $U1 = 0; | |
362 $U1 = $counthashR->{$k}->[3] / $counthashR->{$k}->[1] * 100 if $counthashR->{$k}->[1] != 0; | |
363 my $A10 = 0; | |
364 $A10 = $counthashR->{$k}->[4] / $counthashR->{$k}->[2] * 100 if $counthashR->{$k}->[2] != 0; | |
365 | |
366 print $out "\t".$U1."\t".$A10; | |
367 | |
368 if ($piRNA_number != 0 ) | |
369 { | |
370 $pirna = ( $counthashR->{$k}->[0] * 1000000) / $piRNA_number; | |
371 printf $out "\t%.2f",$pirna; | |
372 } | |
373 if ($miRNA_number != 0 ) | |
374 { | |
375 $mirna = ( $counthashR->{$k}->[0] * 1000000) / $miRNA_number; | |
376 printf $out "\t%.2f",$mirna; | |
377 } | |
378 if ($bonafide_number != 0 ) | |
379 { | |
380 $bonafide = ( $counthashR->{$k}->[0] * 1000000) / $bonafide_number; | |
381 printf $out "\t%.2f",$bonafide; | |
382 } | |
383 print $out "\n"; | |
384 } | |
385 close $out; | |
386 } | |
387 | |
307 | 388 |
308 sub sam_to_bam_bg | 389 sub sam_to_bam_bg |
309 { | 390 { |
310 my ( $sam, $scale, $number_of_cpus ) = @_; | 391 my ( $sam, $scale, $number_of_cpus ) = @_; |
311 my ( $bam_sorted, $bedgraphM, $bedgraphP, $view_err, $sort_err ) = ( '', '', '', '', '' ); | 392 my ( $bam_sorted, $bedgraphM, $bedgraphP, $view_err, $sort_err ) = ( '', '', '', '', '' ); |