comparison bin/align.pm @ 20:8ea13dab3435 draft

Uploaded
author brasset_jensen
date Sun, 13 May 2018 15:40:18 -0400
parents 1df6aaac800e
children 59c728d31244
comparison
equal deleted inserted replaced
19:d5bd2bbd655d 20:8ea13dab3435
9 use lib $FindBin::Bin; 9 use lib $FindBin::Bin;
10 use Rcall qw ( histogram ); 10 use Rcall qw ( histogram );
11 11
12 use Exporter; 12 use Exporter;
13 our @ISA = qw( Exporter ); 13 our @ISA = qw( Exporter );
14 our @EXPORT = qw( &BWA_call &to_build &get_unique &sam_sorted_bam &get_hash_alignment &sam_to_bam_bg &sam_count &sam_count_mis &rpms_rpkm &get_fastq_seq &extract_sam ); 14 our @EXPORT = qw( &rpms_rpkm_te &BWA_call &to_build &get_unique &sam_sorted_bam &get_hash_alignment &sam_to_bam_bg &sam_count &sam_count_mis &rpms_rpkm &get_fastq_seq &extract_sam );
15 15
16 sub to_build 16 sub to_build
17 { 17 {
18 my ( $toBuildTabP, $log, $newdir ) = @_; 18 my ( $toBuildTabP, $log, $newdir ) = @_;
19 19
282 if ($_ =~ /^\@[A-Za-z][A-Za-z](\t[A-Za-z][A-Za-z0-9]:[ -~]+)+$/ || $_ =~ /^\@CO\t.*/ ) 282 if ($_ =~ /^\@[A-Za-z][A-Za-z](\t[A-Za-z][A-Za-z0-9]:[ -~]+)+$/ || $_ =~ /^\@CO\t.*/ )
283 { 283 {
284 if ($_ =~ /\@SQ\tSN:(.*)\tLN:(\d*)/) 284 if ($_ =~ /\@SQ\tSN:(.*)\tLN:(\d*)/)
285 { 285 {
286 $size{$1} = $2; 286 $size{$1} = $2;
287 $number{$1} = 0; 287 $number{$1} = [0,0,0,0,0];
288 $numberNM{$1} = 0; 288 $numberNM{$1} = [0,0,0,0,0];
289 $numberM{$1} = 0; 289 $numberM{$1} = [0,0,0,0,0];
290 } 290 }
291 } 291 }
292 else 292 else
293 { 293 {
294 my @line = split (/\t/,$_); 294 my @line = split (/\t/,$_);
295 if ( $line[1] & 16 || $line[1] == 0 ) 295 my @seq = split //, $line[9];
296 if ( $line[1] == 16 || $line[1] == 0 )
296 { 297 {
297 $number{ $line[2] }++; 298 $number{ $line[2] }->[0]++;
299 if ($line[1] == 0)
300 {
301 $number{$line[2]}->[1]++;
302 $number{$line[2]}->[3]++ if $seq[0] eq 'T';
303 }
304 else
305 {
306 $number{$line[2]}->[2]++ if $seq[9] eq 'A';
307 }
298 if ($_ =~ /.*XM:i:(\d+).*/) 308 if ($_ =~ /.*XM:i:(\d+).*/)
299 { 309 {
300 if ( $1 == 0 ){ $numberNM{$line[2]}++; } else { $numberM{$line[2]}++; } 310 if ( $1 == 0 )
311 {
312 $numberNM{$line[2]}->[0]++;
313 if ($line[1] == 0)
314 {
315 $numberNM{$line[2]}->[1]++;
316 $numberNM{$line[2]}->[3]++ if $seq[0] eq 'T';
317 }
318 else
319 {
320 $numberNM{$line[2]}->[2]++;
321 $numberNM{$line[2]}->[4]++ if $seq[9] eq 'A';
322 }
323 }
324 else
325 {
326 $numberM{$line[2]}->[0]++;
327 if ($line[1] == 0)
328 {
329 $numberM{$line[2]}->[1]++;
330 $numberM{$line[2]}->[3]++ if $seq[0] eq 'T';
331 }
332 else
333 {
334 $numberM{$line[2]}->[2]++;
335 $numberN{$line[2]}->[4]++ if $seq[9] eq 'A';
336 }
337 }
301 } 338 }
302 } 339 }
303 } 340 }
304 } 341 }
305 return (\%number, \%size, \%numberNM, \%numberM ); 342 return (\%number, \%size, \%numberNM, \%numberM );
306 } 343 }
344
345 sub rpms_rpkm_te
346 {
347 my ( $counthashR, $sizehashR, $mapped, $out_file, $piRNA_number, $miRNA_number, $bonafide_number ) =@_;
348 open(my $out, ">".$out_file) || die "cannot open normalized file $! \n";
349 print $out "ID\treads counts\tRPKM\tsens reads counts\treverse reads counts\t% of sens 1U\t% of reverse 10A";
350 print $out "\tper million of piRNAs" if ($piRNA_number != 0);
351 print $out "\tper million of miRNAs" if ($miRNA_number != 0);
352 print $out "\tper million of bonafide reads" if ($bonafide_number != 0);
353 print $out "\n";
354 foreach my $k ( sort keys %{$counthashR} )
355 {
356 my ($rpkm, $pirna, $mirna, $bonafide) = (0,0,0,0);
357
358 $rpkm = ( $counthashR->{$k}->[0] * 1000000000) / ( $sizehashR->{$k} * $mapped) if ( $sizehashR->{$k} * $mapped) != 0 ;
359 print $out $k."\t".$counthashR->{$k}->[0]."\t"; printf $out "%.2f",$rpkm;
360 print $out "\t".$counthashR->{$k}->[1]."\t".$counthashR->{$k}->[2] ;
361 my $U1 = 0;
362 $U1 = $counthashR->{$k}->[3] / $counthashR->{$k}->[1] * 100 if $counthashR->{$k}->[1] != 0;
363 my $A10 = 0;
364 $A10 = $counthashR->{$k}->[4] / $counthashR->{$k}->[2] * 100 if $counthashR->{$k}->[2] != 0;
365
366 print $out "\t".$U1."\t".$A10;
367
368 if ($piRNA_number != 0 )
369 {
370 $pirna = ( $counthashR->{$k}->[0] * 1000000) / $piRNA_number;
371 printf $out "\t%.2f",$pirna;
372 }
373 if ($miRNA_number != 0 )
374 {
375 $mirna = ( $counthashR->{$k}->[0] * 1000000) / $miRNA_number;
376 printf $out "\t%.2f",$mirna;
377 }
378 if ($bonafide_number != 0 )
379 {
380 $bonafide = ( $counthashR->{$k}->[0] * 1000000) / $bonafide_number;
381 printf $out "\t%.2f",$bonafide;
382 }
383 print $out "\n";
384 }
385 close $out;
386 }
387
307 388
308 sub sam_to_bam_bg 389 sub sam_to_bam_bg
309 { 390 {
310 my ( $sam, $scale, $number_of_cpus ) = @_; 391 my ( $sam, $scale, $number_of_cpus ) = @_;
311 my ( $bam_sorted, $bedgraphM, $bedgraphP, $view_err, $sort_err ) = ( '', '', '', '', '' ); 392 my ( $bam_sorted, $bedgraphM, $bedgraphP, $view_err, $sort_err ) = ( '', '', '', '', '' );