comparison bin/align.pm @ 27:c8090766c3f5 draft

Uploaded
author brasset_jensen
date Sun, 20 May 2018 17:46:23 -0400
parents e26d3bee9348
children 8d8357dee6b3
comparison
equal deleted inserted replaced
26:b3be854a4273 27:c8090766c3f5
282 if ($_ =~ /^\@[A-Za-z][A-Za-z](\t[A-Za-z][A-Za-z0-9]:[ -~]+)+$/ || $_ =~ /^\@CO\t.*/ ) 282 if ($_ =~ /^\@[A-Za-z][A-Za-z](\t[A-Za-z][A-Za-z0-9]:[ -~]+)+$/ || $_ =~ /^\@CO\t.*/ )
283 { 283 {
284 if ($_ =~ /\@SQ\tSN:(.*)\tLN:(\d*)/) 284 if ($_ =~ /\@SQ\tSN:(.*)\tLN:(\d*)/)
285 { 285 {
286 $size{$1} = $2; 286 $size{$1} = $2;
287 $number{$1} = [0,0,0,0,0]; 287 $number{$1} = [0,0,0,0,0,0,0];
288 $numberNM{$1} = [0,0,0,0,0]; 288 $numberNM{$1} = [0,0,0,0,0,0,0];
289 $numberM{$1} = [0,0,0,0,0]; 289 $numberM{$1} = [0,0,0,0,0,0,0];
290 } 290 }
291 } 291 }
292 else 292 else
293 { 293 {
294 my @line = split (/\t/,$_); 294 my @line = split (/\t/,$_);
298 $number{ $line[2] }->[0]++; 298 $number{ $line[2] }->[0]++;
299 if ($line[1] == 0) 299 if ($line[1] == 0)
300 { 300 {
301 $number{$line[2]}->[1]++; 301 $number{$line[2]}->[1]++;
302 $number{$line[2]}->[3]++ if $seq[0] eq 'T'; 302 $number{$line[2]}->[3]++ if $seq[0] eq 'T';
303 $number{$line[2]}->[5]++ if $seq[9] eq 'A';
303 } 304 }
304 else 305 else
305 { 306 {
306 $number{$line[2]}->[2]++; 307 $number{$line[2]}->[2]++;
307 $number{$line[2]}->[4]++ if $seq[9] eq 'A'; 308 $number{$line[2]}->[4]++ if $seq[9] eq 'A';
309 $number{$line[2]}->[6]++ if $seq[0] eq 'T';
308 } 310 }
309 if ($_ =~ /.*XM:i:(\d+).*/) 311 if ($_ =~ /.*XM:i:(\d+).*/)
310 { 312 {
311 if ( $1 == 0 ) 313 if ( $1 == 0 )
312 { 314 {
313 $numberNM{$line[2]}->[0]++; 315 $numberNM{$line[2]}->[0]++;
314 if ($line[1] == 0) 316 if ($line[1] == 0)
315 { 317 {
316 $numberNM{$line[2]}->[1]++; 318 $numberNM{$line[2]}->[1]++;
317 $numberNM{$line[2]}->[3]++ if $seq[0] eq 'T'; 319 $numberNM{$line[2]}->[3]++ if $seq[0] eq 'T';
320 $numberNM{$line[2]}->[5]++ if $seq[9] eq 'A';
318 } 321 }
319 else 322 else
320 { 323 {
321 $numberNM{$line[2]}->[2]++; 324 $numberNM{$line[2]}->[2]++;
322 $numberNM{$line[2]}->[4]++ if $seq[9] eq 'A'; 325 $numberNM{$line[2]}->[4]++ if $seq[9] eq 'A';
326 $numberNM{$line[2]}->[6]++ if $seq[0] eq 'T';
323 } 327 }
324 } 328 }
325 else 329 else
326 { 330 {
327 $numberM{$line[2]}->[0]++; 331 $numberM{$line[2]}->[0]++;
328 if ($line[1] == 0) 332 if ($line[1] == 0)
329 { 333 {
330 $numberM{$line[2]}->[1]++; 334 $numberM{$line[2]}->[1]++;
331 $numberM{$line[2]}->[3]++ if $seq[0] eq 'T'; 335 $numberM{$line[2]}->[3]++ if $seq[0] eq 'T';
336 $numberM{$line[2]}->[5]++ if $seq[9] eq 'A';
332 } 337 }
333 else 338 else
334 { 339 {
335 $numberM{$line[2]}->[2]++; 340 $numberM{$line[2]}->[2]++;
336 $numberM{$line[2]}->[4]++ if $seq[9] eq 'A'; 341 $numberM{$line[2]}->[4]++ if $seq[9] eq 'A';
342 $numberM{$line[2]}->[6]++ if $seq[0] eq 'T';
337 } 343 }
338 } 344 }
339 } 345 }
340 } 346 }
341 } 347 }
345 351
346 sub rpms_rpkm_te 352 sub rpms_rpkm_te
347 { 353 {
348 my ( $counthashR, $sizehashR, $mapped, $out_file, $piRNA_number, $miRNA_number, $bonafide_number ) =@_; 354 my ( $counthashR, $sizehashR, $mapped, $out_file, $piRNA_number, $miRNA_number, $bonafide_number ) =@_;
349 open(my $out, ">".$out_file) || die "cannot open normalized file $! \n"; 355 open(my $out, ">".$out_file) || die "cannot open normalized file $! \n";
350 print $out "ID\treads counts\tRPKM\tsens reads counts\treverse reads counts\t% of sens 1U\t% of reverse 10A"; 356 print $out "ID\treads counts\tRPKM";
351 print $out "\tper million of piRNAs" if ($piRNA_number != 0); 357 print $out "\tper million of piRNAs" if ($piRNA_number != 0);
352 print $out "\tper million of miRNAs" if ($miRNA_number != 0); 358 print $out "\tper million of miRNAs" if ($miRNA_number != 0);
353 print $out "\tper million of bonafide reads" if ($bonafide_number != 0); 359 print $out "\tper million of bonafide reads" if ($bonafide_number != 0);
354 print $out "\n"; 360 print $out "\tsens reads counts\treverse reads counts";
361 print $out "\t% of sens 1U\t% of sens 10A\t% of reverse 1U\t% of reverse 10A\n";
355 foreach my $k ( sort keys %{$counthashR} ) 362 foreach my $k ( sort keys %{$counthashR} )
356 { 363 {
357 my ($rpkm, $pirna, $mirna, $bonafide) = (0,0,0,0); 364 my ($rpkm, $pirna, $mirna, $bonafide) = (0,0,0,0);
358 365
359 $rpkm = ( $counthashR->{$k}->[0] * 1000000000) / ( $sizehashR->{$k} * $mapped) if ( $sizehashR->{$k} * $mapped) != 0 ; 366 $rpkm = ( $counthashR->{$k}->[0] * 1000000000) / ( $sizehashR->{$k} * $mapped) if ( $sizehashR->{$k} * $mapped) != 0 ;
360 print $out $k."\t".$counthashR->{$k}->[0]."\t"; printf $out "%.2f",$rpkm; 367 print $out $k."\t".$counthashR->{$k}->[0]."\t"; printf $out "%.2f",$rpkm;
361 print $out "\t".$counthashR->{$k}->[1]."\t".$counthashR->{$k}->[2] ;
362 my $U1 = 0;
363 $U1 = $counthashR->{$k}->[3] / $counthashR->{$k}->[1] * 100 if $counthashR->{$k}->[1] != 0;
364 my $A10 = 0;
365 $A10 = $counthashR->{$k}->[4] / $counthashR->{$k}->[2] * 100 if $counthashR->{$k}->[2] != 0;
366
367 print $out "\t".$U1."\t".$A10;
368 368
369 if ($piRNA_number != 0 ) 369 if ($piRNA_number != 0 )
370 { 370 {
371 $pirna = ( $counthashR->{$k}->[0] * 1000000) / $piRNA_number; 371 $pirna = ( $counthashR->{$k}->[0] * 1000000) / $piRNA_number;
372 printf $out "\t%.2f",$pirna; 372 printf $out "\t%.2f",$pirna;
379 if ($bonafide_number != 0 ) 379 if ($bonafide_number != 0 )
380 { 380 {
381 $bonafide = ( $counthashR->{$k}->[0] * 1000000) / $bonafide_number; 381 $bonafide = ( $counthashR->{$k}->[0] * 1000000) / $bonafide_number;
382 printf $out "\t%.2f",$bonafide; 382 printf $out "\t%.2f",$bonafide;
383 } 383 }
384
385 print $out "\t".$counthashR->{$k}->[1]."\t".$counthashR->{$k}->[2] ;
386 my $S1U = 0;
387 $S1U = $counthashR->{$k}->[3] / $counthashR->{$k}->[1] * 100 if $counthashR->{$k}->[1] != 0;
388 my $R1U = 0;
389 $R1U = $counthashR->{$k}->[6] / $counthashR->{$k}->[2] * 100 if $counthashR->{$k}->[2] != 0;
390 my $S10A = 0;
391 $SA10 = $counthashR->{$k}->[5] / $counthashR->{$k}->[1] * 100 if $counthashR->{$k}->[1] != 0;
392 my $R10A = 0;
393 $R10A = $counthashR->{$k}->[4] / $counthashR->{$k}->[2] * 100 if $counthashR->{$k}->[2] != 0;
394
395 print $out "\t".$S1U."\t".$S10A."\t".$R1U."\t".$R10A;
396
384 print $out "\n"; 397 print $out "\n";
385 } 398 }
386 close $out; 399 close $out;
387 } 400 }
388 401