Mercurial > repos > brasset_jensen > srnapipe
comparison bin/align.pm @ 27:c8090766c3f5 draft
Uploaded
| author | brasset_jensen |
|---|---|
| date | Sun, 20 May 2018 17:46:23 -0400 |
| parents | e26d3bee9348 |
| children | 8d8357dee6b3 |
comparison
equal
deleted
inserted
replaced
| 26:b3be854a4273 | 27:c8090766c3f5 |
|---|---|
| 282 if ($_ =~ /^\@[A-Za-z][A-Za-z](\t[A-Za-z][A-Za-z0-9]:[ -~]+)+$/ || $_ =~ /^\@CO\t.*/ ) | 282 if ($_ =~ /^\@[A-Za-z][A-Za-z](\t[A-Za-z][A-Za-z0-9]:[ -~]+)+$/ || $_ =~ /^\@CO\t.*/ ) |
| 283 { | 283 { |
| 284 if ($_ =~ /\@SQ\tSN:(.*)\tLN:(\d*)/) | 284 if ($_ =~ /\@SQ\tSN:(.*)\tLN:(\d*)/) |
| 285 { | 285 { |
| 286 $size{$1} = $2; | 286 $size{$1} = $2; |
| 287 $number{$1} = [0,0,0,0,0]; | 287 $number{$1} = [0,0,0,0,0,0,0]; |
| 288 $numberNM{$1} = [0,0,0,0,0]; | 288 $numberNM{$1} = [0,0,0,0,0,0,0]; |
| 289 $numberM{$1} = [0,0,0,0,0]; | 289 $numberM{$1} = [0,0,0,0,0,0,0]; |
| 290 } | 290 } |
| 291 } | 291 } |
| 292 else | 292 else |
| 293 { | 293 { |
| 294 my @line = split (/\t/,$_); | 294 my @line = split (/\t/,$_); |
| 298 $number{ $line[2] }->[0]++; | 298 $number{ $line[2] }->[0]++; |
| 299 if ($line[1] == 0) | 299 if ($line[1] == 0) |
| 300 { | 300 { |
| 301 $number{$line[2]}->[1]++; | 301 $number{$line[2]}->[1]++; |
| 302 $number{$line[2]}->[3]++ if $seq[0] eq 'T'; | 302 $number{$line[2]}->[3]++ if $seq[0] eq 'T'; |
| 303 $number{$line[2]}->[5]++ if $seq[9] eq 'A'; | |
| 303 } | 304 } |
| 304 else | 305 else |
| 305 { | 306 { |
| 306 $number{$line[2]}->[2]++; | 307 $number{$line[2]}->[2]++; |
| 307 $number{$line[2]}->[4]++ if $seq[9] eq 'A'; | 308 $number{$line[2]}->[4]++ if $seq[9] eq 'A'; |
| 309 $number{$line[2]}->[6]++ if $seq[0] eq 'T'; | |
| 308 } | 310 } |
| 309 if ($_ =~ /.*XM:i:(\d+).*/) | 311 if ($_ =~ /.*XM:i:(\d+).*/) |
| 310 { | 312 { |
| 311 if ( $1 == 0 ) | 313 if ( $1 == 0 ) |
| 312 { | 314 { |
| 313 $numberNM{$line[2]}->[0]++; | 315 $numberNM{$line[2]}->[0]++; |
| 314 if ($line[1] == 0) | 316 if ($line[1] == 0) |
| 315 { | 317 { |
| 316 $numberNM{$line[2]}->[1]++; | 318 $numberNM{$line[2]}->[1]++; |
| 317 $numberNM{$line[2]}->[3]++ if $seq[0] eq 'T'; | 319 $numberNM{$line[2]}->[3]++ if $seq[0] eq 'T'; |
| 320 $numberNM{$line[2]}->[5]++ if $seq[9] eq 'A'; | |
| 318 } | 321 } |
| 319 else | 322 else |
| 320 { | 323 { |
| 321 $numberNM{$line[2]}->[2]++; | 324 $numberNM{$line[2]}->[2]++; |
| 322 $numberNM{$line[2]}->[4]++ if $seq[9] eq 'A'; | 325 $numberNM{$line[2]}->[4]++ if $seq[9] eq 'A'; |
| 326 $numberNM{$line[2]}->[6]++ if $seq[0] eq 'T'; | |
| 323 } | 327 } |
| 324 } | 328 } |
| 325 else | 329 else |
| 326 { | 330 { |
| 327 $numberM{$line[2]}->[0]++; | 331 $numberM{$line[2]}->[0]++; |
| 328 if ($line[1] == 0) | 332 if ($line[1] == 0) |
| 329 { | 333 { |
| 330 $numberM{$line[2]}->[1]++; | 334 $numberM{$line[2]}->[1]++; |
| 331 $numberM{$line[2]}->[3]++ if $seq[0] eq 'T'; | 335 $numberM{$line[2]}->[3]++ if $seq[0] eq 'T'; |
| 336 $numberM{$line[2]}->[5]++ if $seq[9] eq 'A'; | |
| 332 } | 337 } |
| 333 else | 338 else |
| 334 { | 339 { |
| 335 $numberM{$line[2]}->[2]++; | 340 $numberM{$line[2]}->[2]++; |
| 336 $numberM{$line[2]}->[4]++ if $seq[9] eq 'A'; | 341 $numberM{$line[2]}->[4]++ if $seq[9] eq 'A'; |
| 342 $numberM{$line[2]}->[6]++ if $seq[0] eq 'T'; | |
| 337 } | 343 } |
| 338 } | 344 } |
| 339 } | 345 } |
| 340 } | 346 } |
| 341 } | 347 } |
| 345 | 351 |
| 346 sub rpms_rpkm_te | 352 sub rpms_rpkm_te |
| 347 { | 353 { |
| 348 my ( $counthashR, $sizehashR, $mapped, $out_file, $piRNA_number, $miRNA_number, $bonafide_number ) =@_; | 354 my ( $counthashR, $sizehashR, $mapped, $out_file, $piRNA_number, $miRNA_number, $bonafide_number ) =@_; |
| 349 open(my $out, ">".$out_file) || die "cannot open normalized file $! \n"; | 355 open(my $out, ">".$out_file) || die "cannot open normalized file $! \n"; |
| 350 print $out "ID\treads counts\tRPKM\tsens reads counts\treverse reads counts\t% of sens 1U\t% of reverse 10A"; | 356 print $out "ID\treads counts\tRPKM"; |
| 351 print $out "\tper million of piRNAs" if ($piRNA_number != 0); | 357 print $out "\tper million of piRNAs" if ($piRNA_number != 0); |
| 352 print $out "\tper million of miRNAs" if ($miRNA_number != 0); | 358 print $out "\tper million of miRNAs" if ($miRNA_number != 0); |
| 353 print $out "\tper million of bonafide reads" if ($bonafide_number != 0); | 359 print $out "\tper million of bonafide reads" if ($bonafide_number != 0); |
| 354 print $out "\n"; | 360 print $out "\tsens reads counts\treverse reads counts"; |
| 361 print $out "\t% of sens 1U\t% of sens 10A\t% of reverse 1U\t% of reverse 10A\n"; | |
| 355 foreach my $k ( sort keys %{$counthashR} ) | 362 foreach my $k ( sort keys %{$counthashR} ) |
| 356 { | 363 { |
| 357 my ($rpkm, $pirna, $mirna, $bonafide) = (0,0,0,0); | 364 my ($rpkm, $pirna, $mirna, $bonafide) = (0,0,0,0); |
| 358 | 365 |
| 359 $rpkm = ( $counthashR->{$k}->[0] * 1000000000) / ( $sizehashR->{$k} * $mapped) if ( $sizehashR->{$k} * $mapped) != 0 ; | 366 $rpkm = ( $counthashR->{$k}->[0] * 1000000000) / ( $sizehashR->{$k} * $mapped) if ( $sizehashR->{$k} * $mapped) != 0 ; |
| 360 print $out $k."\t".$counthashR->{$k}->[0]."\t"; printf $out "%.2f",$rpkm; | 367 print $out $k."\t".$counthashR->{$k}->[0]."\t"; printf $out "%.2f",$rpkm; |
| 361 print $out "\t".$counthashR->{$k}->[1]."\t".$counthashR->{$k}->[2] ; | |
| 362 my $U1 = 0; | |
| 363 $U1 = $counthashR->{$k}->[3] / $counthashR->{$k}->[1] * 100 if $counthashR->{$k}->[1] != 0; | |
| 364 my $A10 = 0; | |
| 365 $A10 = $counthashR->{$k}->[4] / $counthashR->{$k}->[2] * 100 if $counthashR->{$k}->[2] != 0; | |
| 366 | |
| 367 print $out "\t".$U1."\t".$A10; | |
| 368 | 368 |
| 369 if ($piRNA_number != 0 ) | 369 if ($piRNA_number != 0 ) |
| 370 { | 370 { |
| 371 $pirna = ( $counthashR->{$k}->[0] * 1000000) / $piRNA_number; | 371 $pirna = ( $counthashR->{$k}->[0] * 1000000) / $piRNA_number; |
| 372 printf $out "\t%.2f",$pirna; | 372 printf $out "\t%.2f",$pirna; |
| 379 if ($bonafide_number != 0 ) | 379 if ($bonafide_number != 0 ) |
| 380 { | 380 { |
| 381 $bonafide = ( $counthashR->{$k}->[0] * 1000000) / $bonafide_number; | 381 $bonafide = ( $counthashR->{$k}->[0] * 1000000) / $bonafide_number; |
| 382 printf $out "\t%.2f",$bonafide; | 382 printf $out "\t%.2f",$bonafide; |
| 383 } | 383 } |
| 384 | |
| 385 print $out "\t".$counthashR->{$k}->[1]."\t".$counthashR->{$k}->[2] ; | |
| 386 my $S1U = 0; | |
| 387 $S1U = $counthashR->{$k}->[3] / $counthashR->{$k}->[1] * 100 if $counthashR->{$k}->[1] != 0; | |
| 388 my $R1U = 0; | |
| 389 $R1U = $counthashR->{$k}->[6] / $counthashR->{$k}->[2] * 100 if $counthashR->{$k}->[2] != 0; | |
| 390 my $S10A = 0; | |
| 391 $SA10 = $counthashR->{$k}->[5] / $counthashR->{$k}->[1] * 100 if $counthashR->{$k}->[1] != 0; | |
| 392 my $R10A = 0; | |
| 393 $R10A = $counthashR->{$k}->[4] / $counthashR->{$k}->[2] * 100 if $counthashR->{$k}->[2] != 0; | |
| 394 | |
| 395 print $out "\t".$S1U."\t".$S10A."\t".$R1U."\t".$R10A; | |
| 396 | |
| 384 print $out "\n"; | 397 print $out "\n"; |
| 385 } | 398 } |
| 386 close $out; | 399 close $out; |
| 387 } | 400 } |
| 388 | 401 |
