Mercurial > repos > brasset_jensen > srnapipe
comparison bin/align.pm @ 27:c8090766c3f5 draft
Uploaded
author | brasset_jensen |
---|---|
date | Sun, 20 May 2018 17:46:23 -0400 |
parents | e26d3bee9348 |
children | 8d8357dee6b3 |
comparison
equal
deleted
inserted
replaced
26:b3be854a4273 | 27:c8090766c3f5 |
---|---|
282 if ($_ =~ /^\@[A-Za-z][A-Za-z](\t[A-Za-z][A-Za-z0-9]:[ -~]+)+$/ || $_ =~ /^\@CO\t.*/ ) | 282 if ($_ =~ /^\@[A-Za-z][A-Za-z](\t[A-Za-z][A-Za-z0-9]:[ -~]+)+$/ || $_ =~ /^\@CO\t.*/ ) |
283 { | 283 { |
284 if ($_ =~ /\@SQ\tSN:(.*)\tLN:(\d*)/) | 284 if ($_ =~ /\@SQ\tSN:(.*)\tLN:(\d*)/) |
285 { | 285 { |
286 $size{$1} = $2; | 286 $size{$1} = $2; |
287 $number{$1} = [0,0,0,0,0]; | 287 $number{$1} = [0,0,0,0,0,0,0]; |
288 $numberNM{$1} = [0,0,0,0,0]; | 288 $numberNM{$1} = [0,0,0,0,0,0,0]; |
289 $numberM{$1} = [0,0,0,0,0]; | 289 $numberM{$1} = [0,0,0,0,0,0,0]; |
290 } | 290 } |
291 } | 291 } |
292 else | 292 else |
293 { | 293 { |
294 my @line = split (/\t/,$_); | 294 my @line = split (/\t/,$_); |
298 $number{ $line[2] }->[0]++; | 298 $number{ $line[2] }->[0]++; |
299 if ($line[1] == 0) | 299 if ($line[1] == 0) |
300 { | 300 { |
301 $number{$line[2]}->[1]++; | 301 $number{$line[2]}->[1]++; |
302 $number{$line[2]}->[3]++ if $seq[0] eq 'T'; | 302 $number{$line[2]}->[3]++ if $seq[0] eq 'T'; |
303 $number{$line[2]}->[5]++ if $seq[9] eq 'A'; | |
303 } | 304 } |
304 else | 305 else |
305 { | 306 { |
306 $number{$line[2]}->[2]++; | 307 $number{$line[2]}->[2]++; |
307 $number{$line[2]}->[4]++ if $seq[9] eq 'A'; | 308 $number{$line[2]}->[4]++ if $seq[9] eq 'A'; |
309 $number{$line[2]}->[6]++ if $seq[0] eq 'T'; | |
308 } | 310 } |
309 if ($_ =~ /.*XM:i:(\d+).*/) | 311 if ($_ =~ /.*XM:i:(\d+).*/) |
310 { | 312 { |
311 if ( $1 == 0 ) | 313 if ( $1 == 0 ) |
312 { | 314 { |
313 $numberNM{$line[2]}->[0]++; | 315 $numberNM{$line[2]}->[0]++; |
314 if ($line[1] == 0) | 316 if ($line[1] == 0) |
315 { | 317 { |
316 $numberNM{$line[2]}->[1]++; | 318 $numberNM{$line[2]}->[1]++; |
317 $numberNM{$line[2]}->[3]++ if $seq[0] eq 'T'; | 319 $numberNM{$line[2]}->[3]++ if $seq[0] eq 'T'; |
320 $numberNM{$line[2]}->[5]++ if $seq[9] eq 'A'; | |
318 } | 321 } |
319 else | 322 else |
320 { | 323 { |
321 $numberNM{$line[2]}->[2]++; | 324 $numberNM{$line[2]}->[2]++; |
322 $numberNM{$line[2]}->[4]++ if $seq[9] eq 'A'; | 325 $numberNM{$line[2]}->[4]++ if $seq[9] eq 'A'; |
326 $numberNM{$line[2]}->[6]++ if $seq[0] eq 'T'; | |
323 } | 327 } |
324 } | 328 } |
325 else | 329 else |
326 { | 330 { |
327 $numberM{$line[2]}->[0]++; | 331 $numberM{$line[2]}->[0]++; |
328 if ($line[1] == 0) | 332 if ($line[1] == 0) |
329 { | 333 { |
330 $numberM{$line[2]}->[1]++; | 334 $numberM{$line[2]}->[1]++; |
331 $numberM{$line[2]}->[3]++ if $seq[0] eq 'T'; | 335 $numberM{$line[2]}->[3]++ if $seq[0] eq 'T'; |
336 $numberM{$line[2]}->[5]++ if $seq[9] eq 'A'; | |
332 } | 337 } |
333 else | 338 else |
334 { | 339 { |
335 $numberM{$line[2]}->[2]++; | 340 $numberM{$line[2]}->[2]++; |
336 $numberM{$line[2]}->[4]++ if $seq[9] eq 'A'; | 341 $numberM{$line[2]}->[4]++ if $seq[9] eq 'A'; |
342 $numberM{$line[2]}->[6]++ if $seq[0] eq 'T'; | |
337 } | 343 } |
338 } | 344 } |
339 } | 345 } |
340 } | 346 } |
341 } | 347 } |
345 | 351 |
346 sub rpms_rpkm_te | 352 sub rpms_rpkm_te |
347 { | 353 { |
348 my ( $counthashR, $sizehashR, $mapped, $out_file, $piRNA_number, $miRNA_number, $bonafide_number ) =@_; | 354 my ( $counthashR, $sizehashR, $mapped, $out_file, $piRNA_number, $miRNA_number, $bonafide_number ) =@_; |
349 open(my $out, ">".$out_file) || die "cannot open normalized file $! \n"; | 355 open(my $out, ">".$out_file) || die "cannot open normalized file $! \n"; |
350 print $out "ID\treads counts\tRPKM\tsens reads counts\treverse reads counts\t% of sens 1U\t% of reverse 10A"; | 356 print $out "ID\treads counts\tRPKM"; |
351 print $out "\tper million of piRNAs" if ($piRNA_number != 0); | 357 print $out "\tper million of piRNAs" if ($piRNA_number != 0); |
352 print $out "\tper million of miRNAs" if ($miRNA_number != 0); | 358 print $out "\tper million of miRNAs" if ($miRNA_number != 0); |
353 print $out "\tper million of bonafide reads" if ($bonafide_number != 0); | 359 print $out "\tper million of bonafide reads" if ($bonafide_number != 0); |
354 print $out "\n"; | 360 print $out "\tsens reads counts\treverse reads counts"; |
361 print $out "\t% of sens 1U\t% of sens 10A\t% of reverse 1U\t% of reverse 10A\n"; | |
355 foreach my $k ( sort keys %{$counthashR} ) | 362 foreach my $k ( sort keys %{$counthashR} ) |
356 { | 363 { |
357 my ($rpkm, $pirna, $mirna, $bonafide) = (0,0,0,0); | 364 my ($rpkm, $pirna, $mirna, $bonafide) = (0,0,0,0); |
358 | 365 |
359 $rpkm = ( $counthashR->{$k}->[0] * 1000000000) / ( $sizehashR->{$k} * $mapped) if ( $sizehashR->{$k} * $mapped) != 0 ; | 366 $rpkm = ( $counthashR->{$k}->[0] * 1000000000) / ( $sizehashR->{$k} * $mapped) if ( $sizehashR->{$k} * $mapped) != 0 ; |
360 print $out $k."\t".$counthashR->{$k}->[0]."\t"; printf $out "%.2f",$rpkm; | 367 print $out $k."\t".$counthashR->{$k}->[0]."\t"; printf $out "%.2f",$rpkm; |
361 print $out "\t".$counthashR->{$k}->[1]."\t".$counthashR->{$k}->[2] ; | |
362 my $U1 = 0; | |
363 $U1 = $counthashR->{$k}->[3] / $counthashR->{$k}->[1] * 100 if $counthashR->{$k}->[1] != 0; | |
364 my $A10 = 0; | |
365 $A10 = $counthashR->{$k}->[4] / $counthashR->{$k}->[2] * 100 if $counthashR->{$k}->[2] != 0; | |
366 | |
367 print $out "\t".$U1."\t".$A10; | |
368 | 368 |
369 if ($piRNA_number != 0 ) | 369 if ($piRNA_number != 0 ) |
370 { | 370 { |
371 $pirna = ( $counthashR->{$k}->[0] * 1000000) / $piRNA_number; | 371 $pirna = ( $counthashR->{$k}->[0] * 1000000) / $piRNA_number; |
372 printf $out "\t%.2f",$pirna; | 372 printf $out "\t%.2f",$pirna; |
379 if ($bonafide_number != 0 ) | 379 if ($bonafide_number != 0 ) |
380 { | 380 { |
381 $bonafide = ( $counthashR->{$k}->[0] * 1000000) / $bonafide_number; | 381 $bonafide = ( $counthashR->{$k}->[0] * 1000000) / $bonafide_number; |
382 printf $out "\t%.2f",$bonafide; | 382 printf $out "\t%.2f",$bonafide; |
383 } | 383 } |
384 | |
385 print $out "\t".$counthashR->{$k}->[1]."\t".$counthashR->{$k}->[2] ; | |
386 my $S1U = 0; | |
387 $S1U = $counthashR->{$k}->[3] / $counthashR->{$k}->[1] * 100 if $counthashR->{$k}->[1] != 0; | |
388 my $R1U = 0; | |
389 $R1U = $counthashR->{$k}->[6] / $counthashR->{$k}->[2] * 100 if $counthashR->{$k}->[2] != 0; | |
390 my $S10A = 0; | |
391 $SA10 = $counthashR->{$k}->[5] / $counthashR->{$k}->[1] * 100 if $counthashR->{$k}->[1] != 0; | |
392 my $R10A = 0; | |
393 $R10A = $counthashR->{$k}->[4] / $counthashR->{$k}->[2] * 100 if $counthashR->{$k}->[2] != 0; | |
394 | |
395 print $out "\t".$S1U."\t".$S10A."\t".$R1U."\t".$R10A; | |
396 | |
384 print $out "\n"; | 397 print $out "\n"; |
385 } | 398 } |
386 close $out; | 399 close $out; |
387 } | 400 } |
388 | 401 |