Mercurial > repos > brasset_jensen > srnapipe
annotate bin/resize.pm @ 61:9185ca0a7b43 draft
Updated package according to recommendations.
| author | pierre.pouchin | 
|---|---|
| date | Wed, 16 Jan 2019 08:18:13 -0500 | 
| parents | 4bc00caa60b4 | 
| children | 
| rev | line source | 
|---|---|
| 61 
9185ca0a7b43
Updated package according to recommendations.
 pierre.pouchin parents: 
40diff
changeset | 1 package resize; | 
| 
9185ca0a7b43
Updated package according to recommendations.
 pierre.pouchin parents: 
40diff
changeset | 2 | 
| 
9185ca0a7b43
Updated package according to recommendations.
 pierre.pouchin parents: 
40diff
changeset | 3 use strict; | 
| 
9185ca0a7b43
Updated package according to recommendations.
 pierre.pouchin parents: 
40diff
changeset | 4 use warnings; | 
| 
9185ca0a7b43
Updated package according to recommendations.
 pierre.pouchin parents: 
40diff
changeset | 5 | 
| 
9185ca0a7b43
Updated package according to recommendations.
 pierre.pouchin parents: 
40diff
changeset | 6 use FindBin; | 
| 
9185ca0a7b43
Updated package according to recommendations.
 pierre.pouchin parents: 
40diff
changeset | 7 use lib $FindBin::Bin; | 
| 
9185ca0a7b43
Updated package according to recommendations.
 pierre.pouchin parents: 
40diff
changeset | 8 use Rcall qw ( histogram ); | 
| 
9185ca0a7b43
Updated package according to recommendations.
 pierre.pouchin parents: 
40diff
changeset | 9 | 
| 
9185ca0a7b43
Updated package according to recommendations.
 pierre.pouchin parents: 
40diff
changeset | 10 use Exporter; | 
| 
9185ca0a7b43
Updated package according to recommendations.
 pierre.pouchin parents: 
40diff
changeset | 11 our @ISA = qw( Exporter ); | 
| 
9185ca0a7b43
Updated package according to recommendations.
 pierre.pouchin parents: 
40diff
changeset | 12 our @EXPORT_OK = qw( &size_distribution ); | 
| 
9185ca0a7b43
Updated package according to recommendations.
 pierre.pouchin parents: 
40diff
changeset | 13 | 
| 
9185ca0a7b43
Updated package according to recommendations.
 pierre.pouchin parents: 
40diff
# Filter a FASTQ file by read length and write summary reports.
#
# Arguments:
#   $fastq      input FASTQ path (handed to size())
#   $fastq_out  path of the filtered FASTQ written by size()
#   $dir        prefix prepended verbatim to every report name; no path
#               separator is inserted here, so it must already end with one
#   $min, $max  inclusive read-length bounds (handed to size())
#
# Files written under $dir:
#   histogram.png   produced by Rcall::histogram from the size counts
#                   (presumably a plot -- confirm against Rcall)
#   reads_size.txt  size / number / percentage, sorted by increasing size
#   duplicates.txt  sequence / number, sorted by decreasing count
#
# Dies if a report file cannot be opened or closed.
# Returns a true value on success.
sub size_distribution
{
  my ( $fastq, $fastq_out, $dir, $min, $max ) = @_;

  my ( %fragments_size, %duplicates );
  my $num = size($min, $max, $fastq, $fastq_out, \%fragments_size, \%duplicates);

  my $png = $dir.'histogram.png';
  histogram(\%fragments_size, $png, $num);

  # 'or' (not '||') so that the check applies to open() itself; with '||'
  # the die was attached to the always-true file name and never fired.
  my $size = $dir.'reads_size.txt';
  open my $size_fh, '>', $size or die "cannot open $size $!\n";
  print {$size_fh} "size\tnumber\tpercentage\n";
  foreach my $k (sort { $a <=> $b } keys %fragments_size)
  {
    # $num cannot be zero here: the hash only has keys when reads were kept.
    my $pourcentage = $fragments_size{$k} / $num * 100;
    printf {$size_fh} "%s\t%s\t%.2f\n", $k, $fragments_size{$k}, $pourcentage;
  }
  # Buffered write errors only surface at close time.
  close $size_fh or die "cannot close $size $!\n";

  my $dup = $dir.'duplicates.txt';
  open my $dup_fh, '>', $dup or die "cannot open $dup $!\n";
  print {$dup_fh} "size\tnumber\n";
  foreach my $k (sort { $duplicates{$b} <=> $duplicates{$a} } keys %duplicates)
  {
    print {$dup_fh} "$k\t$duplicates{$k}\n";
  }
  close $dup_fh or die "cannot close $dup $!\n";

  # The original implicitly returned the (true) result of the final close.
  return 1;
}
| 
9185ca0a7b43
Updated package according to recommendations.
 pierre.pouchin parents: 
40diff
changeset | 48 | 
| 
9185ca0a7b43
Updated package according to recommendations.
 pierre.pouchin parents: 
40diff
# Copy the FASTQ records whose sequence length lies in [$min, $max].
#
# Arguments:
#   $min, $max       inclusive bounds on the sequence length (after chomp)
#   $in_file         FASTQ file to read, four lines per record
#   $out_file        file that receives the kept records (truncated first)
#   $sizeHashR       hash ref; value for each kept length is incremented
#   $duplicateHashR  optional hash ref; value for each kept sequence is
#                    incremented when the reference is defined
#
# Dies if a file cannot be opened, if the output cannot be closed, if the
# first line of a record does not start with '@', or if the third line does
# not start with '+'.
# Returns the number of records kept.
sub size
{
  my ($min, $max, $in_file, $out_file, $sizeHashR, $duplicateHashR) = @_;
  my ($numreads, $line, $ok) = (0, 0, 0);
  my @fastq;

  # Three-argument open: the file names are never parsed for mode characters.
  open(my $in, '<', $in_file) or die "cannot open $in_file $!\n";
  open(my $out, '>', $out_file) or die "cannot create $out_file $!\n";

  # A lexical line variable keeps the caller's $_ untouched.
  while (my $text = <$in>)
  {
    chomp $text;
    $line++;
    my $cmp = ($line - 1) % 4 + 1;   # position inside the 4-line record

    if ($cmp == 1)
    {
      die "file do not contain a @ at line $line\n" unless ($text =~ /^\@/);
      $ok = 0;
      @fastq = ($text);
    }
    elsif ($cmp == 2)
    {
      push(@fastq, $text);
      my $size = length($text);
      if ($size >= $min && $size <= $max)
      {
        $numreads++;
        ${$sizeHashR}{$size} += 1;
        ${$duplicateHashR}{$text} += 1 if (defined($duplicateHashR));
        $ok = 1;
      }
    }
    elsif ($cmp == 3)
    {
      die "file do not contain a + at line $line\n" unless $text =~ /^\+/;
      push(@fastq, $text);
    }
    else
    {
      push(@fastq, $text);
      # Record complete: write it out only when its length was accepted.
      print {$out} map { $_."\n" } @fastq if ($ok == 1);
    }
  }
  close $in;
  # Buffered write errors only surface at close time.
  close $out or die "cannot close $out_file $!\n";
  return $numreads;
}
| 
9185ca0a7b43
Updated package according to recommendations.
 pierre.pouchin parents: 
40diff
changeset | 100 | 
| 
9185ca0a7b43
Updated package according to recommendations.
 pierre.pouchin parents: 
40diff
changeset | 101 1; | 
