| 
0
 | 
     1 package resize;
 | 
| 
 | 
     2 
 | 
| 
 | 
     3 use strict;
 | 
| 
 | 
     4 use warnings;
 | 
| 
 | 
     5 
 | 
| 
 | 
     6 use FindBin;
 | 
| 
 | 
     7 use lib $FindBin::Bin;
 | 
| 
 | 
     8 use Rcall qw ( histogram );
 | 
| 
 | 
     9 
 | 
| 
 | 
    10 use Exporter;
 | 
| 
 | 
    11 our @ISA = qw( Exporter );
 | 
| 
 | 
    12 our @EXPORT_OK = qw( &size_distribution );
 | 
| 
 | 
    13 
 | 
| 
 | 
    14 sub size_distribution
 | 
| 
 | 
    15 {
 | 
| 
 | 
    16   my ( $fastq, $fastq_out, $dir, $min, $max ) = @_;
 | 
| 
 | 
    17 
 | 
| 
 | 
    18   my ( %fragments_size, %duplicates ) ;
 | 
| 
 | 
    19   my $num = size($min, $max, $fastq, $fastq_out, \%fragments_size, \%duplicates);
 | 
| 
 | 
    20 
 | 
| 
 | 
    21   my $png = $dir.'histogram.png';
 | 
| 
 | 
    22    histogram(\%fragments_size, $png, $num);
 | 
| 
 | 
    23   
 | 
| 
 | 
    24   my $size = $dir.'reads_size.txt';
 | 
| 
 | 
    25   
 | 
| 
 | 
    26 
 | 
| 
 | 
    27   my $pourcentage;
 | 
| 
 | 
    28   open  my $o, '>', $size || die "cannot open $size $!\n";
 | 
| 
 | 
    29   print $o "size\tnumber\tpercentage\n";
 | 
| 
 | 
    30   foreach my  $k (sort { $a <=> $b } keys %fragments_size )
 | 
| 
 | 
    31   {
 | 
| 
 | 
    32     $pourcentage = $fragments_size{$k} / $num * 100;
 | 
| 
 | 
    33     
 | 
| 
 | 
    34     print $o "$k\t$fragments_size{$k}\t";
 | 
| 
 | 
    35     printf $o "%.2f\n",$pourcentage;
 | 
| 
 | 
    36   }
 | 
| 
 | 
    37   close $o;
 | 
| 
 | 
    38   
 | 
| 
 | 
    39   my $dup = $dir.'duplicates.txt' ;  
 | 
| 
 | 
    40   open $o, '>', $dup || die "cannot open $size $!\n";
 | 
| 
 | 
    41   print $o "size\tnumber\n";
 | 
| 
 | 
    42   foreach my  $k (sort { $duplicates{$b} <=> $duplicates{$a} } keys %duplicates )
 | 
| 
 | 
    43   {
 | 
| 
 | 
    44     print $o "$k\t$duplicates{$k}\n";
 | 
| 
 | 
    45   }
 | 
| 
 | 
    46   close $o;
 | 
| 
 | 
    47 }
 | 
| 
 | 
    48 
 | 
| 
 | 
    49 sub size
 | 
| 
 | 
    50 {
 | 
| 
 | 
    51   my ($min, $max, $in_file, $out_file, $sizeHashR, $duplicateHashR) = @_;
 | 
| 
 | 
    52   my ($numreads, $size, $cmp, $ok, $line) = (0, 0, 0, 0);
 | 
| 
 | 
    53   my @fastq;
 | 
| 
 | 
    54   open (my $in, $in_file) || die "cannot open $in_file $!\n";
 | 
| 
 | 
    55   open (my $out, ">".$out_file)  || die "cannot create $out_file $!\n";
 | 
| 
 | 
    56   while(<$in>)
 | 
| 
 | 
    57   {
 | 
| 
 | 
    58     chomp $_;
 | 
| 
 | 
    59     $cmp++; $line++;
 | 
| 
 | 
    60     if ($cmp == 1)
 | 
| 
 | 
    61     {
 | 
| 
 | 
    62       die "file do not contain a @ at line $line\n" unless ($_ =~ /^\@/ );
 | 
| 
 | 
    63       $ok = 0; @fastq = ();
 | 
| 
 | 
    64       push(@fastq,$_);
 | 
| 
 | 
    65     }
 | 
| 
 | 
    66     elsif ($cmp == 2)
 | 
| 
 | 
    67     {
 | 
| 
 | 
    68       #die "unrecognized symbol at line $line\n" unless ($_ =~ /[atcgATCGnN]+/ || $_ =~ /^$/ );
 | 
| 
 | 
    69       push(@fastq,$_);
 | 
| 
 | 
    70       $size = length($_);
 | 
| 
 | 
    71       if ($size >= $min && $size <= $max)
 | 
| 
 | 
    72       {
 | 
| 
 | 
    73         $numreads++;
 | 
| 
 | 
    74         ${$sizeHashR}{$size}+=1;
 | 
| 
 | 
    75         ${$duplicateHashR}{$_}+=1 if (defined($duplicateHashR));
 | 
| 
 | 
    76         $ok = 1;
 | 
| 
 | 
    77       }
 | 
| 
 | 
    78     }
 | 
| 
 | 
    79     elsif ($cmp == 3 )
 | 
| 
 | 
    80     {
 | 
| 
 | 
    81       die "file do not contain a + at line $line\n" unless $_ =~ /^\+/;
 | 
| 
 | 
    82       push(@fastq,$_);
 | 
| 
 | 
    83     }
 | 
| 
 | 
    84     elsif ($cmp == 4 )
 | 
| 
 | 
    85     {
 | 
| 
 | 
    86       push(@fastq,$_);
 | 
| 
 | 
    87       $cmp = 0;
 | 
| 
 | 
    88       if ($ok == 1)
 | 
| 
 | 
    89       {
 | 
| 
 | 
    90         foreach my $t (@fastq)
 | 
| 
 | 
    91         {
 | 
| 
 | 
    92           print $out $t."\n";
 | 
| 
 | 
    93         }
 | 
| 
 | 
    94       }
 | 
| 
 | 
    95     }
 | 
| 
 | 
    96   }
 | 
| 
 | 
    97   close $in; close $out;
 | 
| 
 | 
    98   return $numreads;
 | 
| 
 | 
    99 }
 | 
| 
 | 
   100 
 | 
| 
 | 
   101 1;
 |