diff bin/resize.pm @ 40:4bc00caa60b4 draft

Uploaded
author pierre.pouchin
date Thu, 12 Jul 2018 08:27:08 -0400
parents 1df6aaac800e
children 9185ca0a7b43
line wrap: on
line diff
--- a/bin/resize.pm	Thu Jul 12 08:24:21 2018 -0400
+++ b/bin/resize.pm	Thu Jul 12 08:27:08 2018 -0400
@@ -1,101 +1,101 @@
-package resize;
-
-use strict;
-use warnings;
-
-use FindBin;
-use lib $FindBin::Bin;
-use Rcall qw ( histogram );
-
-use Exporter;
-our @ISA = qw( Exporter );
-our @EXPORT_OK = qw( &size_distribution );
-
-sub size_distribution
-{
-  my ( $fastq, $fastq_out, $dir, $min, $max ) = @_;
-
-  my ( %fragments_size, %duplicates ) ;
-  my $num = size($min, $max, $fastq, $fastq_out, \%fragments_size, \%duplicates);
-
-  my $png = $dir.'histogram.png';
-  histogram(\%fragments_size, $png, $num);
-  
-  my $size = $dir.'reads_size.txt';
-  
-
-  my $pourcentage;
-  open  my $o, '>', $size || die "cannot open $size $!\n";
-  print $o "size\tnumber\tpercentage\n";
-  foreach my  $k (sort { $a <=> $b } keys %fragments_size )
-  {
-    $pourcentage = $fragments_size{$k} / $num * 100;
-    
-    print $o "$k\t$fragments_size{$k}\t";
-    printf $o "%.2f\n",$pourcentage;
-  }
-  close $o;
-  
-  my $dup = $dir.'duplicates.txt' ;  
-  open $o, '>', $dup || die "cannot open $size $!\n";
-  print $o "size\tnumber\n";
-  foreach my  $k (sort { $duplicates{$b} <=> $duplicates{$a} } keys %duplicates )
-  {
-    print $o "$k\t$duplicates{$k}\n";
-  }
-  close $o;
-}
-
-sub size
-{
-  my ($min, $max, $in_file, $out_file, $sizeHashR, $duplicateHashR) = @_;
-  my ($numreads, $size, $cmp, $ok, $line) = (0, 0, 0, 0);
-  my @fastq;
-  open (my $in, $in_file) || die "cannot open $in_file $!\n";
-  open (my $out, ">".$out_file)  || die "cannot create $out_file $!\n";
-  while(<$in>)
-  {
-    chomp $_;
-    $cmp++; $line++;
-    if ($cmp == 1)
-    {
-      die "file do not contain a @ at line $line\n" unless ($_ =~ /^\@/ );
-      $ok = 0; @fastq = ();
-      push(@fastq,$_);
-    }
-    elsif ($cmp == 2)
-    {
-      #die "unrecognized symbol at line $line\n" unless ($_ =~ /[atcgATCGnN]+/ || $_ =~ /^$/ );
-      push(@fastq,$_);
-      $size = length($_);
-      if ($size >= $min && $size <= $max)
-      {
-        $numreads++;
-        ${$sizeHashR}{$size}+=1;
-        ${$duplicateHashR}{$_}+=1 if (defined($duplicateHashR));
-        $ok = 1;
-      }
-    }
-    elsif ($cmp == 3 )
-    {
-      die "file do not contain a + at line $line\n" unless $_ =~ /^\+/;
-      push(@fastq,$_);
-    }
-    elsif ($cmp == 4 )
-    {
-      push(@fastq,$_);
-      $cmp = 0;
-      if ($ok == 1)
-      {
-        foreach my $t (@fastq)
-        {
-          print $out $t."\n";
-        }
-      }
-    }
-  }
-  close $in; close $out;
-  return $numreads;
-}
-
-1;
+package resize;
+
+use strict;
+use warnings;
+
+use FindBin;
+use lib $FindBin::Bin;
+use Rcall qw ( histogram );
+
+use Exporter;
+our @ISA = qw( Exporter );
+our @EXPORT_OK = qw( &size_distribution );
+
+sub size_distribution
+{
+  my ( $fastq, $fastq_out, $dir, $min, $max ) = @_;
+
+  my ( %fragments_size, %duplicates ) ;
+  my $num = size($min, $max, $fastq, $fastq_out, \%fragments_size, \%duplicates);
+
+  my $png = $dir.'histogram.png';
+  histogram(\%fragments_size, $png, $num);
+  
+  my $size = $dir.'reads_size.txt';
+  
+
+  my $pourcentage;
+  open  my $o, '>', $size || die "cannot open $size $!\n";
+  print $o "size\tnumber\tpercentage\n";
+  foreach my  $k (sort { $a <=> $b } keys %fragments_size )
+  {
+    $pourcentage = $fragments_size{$k} / $num * 100;
+    
+    print $o "$k\t$fragments_size{$k}\t";
+    printf $o "%.2f\n",$pourcentage;
+  }
+  close $o;
+  
+  my $dup = $dir.'duplicates.txt' ;  
+  open $o, '>', $dup || die "cannot open $size $!\n";
+  print $o "size\tnumber\n";
+  foreach my  $k (sort { $duplicates{$b} <=> $duplicates{$a} } keys %duplicates )
+  {
+    print $o "$k\t$duplicates{$k}\n";
+  }
+  close $o;
+}
+
+sub size
+{
+  my ($min, $max, $in_file, $out_file, $sizeHashR, $duplicateHashR) = @_;
+  my ($numreads, $size, $cmp, $ok, $line) = (0, 0, 0, 0);
+  my @fastq;
+  open (my $in, $in_file) || die "cannot open $in_file $!\n";
+  open (my $out, ">".$out_file)  || die "cannot create $out_file $!\n";
+  while(<$in>)
+  {
+    chomp $_;
+    $cmp++; $line++;
+    if ($cmp == 1)
+    {
+      die "file do not contain a @ at line $line\n" unless ($_ =~ /^\@/ );
+      $ok = 0; @fastq = ();
+      push(@fastq,$_);
+    }
+    elsif ($cmp == 2)
+    {
+      #die "unrecognized symbol at line $line\n" unless ($_ =~ /[atcgATCGnN]+/ || $_ =~ /^$/ );
+      push(@fastq,$_);
+      $size = length($_);
+      if ($size >= $min && $size <= $max)
+      {
+        $numreads++;
+        ${$sizeHashR}{$size}+=1;
+        ${$duplicateHashR}{$_}+=1 if (defined($duplicateHashR));
+        $ok = 1;
+      }
+    }
+    elsif ($cmp == 3 )
+    {
+      die "file do not contain a + at line $line\n" unless $_ =~ /^\+/;
+      push(@fastq,$_);
+    }
+    elsif ($cmp == 4 )
+    {
+      push(@fastq,$_);
+      $cmp = 0;
+      if ($ok == 1)
+      {
+        foreach my $t (@fastq)
+        {
+          print $out $t."\n";
+        }
+      }
+    }
+  }
+  close $in; close $out;
+  return $numreads;
+}
+
+1;