srnapipe: bin/resize.pm comparison

comparison bin/resize.pm @ 1:1df6aaac800e draft

Deleted selected files

author	brasset_jensen
date	Wed, 13 Dec 2017 10:40:50 -0500
parents
children	4bc00caa60b4

comparison

equal deleted inserted replaced

-:e4e71401c577
+:1df6aaac800e
+package resize;
+use strict;
+use warnings;
+use FindBin;
+use lib $FindBin::Bin;
+use Rcall qw ( histogram );
+use Exporter;
+our @ISA = qw( Exporter );
+our @EXPORT_OK = qw( &size_distribution );
+sub size_distribution
+{
+my ( $fastq, $fastq_out, $dir, $min, $max ) = @_;
+my ( %fragments_size, %duplicates ) ;
+my $num = size($min, $max, $fastq, $fastq_out, \%fragments_size, \%duplicates);
+my $png = $dir.'histogram.png';
+histogram(\%fragments_size, $png, $num);
+my $size = $dir.'reads_size.txt';
+my $pourcentage;
+open  my $o, '>', $size || die "cannot open $size $!\n";
+print $o "size\tnumber\tpercentage\n";
+foreach my  $k (sort { $a <=> $b } keys %fragments_size )
+{
+$pourcentage = $fragments_size{$k} / $num * 100;
+print $o "$k\t$fragments_size{$k}\t";
+printf $o "%.2f\n",$pourcentage;
+}
+close $o;
+my $dup = $dir.'duplicates.txt' ;
+open $o, '>', $dup || die "cannot open $size $!\n";
+print $o "size\tnumber\n";
+foreach my  $k (sort { $duplicates{$b} <=> $duplicates{$a} } keys %duplicates )
+{
+print $o "$k\t$duplicates{$k}\n";
+}
+close $o;
+}
+sub size
+{
+my ($min, $max, $in_file, $out_file, $sizeHashR, $duplicateHashR) = @_;
+my ($numreads, $size, $cmp, $ok, $line) = (0, 0, 0, 0);
+my @fastq;
+open (my $in, $in_file) || die "cannot open $in_file $!\n";
+open (my $out, ">".$out_file)  || die "cannot create $out_file $!\n";
+while(<$in>)
+{
+chomp $_;
+$cmp++; $line++;
+if ($cmp == 1)
+{
+die "file do not contain a @ at line $line\n" unless ($_ =~ /^\@/ );
+$ok = 0; @fastq = ();
+push(@fastq,$_);
+}
+elsif ($cmp == 2)
+{
+#die "unrecognized symbol at line $line\n" unless ($_ =~ /[atcgATCGnN]+/ || $_ =~ /^$/ );
+push(@fastq,$_);
+$size = length($_);
+if ($size >= $min && $size <= $max)
+{
+$numreads++;
+${$sizeHashR}{$size}+=1;
+${$duplicateHashR}{$_}+=1 if (defined($duplicateHashR));
+$ok = 1;
+}
+}
+elsif ($cmp == 3 )
+{
+die "file do not contain a + at line $line\n" unless $_ =~ /^\+/;
+push(@fastq,$_);
+}
+elsif ($cmp == 4 )
+{
+push(@fastq,$_);
+$cmp = 0;
+if ($ok == 1)
+{
+foreach my $t (@fastq)
+{
+print $out $t."\n";
+}
+}
+}
+}
+close $in; close $out;
+return $numreads;
+}
+1;

Mercurial > repos > brasset_jensen > srnapipe

comparison bin/resize.pm @ 1:1df6aaac800e draft