comparison bin/resize.pm @ 61:9185ca0a7b43 draft

Updated package according to recommendations.
author pierre.pouchin
date Wed, 16 Jan 2019 08:18:13 -0500
parents 4bc00caa60b4
children
comparison
equal deleted inserted replaced
60:9645d995fb3c 61:9185ca0a7b43
11 our @ISA = qw( Exporter ); 11 our @ISA = qw( Exporter );
12 our @EXPORT_OK = qw( &size_distribution ); 12 our @EXPORT_OK = qw( &size_distribution );
13 13
14 sub size_distribution 14 sub size_distribution
15 { 15 {
16 my ( $fastq, $fastq_out, $dir, $min, $max ) = @_; 16 my ( $fastq, $fastq_out, $dir, $min, $max ) = @_;
17 17
18 my ( %fragments_size, %duplicates ) ; 18 my ( %fragments_size, %duplicates ) ;
19 my $num = size($min, $max, $fastq, $fastq_out, \%fragments_size, \%duplicates); 19 my $num = size($min, $max, $fastq, $fastq_out, \%fragments_size, \%duplicates);
20 20
21 my $png = $dir.'histogram.png'; 21 my $png = $dir.'histogram.png';
22 histogram(\%fragments_size, $png, $num); 22 histogram(\%fragments_size, $png, $num);
23 23
24 my $size = $dir.'reads_size.txt'; 24 my $size = $dir.'reads_size.txt';
25 25
26 26
27 my $pourcentage; 27 my $pourcentage;
28 open my $o, '>', $size || die "cannot open $size $!\n"; 28 open my $o, '>', $size || die "cannot open $size $!\n";
29 print $o "size\tnumber\tpercentage\n"; 29 print $o "size\tnumber\tpercentage\n";
30 foreach my $k (sort { $a <=> $b } keys %fragments_size ) 30 foreach my $k (sort { $a <=> $b } keys %fragments_size )
31 { 31 {
32 $pourcentage = $fragments_size{$k} / $num * 100; 32 $pourcentage = $fragments_size{$k} / $num * 100;
33
34 print $o "$k\t$fragments_size{$k}\t";
35 printf $o "%.2f\n",$pourcentage;
36 }
37 close $o;
33 38
34 print $o "$k\t$fragments_size{$k}\t"; 39 my $dup = $dir.'duplicates.txt' ;
35 printf $o "%.2f\n",$pourcentage; 40 open $o, '>', $dup || die "cannot open $size $!\n";
36 } 41 print $o "size\tnumber\n";
37 close $o; 42 foreach my $k (sort { $duplicates{$b} <=> $duplicates{$a} } keys %duplicates )
38 43 {
39 my $dup = $dir.'duplicates.txt' ; 44 print $o "$k\t$duplicates{$k}\n";
40 open $o, '>', $dup || die "cannot open $size $!\n"; 45 }
41 print $o "size\tnumber\n"; 46 close $o;
42 foreach my $k (sort { $duplicates{$b} <=> $duplicates{$a} } keys %duplicates )
43 {
44 print $o "$k\t$duplicates{$k}\n";
45 }
46 close $o;
47 } 47 }
48 48
49 sub size 49 sub size
50 { 50 {
51 my ($min, $max, $in_file, $out_file, $sizeHashR, $duplicateHashR) = @_; 51 my ($min, $max, $in_file, $out_file, $sizeHashR, $duplicateHashR) = @_;
52 my ($numreads, $size, $cmp, $ok, $line) = (0, 0, 0, 0); 52 my ($numreads, $size, $cmp, $ok, $line) = (0, 0, 0, 0);
53 my @fastq; 53 my @fastq;
54 open (my $in, $in_file) || die "cannot open $in_file $!\n"; 54 open (my $in, $in_file) || die "cannot open $in_file $!\n";
55 open (my $out, ">".$out_file) || die "cannot create $out_file $!\n"; 55 open (my $out, ">".$out_file) || die "cannot create $out_file $!\n";
56 while(<$in>) 56 while(<$in>)
57 {
58 chomp $_;
59 $cmp++; $line++;
60 if ($cmp == 1)
61 { 57 {
62 die "file do not contain a @ at line $line\n" unless ($_ =~ /^\@/ ); 58 chomp $_;
63 $ok = 0; @fastq = (); 59 $cmp++; $line++;
64 push(@fastq,$_); 60 if ($cmp == 1)
61 {
62 die "file do not contain a @ at line $line\n" unless ($_ =~ /^\@/ );
63 $ok = 0; @fastq = ();
64 push(@fastq,$_);
65 }
66 elsif ($cmp == 2)
67 {
68 #die "unrecognized symbol at line $line\n" unless ($_ =~ /[atcgATCGnN]+/ || $_ =~ /^$/ );
69 push(@fastq,$_);
70 $size = length($_);
71 if ($size >= $min && $size <= $max)
72 {
73 $numreads++;
74 ${$sizeHashR}{$size}+=1;
75 ${$duplicateHashR}{$_}+=1 if (defined($duplicateHashR));
76 $ok = 1;
77 }
78 }
79 elsif ($cmp == 3 )
80 {
81 die "file do not contain a + at line $line\n" unless $_ =~ /^\+/;
82 push(@fastq,$_);
83 }
84 elsif ($cmp == 4 )
85 {
86 push(@fastq,$_);
87 $cmp = 0;
88 if ($ok == 1)
89 {
90 foreach my $t (@fastq)
91 {
92 print $out $t."\n";
93 }
94 }
95 }
65 } 96 }
66 elsif ($cmp == 2) 97 close $in; close $out;
67 { 98 return $numreads;
68 #die "unrecognized symbol at line $line\n" unless ($_ =~ /[atcgATCGnN]+/ || $_ =~ /^$/ );
69 push(@fastq,$_);
70 $size = length($_);
71 if ($size >= $min && $size <= $max)
72 {
73 $numreads++;
74 ${$sizeHashR}{$size}+=1;
75 ${$duplicateHashR}{$_}+=1 if (defined($duplicateHashR));
76 $ok = 1;
77 }
78 }
79 elsif ($cmp == 3 )
80 {
81 die "file do not contain a + at line $line\n" unless $_ =~ /^\+/;
82 push(@fastq,$_);
83 }
84 elsif ($cmp == 4 )
85 {
86 push(@fastq,$_);
87 $cmp = 0;
88 if ($ok == 1)
89 {
90 foreach my $t (@fastq)
91 {
92 print $out $t."\n";
93 }
94 }
95 }
96 }
97 close $in; close $out;
98 return $numreads;
99 } 99 }
100 100
101 1; 101 1;