Mercurial > repos > brasset_jensen > srnapipe
comparison bin/resize.pm @ 61:9185ca0a7b43 draft
Updated package according to recommendations.
author | pierre.pouchin |
---|---|
date | Wed, 16 Jan 2019 08:18:13 -0500 |
parents | 4bc00caa60b4 |
children |
comparison
equal
deleted
inserted
replaced
60:9645d995fb3c | 61:9185ca0a7b43 |
---|---|
11 our @ISA = qw( Exporter ); | 11 our @ISA = qw( Exporter ); |
12 our @EXPORT_OK = qw( &size_distribution ); | 12 our @EXPORT_OK = qw( &size_distribution ); |
13 | 13 |
14 sub size_distribution | 14 sub size_distribution |
15 { | 15 { |
16 my ( $fastq, $fastq_out, $dir, $min, $max ) = @_; | 16 my ( $fastq, $fastq_out, $dir, $min, $max ) = @_; |
17 | 17 |
18 my ( %fragments_size, %duplicates ) ; | 18 my ( %fragments_size, %duplicates ) ; |
19 my $num = size($min, $max, $fastq, $fastq_out, \%fragments_size, \%duplicates); | 19 my $num = size($min, $max, $fastq, $fastq_out, \%fragments_size, \%duplicates); |
20 | 20 |
21 my $png = $dir.'histogram.png'; | 21 my $png = $dir.'histogram.png'; |
22 histogram(\%fragments_size, $png, $num); | 22 histogram(\%fragments_size, $png, $num); |
23 | 23 |
24 my $size = $dir.'reads_size.txt'; | 24 my $size = $dir.'reads_size.txt'; |
25 | 25 |
26 | 26 |
27 my $pourcentage; | 27 my $pourcentage; |
28 open my $o, '>', $size || die "cannot open $size $!\n"; | 28 open my $o, '>', $size || die "cannot open $size $!\n"; |
29 print $o "size\tnumber\tpercentage\n"; | 29 print $o "size\tnumber\tpercentage\n"; |
30 foreach my $k (sort { $a <=> $b } keys %fragments_size ) | 30 foreach my $k (sort { $a <=> $b } keys %fragments_size ) |
31 { | 31 { |
32 $pourcentage = $fragments_size{$k} / $num * 100; | 32 $pourcentage = $fragments_size{$k} / $num * 100; |
33 | |
34 print $o "$k\t$fragments_size{$k}\t"; | |
35 printf $o "%.2f\n",$pourcentage; | |
36 } | |
37 close $o; | |
33 | 38 |
34 print $o "$k\t$fragments_size{$k}\t"; | 39 my $dup = $dir.'duplicates.txt' ; |
35 printf $o "%.2f\n",$pourcentage; | 40 open $o, '>', $dup || die "cannot open $size $!\n"; |
36 } | 41 print $o "size\tnumber\n"; |
37 close $o; | 42 foreach my $k (sort { $duplicates{$b} <=> $duplicates{$a} } keys %duplicates ) |
38 | 43 { |
39 my $dup = $dir.'duplicates.txt' ; | 44 print $o "$k\t$duplicates{$k}\n"; |
40 open $o, '>', $dup || die "cannot open $size $!\n"; | 45 } |
41 print $o "size\tnumber\n"; | 46 close $o; |
42 foreach my $k (sort { $duplicates{$b} <=> $duplicates{$a} } keys %duplicates ) | |
43 { | |
44 print $o "$k\t$duplicates{$k}\n"; | |
45 } | |
46 close $o; | |
47 } | 47 } |
48 | 48 |
49 sub size | 49 sub size |
50 { | 50 { |
51 my ($min, $max, $in_file, $out_file, $sizeHashR, $duplicateHashR) = @_; | 51 my ($min, $max, $in_file, $out_file, $sizeHashR, $duplicateHashR) = @_; |
52 my ($numreads, $size, $cmp, $ok, $line) = (0, 0, 0, 0); | 52 my ($numreads, $size, $cmp, $ok, $line) = (0, 0, 0, 0); |
53 my @fastq; | 53 my @fastq; |
54 open (my $in, $in_file) || die "cannot open $in_file $!\n"; | 54 open (my $in, $in_file) || die "cannot open $in_file $!\n"; |
55 open (my $out, ">".$out_file) || die "cannot create $out_file $!\n"; | 55 open (my $out, ">".$out_file) || die "cannot create $out_file $!\n"; |
56 while(<$in>) | 56 while(<$in>) |
57 { | |
58 chomp $_; | |
59 $cmp++; $line++; | |
60 if ($cmp == 1) | |
61 { | 57 { |
62 die "file do not contain a @ at line $line\n" unless ($_ =~ /^\@/ ); | 58 chomp $_; |
63 $ok = 0; @fastq = (); | 59 $cmp++; $line++; |
64 push(@fastq,$_); | 60 if ($cmp == 1) |
61 { | |
62 die "file do not contain a @ at line $line\n" unless ($_ =~ /^\@/ ); | |
63 $ok = 0; @fastq = (); | |
64 push(@fastq,$_); | |
65 } | |
66 elsif ($cmp == 2) | |
67 { | |
68 #die "unrecognized symbol at line $line\n" unless ($_ =~ /[atcgATCGnN]+/ || $_ =~ /^$/ ); | |
69 push(@fastq,$_); | |
70 $size = length($_); | |
71 if ($size >= $min && $size <= $max) | |
72 { | |
73 $numreads++; | |
74 ${$sizeHashR}{$size}+=1; | |
75 ${$duplicateHashR}{$_}+=1 if (defined($duplicateHashR)); | |
76 $ok = 1; | |
77 } | |
78 } | |
79 elsif ($cmp == 3 ) | |
80 { | |
81 die "file do not contain a + at line $line\n" unless $_ =~ /^\+/; | |
82 push(@fastq,$_); | |
83 } | |
84 elsif ($cmp == 4 ) | |
85 { | |
86 push(@fastq,$_); | |
87 $cmp = 0; | |
88 if ($ok == 1) | |
89 { | |
90 foreach my $t (@fastq) | |
91 { | |
92 print $out $t."\n"; | |
93 } | |
94 } | |
95 } | |
65 } | 96 } |
66 elsif ($cmp == 2) | 97 close $in; close $out; |
67 { | 98 return $numreads; |
68 #die "unrecognized symbol at line $line\n" unless ($_ =~ /[atcgATCGnN]+/ || $_ =~ /^$/ ); | |
69 push(@fastq,$_); | |
70 $size = length($_); | |
71 if ($size >= $min && $size <= $max) | |
72 { | |
73 $numreads++; | |
74 ${$sizeHashR}{$size}+=1; | |
75 ${$duplicateHashR}{$_}+=1 if (defined($duplicateHashR)); | |
76 $ok = 1; | |
77 } | |
78 } | |
79 elsif ($cmp == 3 ) | |
80 { | |
81 die "file do not contain a + at line $line\n" unless $_ =~ /^\+/; | |
82 push(@fastq,$_); | |
83 } | |
84 elsif ($cmp == 4 ) | |
85 { | |
86 push(@fastq,$_); | |
87 $cmp = 0; | |
88 if ($ok == 1) | |
89 { | |
90 foreach my $t (@fastq) | |
91 { | |
92 print $out $t."\n"; | |
93 } | |
94 } | |
95 } | |
96 } | |
97 close $in; close $out; | |
98 return $numreads; | |
99 } | 99 } |
100 | 100 |
101 1; | 101 1; |