Mercurial > repos > brasset_jensen > srnapipe
annotate bin/resize.pm @ 61:9185ca0a7b43 draft
Updated package according to recommendations.
author | pierre.pouchin |
---|---|
date | Wed, 16 Jan 2019 08:18:13 -0500 |
parents | 4bc00caa60b4 |
children |
rev | line source |
---|---|
61
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
40
diff
changeset
|
1 package resize; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
40
diff
changeset
|
2 |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
40
diff
changeset
|
3 use strict; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
40
diff
changeset
|
4 use warnings; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
40
diff
changeset
|
5 |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
40
diff
changeset
|
6 use FindBin; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
40
diff
changeset
|
7 use lib $FindBin::Bin; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
40
diff
changeset
|
8 use Rcall qw ( histogram ); |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
40
diff
changeset
|
9 |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
40
diff
changeset
|
10 use Exporter; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
40
diff
changeset
|
11 our @ISA = qw( Exporter ); |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
40
diff
changeset
|
12 our @EXPORT_OK = qw( &size_distribution ); |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
40
diff
changeset
|
13 |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
40
diff
changeset
|
# size_distribution( $fastq, $fastq_out, $dir, $min, $max )
#
# Filters the reads of $fastq by length through size(), which writes the
# reads whose length lies in [$min, $max] to $fastq_out, then produces three
# reports. Each report path is built by appending a file name directly to
# $dir:
#   histogram.png   - drawn by histogram() from the per-length read counts
#   reads_size.txt  - one row per read length: length, count, percentage
#   duplicates.txt  - one row per distinct sequence, most frequent first
#
# NOTE(review): $dir is concatenated as-is, so it presumably must already end
# with a path separator -- confirm against the callers.
#
# Dies if one of the two text reports cannot be opened or closed.
sub size_distribution
{
  my ( $fastq, $fastq_out, $dir, $min, $max ) = @_;

  my ( %fragments_size, %duplicates );
  my $num = size($min, $max, $fastq, $fastq_out, \%fragments_size, \%duplicates);

  my $png = $dir.'histogram.png';
  histogram(\%fragments_size, $png, $num);

  my $size = $dir.'reads_size.txt';
  # 'or', not '||': '||' binds to the file name, which left the die unreachable.
  open my $size_fh, '>', $size or die "cannot open $size $!\n";
  print $size_fh "size\tnumber\tpercentage\n";
  foreach my $k (sort { $a <=> $b } keys %fragments_size )
  {
    # %fragments_size is only non-empty when at least one read was kept,
    # so $num cannot be zero inside this loop.
    my $pourcentage = $fragments_size{$k} / $num * 100;

    print $size_fh "$k\t$fragments_size{$k}\t";
    printf $size_fh "%.2f\n", $pourcentage;
  }
  # Buffered write errors only surface when the handle is closed.
  close $size_fh or die "cannot close $size $!\n";

  my $dup = $dir.'duplicates.txt';
  open my $dup_fh, '>', $dup or die "cannot open $dup $!\n";
  # The header text is kept unchanged for downstream consumers; the first
  # column actually holds the read sequence (the keys filled in by size()).
  print $dup_fh "size\tnumber\n";
  foreach my $k (sort { $duplicates{$b} <=> $duplicates{$a} } keys %duplicates )
  {
    print $dup_fh "$k\t$duplicates{$k}\n";
  }
  close $dup_fh or die "cannot close $dup $!\n";
}
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
40
diff
changeset
|
48 |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
40
diff
changeset
|
# size( $min, $max, $in_file, $out_file, $sizeHashR, $duplicateHashR )
#
# Reads the FASTQ file $in_file as consecutive four-line records and copies
# to $out_file every record whose sequence length lies in [$min, $max].
#
#   $sizeHashR      - hash ref; for each kept read, the entry keyed by its
#                     sequence length is incremented.
#   $duplicateHashR - optional hash ref; for each kept read, the entry keyed
#                     by its sequence is incremented. Skipped when undef.
#
# Returns the number of reads written to $out_file.
#
# Dies when a file cannot be opened, when the output cannot be closed, when
# the first line of a record does not start with '@', or when the third line
# does not start with '+'.
#
# A read is counted only once its fourth line has been read, so an
# incomplete record at the end of the file is neither counted nor written.
sub size
{
  my ($min, $max, $in_file, $out_file, $sizeHashR, $duplicateHashR) = @_;
  my ($numreads, $cmp, $ok, $line) = (0, 0, 0, 0);
  my @fastq;

  # Three-argument open: the file names are never interpreted as open modes.
  open (my $in, '<', $in_file) || die "cannot open $in_file $!\n";
  open (my $out, '>', $out_file) || die "cannot create $out_file $!\n";

  # A named lexical is used instead of $_ so the caller's $_ is left alone.
  while (my $row = <$in>)
  {
    chomp $row;
    $cmp++; $line++;
    if ($cmp == 1)
    {
      # Header line: start a new record.
      die "file do not contain a @ at line $line\n" unless ($row =~ /^\@/ );
      $ok = 0;
      @fastq = ($row);
    }
    elsif ($cmp == 2)
    {
      # Sequence line: decide whether the record passes the length filter.
      push(@fastq, $row);
      my $size = length($row);
      $ok = 1 if ($size >= $min && $size <= $max);
    }
    elsif ($cmp == 3)
    {
      die "file do not contain a + at line $line\n" unless $row =~ /^\+/;
      push(@fastq, $row);
    }
    elsif ($cmp == 4)
    {
      # Quality line: the record is complete, so count and emit it if kept.
      push(@fastq, $row);
      $cmp = 0;
      if ($ok == 1)
      {
        my $sequence = $fastq[1];
        $numreads++;
        ${$sizeHashR}{length($sequence)} += 1;
        ${$duplicateHashR}{$sequence} += 1 if (defined($duplicateHashR));
        print {$out} join("\n", @fastq), "\n";
      }
    }
  }
  close $in;
  # Buffered write errors only surface when the handle is closed.
  close $out or die "cannot close $out_file $!\n";
  return $numreads;
}
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
40
diff
changeset
|
100 |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
40
diff
changeset
|
101 1; |