annotate lib/sRNAPipe/resize.pm @ 65:0e9adbd82bb4 draft

planemo upload for repository https://github.com/GReD-Clermont/sRNAPipe/ commit a623ac00600191204ede63c45862a7fbb561afd2
author brasset_jensen
date Wed, 30 Jan 2019 02:41:24 -0500
parents 967512924317
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
64
967512924317 planemo upload for repository https://github.com/GReD-Clermont/sRNAPipe/ commit 410509088292be0687b8da3ea3bb75e72866a87d
brasset_jensen
parents:
diff changeset
1 package sRNAPipe::resize;
967512924317 planemo upload for repository https://github.com/GReD-Clermont/sRNAPipe/ commit 410509088292be0687b8da3ea3bb75e72866a87d
brasset_jensen
parents:
diff changeset
2
967512924317 planemo upload for repository https://github.com/GReD-Clermont/sRNAPipe/ commit 410509088292be0687b8da3ea3bb75e72866a87d
brasset_jensen
parents:
diff changeset
3 use strict;
967512924317 planemo upload for repository https://github.com/GReD-Clermont/sRNAPipe/ commit 410509088292be0687b8da3ea3bb75e72866a87d
brasset_jensen
parents:
diff changeset
4 use warnings;
967512924317 planemo upload for repository https://github.com/GReD-Clermont/sRNAPipe/ commit 410509088292be0687b8da3ea3bb75e72866a87d
brasset_jensen
parents:
diff changeset
5
967512924317 planemo upload for repository https://github.com/GReD-Clermont/sRNAPipe/ commit 410509088292be0687b8da3ea3bb75e72866a87d
brasset_jensen
parents:
diff changeset
6 use FindBin;
967512924317 planemo upload for repository https://github.com/GReD-Clermont/sRNAPipe/ commit 410509088292be0687b8da3ea3bb75e72866a87d
brasset_jensen
parents:
diff changeset
7 use lib "$FindBin::Bin/../lib";
967512924317 planemo upload for repository https://github.com/GReD-Clermont/sRNAPipe/ commit 410509088292be0687b8da3ea3bb75e72866a87d
brasset_jensen
parents:
diff changeset
8 use sRNAPipe::Rcall qw ( histogram );
967512924317 planemo upload for repository https://github.com/GReD-Clermont/sRNAPipe/ commit 410509088292be0687b8da3ea3bb75e72866a87d
brasset_jensen
parents:
diff changeset
9
967512924317 planemo upload for repository https://github.com/GReD-Clermont/sRNAPipe/ commit 410509088292be0687b8da3ea3bb75e72866a87d
brasset_jensen
parents:
diff changeset
10 use Exporter;
967512924317 planemo upload for repository https://github.com/GReD-Clermont/sRNAPipe/ commit 410509088292be0687b8da3ea3bb75e72866a87d
brasset_jensen
parents:
diff changeset
11 our @ISA = qw( Exporter );
967512924317 planemo upload for repository https://github.com/GReD-Clermont/sRNAPipe/ commit 410509088292be0687b8da3ea3bb75e72866a87d
brasset_jensen
parents:
diff changeset
12 our @EXPORT_OK = qw( &size_distribution );
967512924317 planemo upload for repository https://github.com/GReD-Clermont/sRNAPipe/ commit 410509088292be0687b8da3ea3bb75e72866a87d
brasset_jensen
parents:
diff changeset
13
967512924317 planemo upload for repository https://github.com/GReD-Clermont/sRNAPipe/ commit 410509088292be0687b8da3ea3bb75e72866a87d
brasset_jensen
parents:
diff changeset
# size_distribution( $fastq, $fastq_out, $dir, $min, $max )
#
# Filters the reads of $fastq by length through size(), which writes the
# reads whose length lies in [$min, $max] to $fastq_out, then reports on
# the reads that were kept:
#   - $dir . 'histogram.png'  : produced by histogram() from the per-length counts
#   - $dir . 'reads_size.txt' : length, read count and percentage of kept reads
#   - $dir . 'duplicates.txt' : each distinct kept sequence and its count,
#                               most frequent first
#
# $dir is concatenated directly with the file names, so it must already end
# with a path separator when it designates a directory.
#
# Dies if a report file cannot be opened or closed.
sub size_distribution
{
  my ( $fastq, $fastq_out, $dir, $min, $max ) = @_;

  my ( %fragments_size, %duplicates );
  my $num = size($min, $max, $fastq, $fastq_out, \%fragments_size, \%duplicates);

  my $png = $dir.'histogram.png';
  histogram(\%fragments_size, $png, $num);

  # Low-precedence "or" is required here: with "||" the die would attach to
  # the (always true) file name and a failed open would go unnoticed.
  my $size = $dir.'reads_size.txt';
  open my $size_fh, '>', $size or die "cannot open $size $!\n";
  print {$size_fh} "size\tnumber\tpercentage\n";
  foreach my $k ( sort { $a <=> $b } keys %fragments_size )
  {
    # %fragments_size is only filled for counted reads, so $num is non-zero
    # whenever this loop body runs.
    my $pourcentage = $fragments_size{$k} / $num * 100;
    printf {$size_fh} "%s\t%s\t%.2f\n", $k, $fragments_size{$k}, $pourcentage;
  }
  # Buffered write errors only surface at close time.
  close $size_fh or die "cannot close $size $!\n";

  my $dup = $dir.'duplicates.txt';
  open my $dup_fh, '>', $dup or die "cannot open $dup $!\n";
  # Header kept as-is for downstream consumers; the first column actually
  # holds the read sequence (the keys of %duplicates).
  print {$dup_fh} "size\tnumber\n";
  foreach my $k ( sort { $duplicates{$b} <=> $duplicates{$a} } keys %duplicates )
  {
    print {$dup_fh} "$k\t$duplicates{$k}\n";
  }
  close $dup_fh or die "cannot close $dup $!\n";
}
967512924317 planemo upload for repository https://github.com/GReD-Clermont/sRNAPipe/ commit 410509088292be0687b8da3ea3bb75e72866a87d
brasset_jensen
parents:
diff changeset
48
967512924317 planemo upload for repository https://github.com/GReD-Clermont/sRNAPipe/ commit 410509088292be0687b8da3ea3bb75e72866a87d
brasset_jensen
parents:
diff changeset
# size( $min, $max, $in_file, $out_file, $sizeHashR, $duplicateHashR )
#
# Reads the FASTQ file $in_file as consecutive 4-line records and copies to
# $out_file every record whose sequence line has a length within
# [$min, $max] (bounds included).
#
#   $sizeHashR      : hash ref, incremented per kept read, keyed by read length
#   $duplicateHashR : optional hash ref, incremented per kept read, keyed by
#                     the sequence string; skipped when undefined
#
# Returns the number of reads kept.
#
# Dies if a file cannot be opened, if the output cannot be closed, or if the
# first line of a record does not start with '@' or the third with '+'.
# A trailing record with fewer than four lines is never written out.
sub size
{
  my ($min, $max, $in_file, $out_file, $sizeHashR, $duplicateHashR) = @_;

  my $numreads = 0;   # reads kept so far
  my $cmp      = 0;   # position inside the current 4-line record (1..4)
  my $line     = 0;   # absolute input line number, for error messages
  my $ok       = 0;   # true when the current record passed the length filter
  my @fastq;          # lines of the current record

  # Three-argument open: the file names are used literally, so whitespace or
  # mode characters in a path cannot change what gets opened.
  open my $in, '<', $in_file or die "cannot open $in_file $!\n";
  open my $out, '>', $out_file or die "cannot create $out_file $!\n";

  while ( my $record_line = <$in> )
  {
    chomp $record_line;
    $cmp++;
    $line++;

    if ($cmp == 1)
    {
      die "file do not contain a @ at line $line\n" unless $record_line =~ /^\@/;
      $ok = 0;
      @fastq = ($record_line);
    }
    elsif ($cmp == 2)
    {
      # The sequence content itself is not validated, only its length.
      push @fastq, $record_line;
      my $size = length $record_line;
      if ($size >= $min && $size <= $max)
      {
        $numreads++;
        $sizeHashR->{$size}++;
        $duplicateHashR->{$record_line}++ if defined $duplicateHashR;
        $ok = 1;
      }
    }
    elsif ($cmp == 3)
    {
      die "file do not contain a + at line $line\n" unless $record_line =~ /^\+/;
      push @fastq, $record_line;
    }
    elsif ($cmp == 4)
    {
      push @fastq, $record_line;
      $cmp = 0;   # next line starts a new record
      if ($ok)
      {
        print {$out} "$_\n" for @fastq;
      }
    }
  }

  close $in;
  # Buffered write errors only surface at close time.
  close $out or die "cannot close $out_file $!\n";
  return $numreads;
}
967512924317 planemo upload for repository https://github.com/GReD-Clermont/sRNAPipe/ commit 410509088292be0687b8da3ea3bb75e72866a87d
brasset_jensen
parents:
diff changeset
100
967512924317 planemo upload for repository https://github.com/GReD-Clermont/sRNAPipe/ commit 410509088292be0687b8da3ea3bb75e72866a87d
brasset_jensen
parents:
diff changeset
101 1;