annotate lib/sRNAPipe/subgroups.pm @ 64:967512924317 draft

planemo upload for repository https://github.com/GReD-Clermont/sRNAPipe/ commit 410509088292be0687b8da3ea3bb75e72866a87d
author brasset_jensen
date Mon, 28 Jan 2019 11:57:15 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
64
967512924317 planemo upload for repository https://github.com/GReD-Clermont/sRNAPipe/ commit 410509088292be0687b8da3ea3bb75e72866a87d
brasset_jensen
parents:
diff changeset
1 package sRNAPipe::subgroups;
967512924317 planemo upload for repository https://github.com/GReD-Clermont/sRNAPipe/ commit 410509088292be0687b8da3ea3bb75e72866a87d
brasset_jensen
parents:
diff changeset
2
967512924317 planemo upload for repository https://github.com/GReD-Clermont/sRNAPipe/ commit 410509088292be0687b8da3ea3bb75e72866a87d
brasset_jensen
parents:
diff changeset
3 use strict;
967512924317 planemo upload for repository https://github.com/GReD-Clermont/sRNAPipe/ commit 410509088292be0687b8da3ea3bb75e72866a87d
brasset_jensen
parents:
diff changeset
4 use warnings;
967512924317 planemo upload for repository https://github.com/GReD-Clermont/sRNAPipe/ commit 410509088292be0687b8da3ea3bb75e72866a87d
brasset_jensen
parents:
diff changeset
5 use Exporter;
967512924317 planemo upload for repository https://github.com/GReD-Clermont/sRNAPipe/ commit 410509088292be0687b8da3ea3bb75e72866a87d
brasset_jensen
parents:
diff changeset
6 our @ISA = qw( Exporter );
967512924317 planemo upload for repository https://github.com/GReD-Clermont/sRNAPipe/ commit 410509088292be0687b8da3ea3bb75e72866a87d
brasset_jensen
parents:
diff changeset
7 our @EXPORT_OK = qw( &subgroups );
967512924317 planemo upload for repository https://github.com/GReD-Clermont/sRNAPipe/ commit 410509088292be0687b8da3ea3bb75e72866a87d
brasset_jensen
parents:
diff changeset
8
967512924317 planemo upload for repository https://github.com/GReD-Clermont/sRNAPipe/ commit 410509088292be0687b8da3ea3bb75e72866a87d
brasset_jensen
parents:
diff changeset
9 use POSIX;
967512924317 planemo upload for repository https://github.com/GReD-Clermont/sRNAPipe/ commit 410509088292be0687b8da3ea3bb75e72866a87d
brasset_jensen
parents:
diff changeset
10 use File::Copy;
967512924317 planemo upload for repository https://github.com/GReD-Clermont/sRNAPipe/ commit 410509088292be0687b8da3ea3bb75e72866a87d
brasset_jensen
parents:
diff changeset
11 use FindBin;
967512924317 planemo upload for repository https://github.com/GReD-Clermont/sRNAPipe/ commit 410509088292be0687b8da3ea3bb75e72866a87d
brasset_jensen
parents:
diff changeset
12 use lib "$FindBin::Bin/../lib";
967512924317 planemo upload for repository https://github.com/GReD-Clermont/sRNAPipe/ commit 410509088292be0687b8da3ea3bb75e72866a87d
brasset_jensen
parents:
diff changeset
13 use sRNAPipe::align qw ( get_hash_alignment );
967512924317 planemo upload for repository https://github.com/GReD-Clermont/sRNAPipe/ commit 410509088292be0687b8da3ea3bb75e72866a87d
brasset_jensen
parents:
diff changeset
14
967512924317 planemo upload for repository https://github.com/GReD-Clermont/sRNAPipe/ commit 410509088292be0687b8da3ea3bb75e72866a87d
brasset_jensen
parents:
diff changeset
# Run one optional filtering step (rRNAs, tRNAs or snRNAs) of the cascade.
#
# When $index is the string 'None' the step is skipped and the input FASTQ is
# simply renamed to the output path, so the next step always finds its input.
# Otherwise the reads of $in are aligned on $index with get_hash_alignment,
# the first returned value is stored in $repartition->{$name} and @$last is
# overwritten with the returned list.
# NOTE(review): from the argument positions used in this file, $out looks like
# the FASTQ of rejected (unaligned) reads and $in the FASTQ to align - confirm
# against sRNAPipe::align::get_hash_alignment.
sub _run_optional_filter
{
  my ( $index, $name, $mis, $proc, $in, $out, $report, $repartition, $last ) = @_;

  if ( $index eq 'None' )
  {
    move($in, $out) or die "cannot move $in to $out $!\n";
    return;
  }

  # String::Random is only needed when a step really runs; load it lazily
  # because this file has no 'use String::Random' of its own.
  require String::Random;
  my $sam = String::Random->new->randpattern("CCcccccc");

  @{$last} = get_hash_alignment($index, $mis, 0, 1, 'NA', $out, $in, $proc, $name, $sam, $report);
  $repartition->{$name} = $last->[0];

  # The SAM file of these steps is only a scratch file: remove it together
  # with the '_aln.err' / '_samse.err' files named after it.
  unlink $sam, $sam.'_aln.err', $sam.'_samse.err';
  return;
}

# Dispatch the reads of a FASTQ file into subgroups by successive alignments.
#
# The reads of $fin are aligned on miRNAs, then what is left is passed in turn
# through the optional rRNAs, tRNAs and snRNAs steps (each one skipped when its
# index is the string 'None'), giving 'bonafide_reads.fastq'. That file is then
# aligned on transcripts, the remaining reads on TEs, and it is finally split
# by read length with fastqSubgroups. A 'repartition.txt' table (type, number,
# percentage) is written in $dir.
#
# Parameters:
#   $fin                      input FASTQ file
#   $dir                      output directory (created if needed)
#   $mis, $mis_TE             mismatch values passed to get_hash_alignment;
#                             $mis_TE is used for the TE alignment only
#   $proc                     passed through to get_hash_alignment
#   $tRNAs, $rRNAs, $snRNAs   optional references, or 'None' to skip the step
#   $miRNAs, $transcripts, $TE  references of the mandatory steps
#   $min_si, $max_si, $min_pi, $max_pi  read length bounds given to fastqSubgroups
#   $report                   filehandle the progress messages are printed to
#
# Returns ( $bo, $mi, $pi ):
#   $bo  second value returned by the last alignment run before the transcripts
#        step (presumably the number of bonafide reads - TODO confirm)
#   $mi  first value returned by the miRNAs alignment
#   $pi  value returned by fastqSubgroups (reads in the piRNA length range)
sub subgroups
{
  my ($fin, $dir, $mis, $mis_TE, $proc, $tRNAs, $rRNAs, $snRNAs, $miRNAs, $transcripts, $TE, $min_si, $max_si, $min_pi, $max_pi, $report ) = @_;
  my %repartition;

  srand();
  print $report "----------------------------\n";
  print $report "Create subgroups:\nfastq_in: $fin\ndirectory_out: $dir\nmismatches: $mis\nmismatches TE: $mis_TE\n";

  # An already existing directory is fine; anything else is fatal because
  # every following step writes into it.
  unless ( -d $dir )
  {
    mkdir $dir or die "cannot create directory $dir $!\n";
  }
  $dir = $dir.'/' unless $dir =~ /\/$/;

  # miRNAs: the only step that keeps a FASTQ of accepted reads.
  my $accept_miRNas = $dir.'miRNAs.fastq';
  my $reject_miRNAs = $dir.'miRNAs_rejected.fastq';
  my $sam_miRNAs = $dir.'miRNAs.sam';
  my @tmp = get_hash_alignment($miRNAs, $mis, 1, 1, $accept_miRNas, $reject_miRNAs, $fin, $proc, 'miRNAs',$sam_miRNAs, $report);
  my $mi = $tmp[0];
  $repartition{'miRNAs'} = $mi;

  # Optional filters. Each step has its own intermediate file and is skipped
  # according to its own index.
  my $reject_rRNAs = $dir.'rRNAs_rejected.fastq';
  my $reject_tRNAs = $dir.'tRNAs_rejected.fastq';
  my $bonafide = $dir.'bonafide_reads.fastq';
  _run_optional_filter($rRNAs, 'rRNAs', $mis, $proc, $reject_miRNAs, $reject_rRNAs, $report, \%repartition, \@tmp);
  _run_optional_filter($tRNAs, 'tRNAs', $mis, $proc, $reject_rRNAs, $reject_tRNAs, $report, \%repartition, \@tmp);
  _run_optional_filter($snRNAs, 'snRNAs', $mis, $proc, $reject_tRNAs, $bonafide, $report, \%repartition, \@tmp);

  # @tmp still holds the result of the last alignment that was really run, so
  # its second value describes the reads that ended up in $bonafide.
  my $bo = $tmp[1];

  my $sam_transcripts = $dir.'transcripts.sam';
  my $reject_transcripts = $dir.'rejected_transcripts.fastq';
  @tmp = get_hash_alignment($transcripts, $mis, 0, 1, 'NA', $reject_transcripts, $bonafide, $proc, 'transcripts', $sam_transcripts, $report, $dir.'transcripts.fai');
  $repartition{'transcripts'} = $tmp[0];

  my $sam_TEs = $dir.'TEs.sam';
  my $reject_TEs = $dir.'rejected.fastq';
  @tmp = get_hash_alignment($TE, $mis_TE, 0, 1, 'NA', $reject_TEs, $reject_transcripts, $proc, 'TEs', $sam_TEs, $report, $dir.'TEs.fai' );
  $repartition{'TEs'} = $tmp[0];
  $repartition{'others'} = $tmp[1];

  # Intermediate FASTQ files are not part of the results.
  unlink $reject_transcripts, $reject_rRNAs, $reject_miRNAs, $reject_tRNAs;

  #create repartition
  my $pi = fastqSubgroups($bonafide, $dir, $min_si, $max_si, $min_pi, $max_pi );

  open (my $re, '>', $dir.'repartition.txt') or die "cannot open $dir repartition.txt $!\n";
  print $re "type\tnumber\tpercentage\n";
  my $sum = 0;
  $sum += $_ foreach values %repartition;
  foreach my $k ( sort keys %repartition )
  {
    # Percentages stay at 0 when nothing was counted at all.
    my $prct = 0;
    $prct = $repartition{$k} / $sum * 100 if $sum != 0;
    printf $re "%s\t%s\t%.2f\n", $k, $repartition{$k}, $prct;
  }
  close $re or die "cannot close $dir repartition.txt $!\n";

  return ( $bo, $mi, $pi);
}
967512924317 planemo upload for repository https://github.com/GReD-Clermont/sRNAPipe/ commit 410509088292be0687b8da3ea3bb75e72866a87d
brasset_jensen
parents:
diff changeset
110
967512924317 planemo upload for repository https://github.com/GReD-Clermont/sRNAPipe/ commit 410509088292be0687b8da3ea3bb75e72866a87d
brasset_jensen
parents:
diff changeset
# Split a FASTQ file into siRNA and piRNA FASTQ files according to read length.
#
# Every 4-line record of $fastq whose sequence length lies in
# [$min_si, $max_si] is copied to <output_directory>siRNAs.fastq and every
# record whose length lies in [$min_pi, $max_pi] to
# <output_directory>piRNAs.fastq. A record matching both ranges is written to
# both files. $output_directory is used as a plain prefix, so it must already
# end with a path separator.
#
# Dies when one of the three files cannot be opened, when the first line of a
# record does not start with '@' or its third line does not start with '+'.
# An incomplete record at the end of the file is ignored.
#
# Returns the number of reads whose length lies in the piRNA range.
sub fastqSubgroups
{
  my ( $fastq, $output_directory, $min_si, $max_si, $min_pi, $max_pi ) = @_;
  my $fastq_siRNA = $output_directory."siRNAs.fastq";
  my $fastq_piRNA = $output_directory."piRNAs.fastq";

  # 'or' (not '||') so that the die is attached to open() and not to the
  # file name, which is always true.
  open (my $fic, '<', $fastq) or die "cannot open input file $! \n";
  open (my $si, '>', $fastq_siRNA) or die "cannot open siRNA.fastq $! \n";
  open (my $pi, '>', $fastq_piRNA) or die "cannot open piRNA.fastq $! \n";

  my $piRNA_number = 0;
  my $cmp = 0;                  # position of the current line in its record (1..4)
  my ( $is_si, $is_pi ) = (0, 0);
  my @record;

  while ( my $line = <$fic> )
  {
    chomp $line;
    $cmp++;
    if ($cmp == 1)
    {
      die "file do not contain a @ at line $.\n" unless $line =~ /^\@/;
      ( $is_si, $is_pi ) = (0, 0);
      @record = ($line);
    }
    elsif ($cmp == 2)
    {
      push(@record, $line);
      my $length = length($line);
      $is_si = ( $length >= $min_si && $length <= $max_si ) ? 1 : 0;
      if ( $length >= $min_pi && $length <= $max_pi )
      {
        $is_pi = 1;
        $piRNA_number++;
      }
    }
    elsif ($cmp == 3)
    {
      die "file do not contain a + at line $.\n" unless $line =~ /^\+/;
      push(@record, $line);
    }
    else
    {
      # Fourth line: the record is complete, write it where it belongs.
      push(@record, $line);
      $cmp = 0;
      print $pi map { $_."\n" } @record if $is_pi;
      print $si map { $_."\n" } @record if $is_si;
    }
  }
  close $fic;
  # Buffered write errors only show up at close time.
  close $si or die "cannot close siRNA.fastq $! \n";
  close $pi or die "cannot close piRNA.fastq $! \n";
  return ($piRNA_number);
}
967512924317 planemo upload for repository https://github.com/GReD-Clermont/sRNAPipe/ commit 410509088292be0687b8da3ea3bb75e72866a87d
brasset_jensen
parents:
diff changeset
173
967512924317 planemo upload for repository https://github.com/GReD-Clermont/sRNAPipe/ commit 410509088292be0687b8da3ea3bb75e72866a87d
brasset_jensen
parents:
diff changeset
174 1;