annotate bin/sRNAPipe.pl @ 61:9185ca0a7b43 draft

Updated package according to recommendations.
author pierre.pouchin
date Wed, 16 Jan 2019 08:18:13 -0500
parents 9645d995fb3c
children 967512924317
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
61
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
1 #!/usr/bin/perl
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
2 use strict;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
3 use warnings;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
4 use Getopt::Long;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
5 use Parallel::ForkManager;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
6 use File::Basename;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
7 use File::Copy::Recursive qw( dircopy );
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
8 use POSIX;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
9 use FindBin;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
10 use lib $FindBin::Bin;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
11 use resize qw ( size_distribution );
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
12 use subgroups qw (subgroups );
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
13 use ppp qw ( ping_pong_partners );
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
14 use Rcall qw (pie_chart bg_to_png );
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
15 use align qw ( to_build get_unique sam_count sam_count_mis sam_sorted_bam rpms_rpkm rpms_rpkm_te BWA_call get_fastq_seq extract_sam sam_to_bam_bg );
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
16 use html qw ( main_page details_pages menu_page ppp_page );
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
17 use File::Copy;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
18
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
19 if(@ARGV) {
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
20 my ( @fastq, @fastq_n, $dir, $min, $max, $mis, $misTE, $help, $Pcheck, $mapnumf, $html_out);
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
21 my ( $ref, $tRNAs, $rRNAs, $snRNAs, $miRNAs, $transcripts, $TE );
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
22 my ( $si_min, $si_max, $pi_min, $pi_max );
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
23 my ( $build_index, $build_tRNAs, $build_rRNAs, $build_snRNAs, $build_miRNAs, $build_transcripts, $build_TE );
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
24 my $max_procs = 8;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
25
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
26 ( $build_index, $build_tRNAs, $build_rRNAs, $build_snRNAs, $build_miRNAs, $build_transcripts, $build_TE ) = (0,0,0,0,0,0,0);
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
27 ( $min, $max, $mis, $misTE, $si_min, $si_max, $pi_min, $pi_max, $dir ) = ( 18, 29, 0, 3, 21, 21, 23, 29 );
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
28 $Pcheck ='true';
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
29
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
30 GetOptions (
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
31 "fastq=s" => \@fastq,
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
32 "fastq_n=s" => \@fastq_n,
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
33 "dir=s" => \$dir,
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
34 "min:i" => \$min,
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
35 "max:i" => \$max,
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
36 "si_min:i" => \$si_min,
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
37 "si_max:i" => \$si_max,
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
38 "pi_min:i" => \$pi_min,
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
39 "pi_max:i" => \$pi_max,
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
40 "mis:i" => \$mis,
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
41 "misTE:i" => \$misTE,
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
42 "html:s" => \$html_out,
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
43 "PPPon:s" => \$Pcheck,
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
44 "help" => \$help,
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
45 "ref:s" => \$ref,
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
46 "tRNAs:s" => \$tRNAs,
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
47 "rRNAs:s" => \$rRNAs,
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
48 "snRNAs:s" => \$snRNAs,
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
49 "miRNAs:s" => \$miRNAs,
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
50 "transcripts:s" => \$transcripts,
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
51 "TE:s" => \$TE,
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
52 "build_index" => \$build_index,
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
53 "build_tRNAs" => \$build_tRNAs,
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
54 "build_snRNAs" => \$build_snRNAs,
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
55 "build_miRNAs" => \$build_miRNAs,
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
56 "build_transcripts" => \$build_transcripts,
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
57 "build_rRNAs" => \$build_rRNAs,
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
58 "build_TE" => \$build_TE
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
59 );
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
60
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
61 my $fq_collection = 'fastq_dir/';
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
62 die "Missing mandatory --dir option!\n" unless defined $dir && length $dir; mkdir $dir; mkdir $fq_collection; # guard: an undefined $dir would be turned into '/' by the next statement
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
63 $dir = $dir.'/' unless $dir =~ /\/$/;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
64 mkdir $dir.'/css';mkdir $dir.'/js';
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
65 dircopy( $FindBin::Bin.'/css', $dir.'/css' );
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
66 dircopy( $FindBin::Bin.'/js', $dir.'/js' );
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
67
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
68 my $file = $dir.'report.txt';
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
69 open my $report, '>', $file or die "Cannot open $file $!\n";
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
70
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
71 my @toBuild = ( [$build_index, \$ref], [$build_tRNAs, \$tRNAs], [$build_rRNAs, \$rRNAs], [$build_snRNAs, \$snRNAs], [$build_miRNAs, \$miRNAs], [$build_transcripts, \$transcripts], [$build_TE, \$TE] );
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
72 to_build ( \@toBuild, $report, $dir );
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
73
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
74 die "No fastq file provided!\n" unless @fastq; my $proc_child = ceil($max_procs / scalar(@fastq)); # guard first: an empty --fastq list would be a division by zero
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
75 my $proc_grand_child = ceil($proc_child/4);
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
76 my $pm = Parallel::ForkManager->new($max_procs);
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
77 my $pm2 = Parallel::ForkManager->new($proc_grand_child);
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
78
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
79 $pm->run_on_finish( sub {
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
80 my ($pid, $exit_code, $ident) = @_;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
81 print $report "Fastq fork $ident just finished ".
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
82 "with PID $pid and exit code: $exit_code\n";
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
83 die "Something went wrong!\n" if $exit_code != 0;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
84 });
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
85
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
86
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
87 $pm->run_on_start( sub {
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
88 my ($pid,$ident)=@_;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
89 print $report "Fastq fork : $ident started, pid: $pid\n";
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
90 });
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
91
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
92
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
93 $pm2->run_on_finish( sub {
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
94 my ($pid, $exit_code, $ident) = @_;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
95 print $report "** Subgroup fork $ident just finished ".
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
96 "with PID $pid and exit code: $exit_code\n";
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
97 die "Something went wrong!\n" if $exit_code != 0;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
98 });
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
99
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
100
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
101 $pm2->run_on_start( sub {
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
102 my ($pid,$ident)=@_;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
103 print $report "** Subgroup fork $ident started, pid: $pid\n";
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
104 });
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
105
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
106 foreach my $child ( 0 .. $#fastq )
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
107 {
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
108 my @suffix = ('.fastq', '.fastq.gz', '.fq', '.fq.gz', 'ref', '.dat', '.fa','.fas','.fasta', '.txt'); # suffix patterns handed to fileparse() below
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
109 my ( $name, $path, $suffix ) = fileparse( $fastq[$child], @suffix );
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
110 my ( $ref_name, $ref_path, $ref_suffix ) = fileparse( $ref, @suffix );
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
111 my ( $TE_name, $TE_path, $TE_suffix ) = fileparse( $TE, @suffix );
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
112 my ( $ex_name, $ex_path, $ex_suffix ) = fileparse( $transcripts, @suffix );
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
113
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
114 $pm->start($fastq[$child]) and next;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
115
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
116 my $dir_fq = $dir.$fastq_n[$child].'/';
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
117 mkdir $dir_fq;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
118
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
119 my $gen_dir = $dir_fq.'genome/';
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
120 mkdir $gen_dir;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
121
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
122 my $size_dir = $dir_fq.'size/';
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
123 mkdir $size_dir;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
124
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
125 my $fastq_resized = $dir_fq.$name.'_'.$min.'-'.$max.'.fastq';
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
126 size_distribution ( $fastq[$child], $fastq_resized, $size_dir, $min, $max );
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
127
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
128 my $sam_genome = $gen_dir.$fastq_n[$child].'_'.$min.'-'.$max.'.sam';
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
129 my $sam_genome_unique = $gen_dir.$fastq_n[$child].'_'.$min.'-'.$max.'_unique.sam';
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
130 my $fastq_prefix = $gen_dir.$fastq_n[$child].'_'.$min.'-'.$max;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
131
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
132 BWA_call ( $ref, $fastq_resized, $sam_genome, $mis, $proc_child, $report );
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
133 my ( $fai_ref_hashP, $ma, $ma_uni ) = get_unique ( $sam_genome, $sam_genome_unique, $gen_dir, $fq_collection.$fastq_n[$child], 1, $report );
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
134
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
135 die "No Reads mapped on the genome reference!\n" if $ma == 0;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
136 my $scale = 1000000 / $ma;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
137 sam_to_bam_bg ( $sam_genome_unique, $scale, $proc_child );
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
138 sam_to_bam_bg ( $sam_genome, $scale, $proc_child );
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
139
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
140 my $Gviz_dir = $gen_dir.'Gviz/';
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
141 my $fai_file = $gen_dir.'fai';
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
142 mkdir $Gviz_dir;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
143 my $Gviz_dir_rand = $Gviz_dir.'rand/';
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
144 mkdir $Gviz_dir_rand;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
145 my $Gviz_dir_uni = $Gviz_dir.'unique/';
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
146 mkdir $Gviz_dir_uni;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
147
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
148 open my $gfai, '>', $fai_file or die "Cannot open $fai_file $!\n"; # fail loudly instead of printing to an unopened handle
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
149 foreach my $k ( sort keys %{$fai_ref_hashP} )
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
150 {
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
151 print $gfai "$k\t$fai_ref_hashP->{$k}\n";
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
152 }
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
153 close $gfai;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
154 bg_to_png ( $fai_file, $fastq_prefix.'_unique_plus.bedgraph', $fastq_prefix.'_unique_minus.bedgraph', $Gviz_dir_uni, 'Mb' );
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
155 bg_to_png ( $fai_file, $fastq_prefix.'_plus.bedgraph', $fastq_prefix.'_minus.bedgraph', $Gviz_dir_rand, 'Mb' );
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
156
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
157 my $group_dir = $dir_fq.'subgroups/';
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
158 my $fastq_uni = $fq_collection.$fastq_n[$child].'_unique_mappers.fastq';
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
159 my $fastq_all = $fq_collection.$fastq_n[$child].'_all_mappers.fastq';
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
160 my ($bo, $mi, $pi) = subgroups ( $fastq_all, $group_dir, $mis, $misTE, $proc_child, $tRNAs, $rRNAs, $snRNAs, $miRNAs, $transcripts, $TE, $si_min, $si_max, $pi_min, $pi_max, $report);
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
161
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
162 pie_chart($group_dir);
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
163
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
164 open my $dupnum, '<', $gen_dir.'dup_mapnum.txt' or die "cannot open dup_mapnum.txt $!"; # explicit read mode: three-argument open never interprets characters in the path as a mode
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
165 my %dupnum_genome;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
166 my $header = <$dupnum>;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
167 while (<$dupnum>)
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
168 {
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
169 chomp $_;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
170 my @dupline = split /\t/, $_;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
171 $dupnum_genome{$dupline[0]} = [$dupline[1], $dupline[2]];
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
172 }
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
173 close $dupnum;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
174
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
175 my $mi_sam = $group_dir.'miRNAs.sam';
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
176 mkdir $group_dir.'miRNAs/';
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
177 my $mi_count_file = $group_dir.'miRNAs/miRNAs_reads_counts.txt';
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
178 my ( $mi_count, $mi_ref_size ) = sam_count ( $mi_sam );
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
179
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
180 rpms_rpkm( $mi_count, $mi_ref_size, $ma, $mi_count_file, $pi, $mi, $bo );
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
181
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
182 my ( $sam_transcripts, $sam_TEs ) = ( $group_dir.'transcripts.sam', $group_dir.'TEs.sam' );
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
183 my @types = ($group_dir.'bonafide_reads.fastq', $group_dir.'miRNAs.fastq', $group_dir.'siRNAs.fastq', $group_dir.'piRNAs.fastq' );
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
184 my @types_names = ('bonafide_reads', 'miRNAs', 'siRNAs', 'piRNAs');
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
185 foreach my $grand_child ( 0 .. $#types )
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
186 {
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
187 my $type_dir = $group_dir.$types_names[$grand_child].'/';
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
188 my $type_prefix = $types_names[$grand_child].'-';
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
189 mkdir $type_dir;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
190 $pm2->start($types[$grand_child]) and next;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
191 my ( $type_sam_genome, $type_sam_TEs, $type_sam_transcripts ) = ( $type_dir.$type_prefix.'genome.sam', $type_dir.$type_prefix.'TEs.sam', $type_dir.$type_prefix.'transcripts.sam' );
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
192 my ( $type_sam_uni_genome, $type_sam_uni_TEs, $type_sam_uni_transcripts ) = ( $type_dir.$type_prefix.'genome_unique.sam', $type_dir.$type_prefix.'TEs_unique.sam', $type_dir.$type_prefix.'transcripts_unique.sam' );
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
193 my ( $type_uni_genome_fastq, $type_uni_TEs_fastq, $type_uni_transcripts_fastq ) = ( $fq_collection.$fastq_n[$child].'-'.$type_prefix.'genome_uni.fastq', $fq_collection.$fastq_n[$child].'-'.$type_prefix.'TEs_uni.fastq', $fq_collection.$fastq_n[$child].'-'.$type_prefix.'transcripts_uni.fastq');
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
194 my ( $type_genome_fastq, $type_TEs_fastq, $type_transcripts_fastq ) = ( $fq_collection.$fastq_n[$child].'-'.$type_prefix.'genome.fastq', $fq_collection.$fastq_n[$child].'-'.$type_prefix.'TEs.fastq', $fq_collection.$fastq_n[$child].'-'.$type_prefix.'transcripts.fastq');
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
195 my $type_sequence_hashP = get_fastq_seq ( $types[$grand_child] );
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
196
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
197 if ( $grand_child == 1 )
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
198 {
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
199 BWA_call ( $TE, $types[$grand_child], $type_sam_TEs, $misTE, $proc_child, $report );
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
200 BWA_call ( $transcripts, $types[$grand_child], $type_sam_transcripts, $mis, $proc_child, $report );
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
201 BWA_call ( $ref, $types[$grand_child], $type_sam_genome, $mis, $proc_child, $report );
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
202 extract_sam ( undef, $type_sam_TEs, $type_sam_TEs, $type_sam_uni_TEs, $type_TEs_fastq, $type_uni_TEs_fastq ); # distinct all-mapper and unique-mapper fastq outputs, matching the transcripts/genome calls
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
203 extract_sam ( undef, $type_sam_transcripts, $type_sam_transcripts, $type_sam_uni_transcripts, $type_transcripts_fastq, $type_uni_transcripts_fastq );
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
204 extract_sam ( undef, $type_sam_genome, $type_sam_genome, $type_sam_uni_genome, $type_genome_fastq, $type_uni_genome_fastq );
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
205 }
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
206 else
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
207 {
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
208 extract_sam ( $type_sequence_hashP, $sam_TEs, $type_sam_TEs, $type_sam_uni_TEs, $type_TEs_fastq, $type_uni_TEs_fastq );
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
209 extract_sam ( $type_sequence_hashP, $sam_transcripts, $type_sam_transcripts, $type_sam_uni_transcripts, $type_transcripts_fastq, $type_uni_transcripts_fastq );
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
210 extract_sam ( $type_sequence_hashP, $sam_genome, $type_sam_genome, $type_sam_uni_genome, $type_genome_fastq, $type_uni_genome_fastq );
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
211 }
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
212
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
213 my $ex_count_file = $type_dir.$type_prefix.'transcripts_reads_counts.txt';
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
214 my ( $ex_count, $ex_ref_size ) = sam_count_mis ( $type_sam_transcripts );
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
215 rpms_rpkm_te( $ex_count, $ex_ref_size, $ma, $ex_count_file, $pi, $mi, $bo );
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
216
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
217 my ( $TEs_count, $TEs_ref_size, $TEs_count_NoM, $TEs_count_M ) = sam_count_mis ( $type_sam_TEs );
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
218 my $TEs_count_file = $type_dir.$type_prefix.'TEs_reads_counts.txt';
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
219 my $TEs_count_file_M = $type_dir.$type_prefix.'TEs_reads_counts_mismatches.txt';
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
220 my $TEs_count_file_noM = $type_dir.$type_prefix.'TEs_reads_counts_nomismatches.txt';
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
221 rpms_rpkm_te( $TEs_count, $TEs_ref_size, $ma, $TEs_count_file, $pi, $mi, $bo );
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
222 rpms_rpkm_te( $TEs_count_NoM, $TEs_ref_size, $ma, $TEs_count_file_noM, $pi, $mi, $bo );
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
223 rpms_rpkm_te( $TEs_count_M, $TEs_ref_size, $ma, $TEs_count_file_M, $pi, $mi, $bo );
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
224
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
225 sam_to_bam_bg ( $type_sam_TEs, $scale, $grand_child );
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
226 sam_sorted_bam ( $type_sam_transcripts, $grand_child ); sam_sorted_bam ( $type_sam_uni_transcripts, $grand_child );
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
227 sam_sorted_bam ( $type_sam_uni_TEs, $grand_child );
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
228
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
229 my $Gviz_TEs = $type_dir.'Gviz_TEs/';
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
230 mkdir $Gviz_TEs;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
231 bg_to_png ( $group_dir.'TEs.fai', $type_dir.$type_prefix.'TEs_plus.bedgraph', $type_dir.$type_prefix.'TEs_minus.bedgraph', $Gviz_TEs, 'Kb' );
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
232
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
233 my $Gviz_genome= $type_dir.'Gviz_genome/';
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
234 my $Gviz_genome_rand = $Gviz_genome.'rand/';
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
235 my $Gviz_genome_uni = $Gviz_genome.'unique/';
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
236 mkdir $Gviz_genome; mkdir $Gviz_genome_uni; mkdir $Gviz_genome_rand;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
237
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
238 sam_to_bam_bg ( $type_sam_genome, $scale, $grand_child );
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
239 sam_to_bam_bg ( $type_sam_uni_genome, $scale, $grand_child );
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
240
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
241 bg_to_png ( $fai_file, $type_dir.$type_prefix.'genome_unique_plus.bedgraph', $type_dir.$type_prefix.'genome_unique_minus.bedgraph', $Gviz_genome_uni, 'Mb' );
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
242 bg_to_png ( $fai_file, $type_dir.$type_prefix.'genome_plus.bedgraph', $type_dir.$type_prefix.'genome_minus.bedgraph', $Gviz_genome_rand, 'Mb' );
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
243
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
244 #HTML Details
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
245 my $prefix_details_pages = $dir.$fastq_n[$child].'-'.$types_names[$grand_child];
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
246 details_pages ( $type_dir, $prefix_details_pages, \@fastq_n, $fastq_n[$child], $misTE, $dir, $Pcheck );
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
247
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
248 $pm2->finish();
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
249 }
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
250 $pm2->wait_all_children;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
251
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
# Optional ping-pong partners step: runs only when $Pcheck is the string 'true'.
# Creates the PPPartners/ output directory under $group_dir, calls
# ping_pong_partners() on the TEs index and the sorted piRNAs-vs-TEs BAM,
# then builds the matching "-piRNAs-PPP.html" page with ppp_page().
if ( $Pcheck eq 'true' )
{
  my $ppp = $group_dir.'PPPartners/';
  # Best effort, as before: a failure is now recorded in the report instead of
  # being silently ignored; the step still proceeds.
  unless ( -d $ppp or mkdir $ppp )
  {
    print $report "Cannot create directory $ppp: $!\n";
  }

  # Build each input path once so the report line always matches the call.
  # NOTE(review): the BAM is presumably produced earlier by the piRNAs pass
  # (sam_sorted_bam / sam_to_bam_bg) - confirm against those subs.
  my $TEs_fai = $group_dir.'TEs.fai';
  my $pi_TEs_bam = $group_dir.'piRNAs/piRNAs-TEs_sorted.bam';
  print $report "ping_pong_partners $TEs_fai $pi_TEs_bam $ppp\n";
  ping_pong_partners ( $TEs_fai, $pi_TEs_bam, $ppp, $pi_min );

  my $ppp_page = $dir.$fastq_n[$child].'-piRNAs-PPP.html';
  ppp_page ( $group_dir, $ppp_page, \@fastq_n, $fastq_n[$child], $ppp, $dir );
}
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
260
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
261 #HTML Main Webpage
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
262 my $index_page = $dir.$fastq_n[$child].'.html';
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
263 main_page ( $gen_dir, $index_page, \@fastq_n, $fastq_n[$child], $ma, $ma_uni, $dir );
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
264 copy ($index_page, $html_out) if $child == 0;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
265 #HTML Menu
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
266 my $menu_page = $dir.$fastq_n[$child].'-sub.html';
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
267 menu_page ( $group_dir, $menu_page, \@fastq_n, $fastq_n[$child], $min, $max, $si_min, $si_max, $pi_min, $pi_max, $dir );
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
268 unlink glob "'$group_dir'*.sam"; unlink glob "'$group_dir'*.fastq";
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
269 $pm->finish(); # pass an exit code to finish
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
270 }
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
271 $pm->wait_all_children;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
272 unlink glob "'$dir'"."dataset_*symlink.fa*";
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
273 print $report "Job done!\n";
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
274 close $report;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
275 } else {
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
276 print "sRNAPipe v1.1\n";
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
277 }