comparison bin/sRNAPipe.pl @ 61:9185ca0a7b43 draft

Updated package according to recommendations.
author pierre.pouchin
date Wed, 16 Jan 2019 08:18:13 -0500
parents 9645d995fb3c
children 967512924317
comparison
equal deleted inserted replaced
60:9645d995fb3c 61:9185ca0a7b43
14 use Rcall qw (pie_chart bg_to_png ); 14 use Rcall qw (pie_chart bg_to_png );
15 use align qw ( to_build get_unique sam_count sam_count_mis sam_sorted_bam rpms_rpkm rpms_rpkm_te BWA_call get_fastq_seq extract_sam sam_to_bam_bg ); 15 use align qw ( to_build get_unique sam_count sam_count_mis sam_sorted_bam rpms_rpkm rpms_rpkm_te BWA_call get_fastq_seq extract_sam sam_to_bam_bg );
16 use html qw ( main_page details_pages menu_page ppp_page ); 16 use html qw ( main_page details_pages menu_page ppp_page );
17 use File::Copy; 17 use File::Copy;
18 18
19 my ( @fastq, @fastq_n, $dir, $min, $max, $mis, $misTE, $help, $Pcheck, $mapnumf, $html_out); 19 if(@ARGV) {
20 my ( $ref, $tRNAs, $rRNAs, $snRNAs, $miRNAs, $transcripts, $TE ); 20 my ( @fastq, @fastq_n, $dir, $min, $max, $mis, $misTE, $help, $Pcheck, $mapnumf, $html_out);
21 my ( $si_min, $si_max, $pi_min, $pi_max ); 21 my ( $ref, $tRNAs, $rRNAs, $snRNAs, $miRNAs, $transcripts, $TE );
22 my ( $build_index, $build_tRNAs, $build_rRNAs, $build_snRNAs, $build_miRNAs, $build_transcripts, $build_TE ); 22 my ( $si_min, $si_max, $pi_min, $pi_max );
23 my $max_procs = 8; 23 my ( $build_index, $build_tRNAs, $build_rRNAs, $build_snRNAs, $build_miRNAs, $build_transcripts, $build_TE );
24 24 my $max_procs = 8;
25 ( $build_index, $build_tRNAs, $build_rRNAs, $build_snRNAs, $build_miRNAs, $build_transcripts, $build_TE ) = (0,0,0,0,0,0,0); 25
26 ( $min, $max, $mis, $misTE, $si_min, $si_max, $pi_min, $pi_max, $dir ) = ( 18, 29, 0, 3, 21, 21, 23, 29 ); 26 ( $build_index, $build_tRNAs, $build_rRNAs, $build_snRNAs, $build_miRNAs, $build_transcripts, $build_TE ) = (0,0,0,0,0,0,0);
27 $Pcheck ='true'; 27 ( $min, $max, $mis, $misTE, $si_min, $si_max, $pi_min, $pi_max, $dir ) = ( 18, 29, 0, 3, 21, 21, 23, 29 );
28 28 $Pcheck ='true';
29 GetOptions ( 29
30 "fastq=s" => \@fastq, 30 GetOptions (
31 "fastq_n=s" => \@fastq_n, 31 "fastq=s" => \@fastq,
32 "dir=s" => \$dir, 32 "fastq_n=s" => \@fastq_n,
33 "min:i" => \$min, 33 "dir=s" => \$dir,
34 "max:i" => \$max, 34 "min:i" => \$min,
35 "si_min:i" => \$si_min, 35 "max:i" => \$max,
36 "si_max:i" => \$si_max, 36 "si_min:i" => \$si_min,
37 "pi_min:i" => \$pi_min, 37 "si_max:i" => \$si_max,
38 "pi_max:i" => \$pi_max, 38 "pi_min:i" => \$pi_min,
39 "mis:i" => \$mis, 39 "pi_max:i" => \$pi_max,
40 "misTE:i" => \$misTE, 40 "mis:i" => \$mis,
41 "html:s" => \$html_out, 41 "misTE:i" => \$misTE,
42 "PPPon:s" => \$Pcheck, 42 "html:s" => \$html_out,
43 "help" => \$help, 43 "PPPon:s" => \$Pcheck,
44 "ref:s" => \$ref, 44 "help" => \$help,
45 "tRNAs:s" => \$tRNAs, 45 "ref:s" => \$ref,
46 "rRNAs:s" => \$rRNAs, 46 "tRNAs:s" => \$tRNAs,
47 "snRNAs:s" => \$snRNAs, 47 "rRNAs:s" => \$rRNAs,
48 "miRNAs:s" => \$miRNAs, 48 "snRNAs:s" => \$snRNAs,
49 "transcripts:s" => \$transcripts, 49 "miRNAs:s" => \$miRNAs,
50 "TE:s" => \$TE, 50 "transcripts:s" => \$transcripts,
51 "build_index" => \$build_index, 51 "TE:s" => \$TE,
52 "build_tRNAs" => \$build_tRNAs, 52 "build_index" => \$build_index,
53 "build_snRNAs" => \$build_snRNAs, 53 "build_tRNAs" => \$build_tRNAs,
54 "build_miRNAs" => \$build_miRNAs, 54 "build_snRNAs" => \$build_snRNAs,
55 "build_transcripts" => \$build_transcripts, 55 "build_miRNAs" => \$build_miRNAs,
56 "build_rRNAs" => \$build_rRNAs, 56 "build_transcripts" => \$build_transcripts,
57 "build_TE" => \$build_TE 57 "build_rRNAs" => \$build_rRNAs,
58 ); 58 "build_TE" => \$build_TE
59 59 );
60 my $fq_collection = 'fastq_dir/'; 60
61 mkdir $dir; mkdir $fq_collection; 61 my $fq_collection = 'fastq_dir/';
62 $dir = $dir.'/' unless $dir =~ /\/$/; 62 mkdir $dir; mkdir $fq_collection;
63 mkdir $dir.'/css';mkdir $dir.'/js'; 63 $dir = $dir.'/' unless $dir =~ /\/$/;
64 dircopy( $FindBin::Bin.'/css', $dir.'/css' ); 64 mkdir $dir.'/css';mkdir $dir.'/js';
65 dircopy( $FindBin::Bin.'/js', $dir.'/js' ); 65 dircopy( $FindBin::Bin.'/css', $dir.'/css' );
66 66 dircopy( $FindBin::Bin.'/js', $dir.'/js' );
67 my $file = $dir.'report.txt'; 67
68 open my $report, '>', $file or die "Cannot open $file $!\n"; 68 my $file = $dir.'report.txt';
69 69 open my $report, '>', $file or die "Cannot open $file $!\n";
70 my @toBuild = ( [$build_index, \$ref], [$build_tRNAs, \$tRNAs], [$build_rRNAs, \$rRNAs], [$build_snRNAs, \$snRNAs], [$build_miRNAs, \$miRNAs], [$build_transcripts, \$transcripts], [$build_TE, \$TE] ); 70
71 to_build ( \@toBuild, $report, $dir ); 71 my @toBuild = ( [$build_index, \$ref], [$build_tRNAs, \$tRNAs], [$build_rRNAs, \$rRNAs], [$build_snRNAs, \$snRNAs], [$build_miRNAs, \$miRNAs], [$build_transcripts, \$transcripts], [$build_TE, \$TE] );
72 72 to_build ( \@toBuild, $report, $dir );
73 my $proc_child = ceil($max_procs / scalar(@fastq)); 73
74 my $proc_grand_child = ceil($proc_child/4); 74 my $proc_child = ceil($max_procs / scalar(@fastq));
75 my $pm = Parallel::ForkManager->new($max_procs); 75 my $proc_grand_child = ceil($proc_child/4);
76 my $pm2 = Parallel::ForkManager->new($proc_grand_child); 76 my $pm = Parallel::ForkManager->new($max_procs);
77 77 my $pm2 = Parallel::ForkManager->new($proc_grand_child);
78 $pm->run_on_finish( sub { 78
79 my ($pid, $exit_code, $ident) = @_; 79 $pm->run_on_finish( sub {
80 print $report "Fastq fork $ident just finished ". 80 my ($pid, $exit_code, $ident) = @_;
81 "with PID $pid and exit code: $exit_code\n"; 81 print $report "Fastq fork $ident just finished ".
82 die "Something went wrong!\n" if $exit_code != 0; 82 "with PID $pid and exit code: $exit_code\n";
83 }); 83 die "Something went wrong!\n" if $exit_code != 0;
84 $pm->run_on_start( sub { 84 });
85 my ($pid,$ident)=@_; 85
86 print $report "Fastq fork : $ident started, pid: $pid\n"; 86
87 }); 87 $pm->run_on_start( sub {
88 $pm2->run_on_finish( sub { 88 my ($pid,$ident)=@_;
89 my ($pid, $exit_code, $ident) = @_; 89 print $report "Fastq fork : $ident started, pid: $pid\n";
90 print $report "** Subgroup fork $ident just finished ". 90 });
91 "with PID $pid and exit code: $exit_code\n"; 91
92 die "Something went wrong!\n" if $exit_code != 0; 92
93 }); 93 $pm2->run_on_finish( sub {
94 $pm2->run_on_start( sub { 94 my ($pid, $exit_code, $ident) = @_;
95 my ($pid,$ident)=@_; 95 print $report "** Subgroup fork $ident just finished ".
96 print $report "** Subgroup fork $ident started, pid: $pid\n"; 96 "with PID $pid and exit code: $exit_code\n";
97 }); 97 die "Something went wrong!\n" if $exit_code != 0;
98 98 });
99 99
100 foreach my $child ( 0 .. $#fastq ) 100
101 { 101 $pm2->run_on_start( sub {
102 my @suffix = ('.fastq', '.fastq.gz,', '.fq', '.fq.gz', 'ref', '.dat', '.fa','.fas','.fasta', '.txt'); 102 my ($pid,$ident)=@_;
103 my ( $name, $path, $suffix ) = fileparse( $fastq[$child], @suffix ); 103 print $report "** Subgroup fork $ident started, pid: $pid\n";
104 my ( $ref_name, $ref_path, $ref_suffix ) = fileparse( $ref, @suffix ); 104 });
105 my ( $TE_name, $TE_path, $TE_suffix ) = fileparse( $TE, @suffix ); 105
106 my ( $ex_name, $ex_path, $ex_suffix ) = fileparse( $transcripts, @suffix ); 106 foreach my $child ( 0 .. $#fastq )
107 107 {
108 $pm->start($fastq[$child]) and next; 108 my @suffix = ('.fastq', '.fastq.gz,', '.fq', '.fq.gz', 'ref', '.dat', '.fa','.fas','.fasta', '.txt');
109 109 my ( $name, $path, $suffix ) = fileparse( $fastq[$child], @suffix );
110 my $dir_fq = $dir.$fastq_n[$child].'/'; 110 my ( $ref_name, $ref_path, $ref_suffix ) = fileparse( $ref, @suffix );
111 mkdir $dir_fq; 111 my ( $TE_name, $TE_path, $TE_suffix ) = fileparse( $TE, @suffix );
112 112 my ( $ex_name, $ex_path, $ex_suffix ) = fileparse( $transcripts, @suffix );
113 my $gen_dir = $dir_fq.'genome/'; 113
114 mkdir $gen_dir; 114 $pm->start($fastq[$child]) and next;
115 115
116 my $size_dir = $dir_fq.'size/'; 116 my $dir_fq = $dir.$fastq_n[$child].'/';
117 mkdir $size_dir; 117 mkdir $dir_fq;
118 118
119 my $fastq_resized = $dir_fq.$name.'_'.$min.'-'.$max.'.fastq'; 119 my $gen_dir = $dir_fq.'genome/';
120 size_distribution ( $fastq[$child], $fastq_resized, $size_dir, $min, $max ); 120 mkdir $gen_dir;
121 121
122 my $sam_genome = $gen_dir.$fastq_n[$child].'_'.$min.'-'.$max.'.sam'; 122 my $size_dir = $dir_fq.'size/';
123 my $sam_genome_unique = $gen_dir.$fastq_n[$child].'_'.$min.'-'.$max.'_unique.sam'; 123 mkdir $size_dir;
124 my $fastq_prefix = $gen_dir.$fastq_n[$child].'_'.$min.'-'.$max; 124
125 125 my $fastq_resized = $dir_fq.$name.'_'.$min.'-'.$max.'.fastq';
126 BWA_call ( $ref, $fastq_resized, $sam_genome, $mis, $proc_child, $report ); 126 size_distribution ( $fastq[$child], $fastq_resized, $size_dir, $min, $max );
127 my ( $fai_ref_hashP, $ma, $ma_uni ) = get_unique ( $sam_genome, $sam_genome_unique, $gen_dir, $fq_collection.$fastq_n[$child], 1, $report ); 127
128 128 my $sam_genome = $gen_dir.$fastq_n[$child].'_'.$min.'-'.$max.'.sam';
129 die "No Reads mapped on the genome reference!\n" if $ma == 0; 129 my $sam_genome_unique = $gen_dir.$fastq_n[$child].'_'.$min.'-'.$max.'_unique.sam';
130 my $scale = 1000000 / $ma; 130 my $fastq_prefix = $gen_dir.$fastq_n[$child].'_'.$min.'-'.$max;
131 sam_to_bam_bg ( $sam_genome_unique, $scale, $proc_child ); 131
132 sam_to_bam_bg ( $sam_genome, $scale, $proc_child ); 132 BWA_call ( $ref, $fastq_resized, $sam_genome, $mis, $proc_child, $report );
133 133 my ( $fai_ref_hashP, $ma, $ma_uni ) = get_unique ( $sam_genome, $sam_genome_unique, $gen_dir, $fq_collection.$fastq_n[$child], 1, $report );
134 my $Gviz_dir = $gen_dir.'Gviz/'; 134
135 my $fai_file = $gen_dir.'fai'; 135 die "No Reads mapped on the genome reference!\n" if $ma == 0;
136 mkdir $Gviz_dir; 136 my $scale = 1000000 / $ma;
137 my $Gviz_dir_rand = $Gviz_dir.'rand/'; 137 sam_to_bam_bg ( $sam_genome_unique, $scale, $proc_child );
138 mkdir $Gviz_dir_rand; 138 sam_to_bam_bg ( $sam_genome, $scale, $proc_child );
139 my $Gviz_dir_uni = $Gviz_dir.'unique/'; 139
140 mkdir $Gviz_dir_uni; 140 my $Gviz_dir = $gen_dir.'Gviz/';
141 141 my $fai_file = $gen_dir.'fai';
142 open my $gfai, '>', $fai_file; 142 mkdir $Gviz_dir;
143 foreach my $k ( sort keys %{$fai_ref_hashP} ) 143 my $Gviz_dir_rand = $Gviz_dir.'rand/';
144 { 144 mkdir $Gviz_dir_rand;
145 print $gfai "$k\t$fai_ref_hashP->{$k}\n"; 145 my $Gviz_dir_uni = $Gviz_dir.'unique/';
146 } 146 mkdir $Gviz_dir_uni;
147 close $gfai; 147
148 bg_to_png ( $fai_file, $fastq_prefix.'_unique_plus.bedgraph', $fastq_prefix.'_unique_minus.bedgraph', $Gviz_dir_uni, 'Mb' ); 148 open my $gfai, '>', $fai_file;
149 bg_to_png ( $fai_file, $fastq_prefix.'_plus.bedgraph', $fastq_prefix.'_minus.bedgraph', $Gviz_dir_rand, 'Mb' ); 149 foreach my $k ( sort keys %{$fai_ref_hashP} )
150 150 {
151 my $group_dir = $dir_fq.'subgroups/'; 151 print $gfai "$k\t$fai_ref_hashP->{$k}\n";
152 my $fastq_uni = $fq_collection.$fastq_n[$child].'_unique_mappers.fastq'; 152 }
153 my $fastq_all = $fq_collection.$fastq_n[$child].'_all_mappers.fastq'; 153 close $gfai;
154 my ($bo, $mi, $pi) = subgroups ( $fastq_all, $group_dir, $mis, $misTE, $proc_child, $tRNAs, $rRNAs, $snRNAs, $miRNAs, $transcripts, $TE, $si_min, $si_max, $pi_min, $pi_max, $report); 154 bg_to_png ( $fai_file, $fastq_prefix.'_unique_plus.bedgraph', $fastq_prefix.'_unique_minus.bedgraph', $Gviz_dir_uni, 'Mb' );
155 155 bg_to_png ( $fai_file, $fastq_prefix.'_plus.bedgraph', $fastq_prefix.'_minus.bedgraph', $Gviz_dir_rand, 'Mb' );
156 pie_chart($group_dir); 156
157 157 my $group_dir = $dir_fq.'subgroups/';
158 open (my $dupnum, $gen_dir.'dup_mapnum.txt') || die "cannot open dup_mapnum.txt $!"; 158 my $fastq_uni = $fq_collection.$fastq_n[$child].'_unique_mappers.fastq';
159 my %dupnum_genome; 159 my $fastq_all = $fq_collection.$fastq_n[$child].'_all_mappers.fastq';
160 my $header = <$dupnum>; 160 my ($bo, $mi, $pi) = subgroups ( $fastq_all, $group_dir, $mis, $misTE, $proc_child, $tRNAs, $rRNAs, $snRNAs, $miRNAs, $transcripts, $TE, $si_min, $si_max, $pi_min, $pi_max, $report);
161 while (<$dupnum>) 161
162 { 162 pie_chart($group_dir);
163 chomp $_; 163
164 my @dupline = split /\t/, $_; 164 open (my $dupnum, $gen_dir.'dup_mapnum.txt') || die "cannot open dup_mapnum.txt $!";
165 $dupnum_genome{$dupline[0]} = [$dupline[1], $dupline[2]]; 165 my %dupnum_genome;
166 } 166 my $header = <$dupnum>;
167 close $dupnum; 167 while (<$dupnum>)
168 168 {
169 my $mi_sam = $group_dir.'miRNAs.sam'; 169 chomp $_;
170 mkdir $group_dir.'miRNAs/'; 170 my @dupline = split /\t/, $_;
171 my $mi_count_file = $group_dir.'miRNAs/miRNAs_reads_counts.txt'; 171 $dupnum_genome{$dupline[0]} = [$dupline[1], $dupline[2]];
172 my ( $mi_count, $mi_ref_size ) = sam_count ( $mi_sam ); 172 }
173 173 close $dupnum;
174 rpms_rpkm( $mi_count, $mi_ref_size, $ma, $mi_count_file, $pi, $mi, $bo ); 174
175 175 my $mi_sam = $group_dir.'miRNAs.sam';
176 my ( $sam_transcripts, $sam_TEs ) = ( $group_dir.'transcripts.sam', $group_dir.'TEs.sam' ); 176 mkdir $group_dir.'miRNAs/';
177 my @types = ($group_dir.'bonafide_reads.fastq', $group_dir.'miRNAs.fastq', $group_dir.'siRNAs.fastq', $group_dir.'piRNAs.fastq' ); 177 my $mi_count_file = $group_dir.'miRNAs/miRNAs_reads_counts.txt';
178 my @types_names = ('bonafide_reads', 'miRNAs', 'siRNAs', 'piRNAs'); 178 my ( $mi_count, $mi_ref_size ) = sam_count ( $mi_sam );
179 foreach my $grand_child ( 0 .. $#types ) 179
180 { 180 rpms_rpkm( $mi_count, $mi_ref_size, $ma, $mi_count_file, $pi, $mi, $bo );
181 my $type_dir = $group_dir.$types_names[$grand_child].'/'; 181
182 my $type_prefix = $types_names[$grand_child].'-'; 182 my ( $sam_transcripts, $sam_TEs ) = ( $group_dir.'transcripts.sam', $group_dir.'TEs.sam' );
183 mkdir $type_dir; 183 my @types = ($group_dir.'bonafide_reads.fastq', $group_dir.'miRNAs.fastq', $group_dir.'siRNAs.fastq', $group_dir.'piRNAs.fastq' );
184 $pm2->start($types[$grand_child]) and next; 184 my @types_names = ('bonafide_reads', 'miRNAs', 'siRNAs', 'piRNAs');
185 my ( $type_sam_genome, $type_sam_TEs, $type_sam_transcripts ) = ( $type_dir.$type_prefix.'genome.sam', $type_dir.$type_prefix.'TEs.sam', $type_dir.$type_prefix.'transcripts.sam' ); 185 foreach my $grand_child ( 0 .. $#types )
186 my ( $type_sam_uni_genome, $type_sam_uni_TEs, $type_sam_uni_transcripts ) = ( $type_dir.$type_prefix.'genome_unique.sam', $type_dir.$type_prefix.'TEs_unique.sam', $type_dir.$type_prefix.'transcripts_unique.sam' ); 186 {
187 my ( $type_uni_genome_fastq, $type_uni_TEs_fastq, $type_uni_transcripts_fastq ) = ( $fq_collection.$fastq_n[$child].'-'.$type_prefix.'genome_uni.fastq', $fq_collection.$fastq_n[$child].'-'.$type_prefix.'TEs_uni.fastq', $fq_collection.$fastq_n[$child].'-'.$type_prefix.'transcripts_uni.fastq'); 187 my $type_dir = $group_dir.$types_names[$grand_child].'/';
188 my ( $type_genome_fastq, $type_TEs_fastq, $type_transcripts_fastq ) = ( $fq_collection.$fastq_n[$child].'-'.$type_prefix.'genome.fastq', $fq_collection.$fastq_n[$child].'-'.$type_prefix.'TEs.fastq', $fq_collection.$fastq_n[$child].'-'.$type_prefix.'transcripts.fastq'); 188 my $type_prefix = $types_names[$grand_child].'-';
189 my $type_sequence_hashP = get_fastq_seq ( $types[$grand_child] ); 189 mkdir $type_dir;
190 190 $pm2->start($types[$grand_child]) and next;
191 if ( $grand_child == 1 ) 191 my ( $type_sam_genome, $type_sam_TEs, $type_sam_transcripts ) = ( $type_dir.$type_prefix.'genome.sam', $type_dir.$type_prefix.'TEs.sam', $type_dir.$type_prefix.'transcripts.sam' );
192 { 192 my ( $type_sam_uni_genome, $type_sam_uni_TEs, $type_sam_uni_transcripts ) = ( $type_dir.$type_prefix.'genome_unique.sam', $type_dir.$type_prefix.'TEs_unique.sam', $type_dir.$type_prefix.'transcripts_unique.sam' );
193 BWA_call ( $TE, $types[$grand_child], $type_sam_TEs, $misTE, $proc_child, $report ); 193 my ( $type_uni_genome_fastq, $type_uni_TEs_fastq, $type_uni_transcripts_fastq ) = ( $fq_collection.$fastq_n[$child].'-'.$type_prefix.'genome_uni.fastq', $fq_collection.$fastq_n[$child].'-'.$type_prefix.'TEs_uni.fastq', $fq_collection.$fastq_n[$child].'-'.$type_prefix.'transcripts_uni.fastq');
194 BWA_call ( $transcripts, $types[$grand_child], $type_sam_transcripts, $mis, $proc_child, $report ); 194 my ( $type_genome_fastq, $type_TEs_fastq, $type_transcripts_fastq ) = ( $fq_collection.$fastq_n[$child].'-'.$type_prefix.'genome.fastq', $fq_collection.$fastq_n[$child].'-'.$type_prefix.'TEs.fastq', $fq_collection.$fastq_n[$child].'-'.$type_prefix.'transcripts.fastq');
195 BWA_call ( $ref, $types[$grand_child], $type_sam_genome, $mis, $proc_child, $report ); 195 my $type_sequence_hashP = get_fastq_seq ( $types[$grand_child] );
196 extract_sam ( undef, $type_sam_TEs, $type_sam_TEs, $type_sam_uni_TEs, $type_uni_TEs_fastq, $type_uni_TEs_fastq ); 196
197 extract_sam ( undef, $type_sam_transcripts, $type_sam_transcripts, $type_sam_uni_transcripts, $type_transcripts_fastq, $type_uni_transcripts_fastq ); 197 if ( $grand_child == 1 )
198 extract_sam ( undef, $type_sam_genome, $type_sam_genome, $type_sam_uni_genome, $type_genome_fastq, $type_uni_genome_fastq ); 198 {
199 } 199 BWA_call ( $TE, $types[$grand_child], $type_sam_TEs, $misTE, $proc_child, $report );
200 else 200 BWA_call ( $transcripts, $types[$grand_child], $type_sam_transcripts, $mis, $proc_child, $report );
201 { 201 BWA_call ( $ref, $types[$grand_child], $type_sam_genome, $mis, $proc_child, $report );
202 extract_sam ( $type_sequence_hashP, $sam_TEs, $type_sam_TEs, $type_sam_uni_TEs, $type_TEs_fastq, $type_uni_TEs_fastq ); 202 extract_sam ( undef, $type_sam_TEs, $type_sam_TEs, $type_sam_uni_TEs, $type_uni_TEs_fastq, $type_uni_TEs_fastq );
203 extract_sam ( $type_sequence_hashP, $sam_transcripts, $type_sam_transcripts, $type_sam_uni_transcripts, $type_transcripts_fastq, $type_uni_transcripts_fastq ); 203 extract_sam ( undef, $type_sam_transcripts, $type_sam_transcripts, $type_sam_uni_transcripts, $type_transcripts_fastq, $type_uni_transcripts_fastq );
204 extract_sam ( $type_sequence_hashP, $sam_genome, $type_sam_genome, $type_sam_uni_genome, $type_genome_fastq, $type_uni_genome_fastq ); 204 extract_sam ( undef, $type_sam_genome, $type_sam_genome, $type_sam_uni_genome, $type_genome_fastq, $type_uni_genome_fastq );
205 } 205 }
206 206 else
207 my $ex_count_file = $type_dir.$type_prefix.'transcripts_reads_counts.txt'; 207 {
208 my ( $ex_count, $ex_ref_size ) = sam_count_mis ( $type_sam_transcripts ); 208 extract_sam ( $type_sequence_hashP, $sam_TEs, $type_sam_TEs, $type_sam_uni_TEs, $type_TEs_fastq, $type_uni_TEs_fastq );
209 rpms_rpkm_te( $ex_count, $ex_ref_size, $ma, $ex_count_file, $pi, $mi, $bo ); 209 extract_sam ( $type_sequence_hashP, $sam_transcripts, $type_sam_transcripts, $type_sam_uni_transcripts, $type_transcripts_fastq, $type_uni_transcripts_fastq );
210 210 extract_sam ( $type_sequence_hashP, $sam_genome, $type_sam_genome, $type_sam_uni_genome, $type_genome_fastq, $type_uni_genome_fastq );
211 my ( $TEs_count, $TEs_ref_size, $TEs_count_NoM, $TEs_count_M ) = sam_count_mis ( $type_sam_TEs ); 211 }
212 my $TEs_count_file = $type_dir.$type_prefix.'TEs_reads_counts.txt'; 212
213 my $TEs_count_file_M = $type_dir.$type_prefix.'TEs_reads_counts_mismatches.txt'; 213 my $ex_count_file = $type_dir.$type_prefix.'transcripts_reads_counts.txt';
214 my $TEs_count_file_noM = $type_dir.$type_prefix.'TEs_reads_counts_nomismatches.txt'; 214 my ( $ex_count, $ex_ref_size ) = sam_count_mis ( $type_sam_transcripts );
215 rpms_rpkm_te( $TEs_count, $TEs_ref_size, $ma, $TEs_count_file, $pi, $mi, $bo ); 215 rpms_rpkm_te( $ex_count, $ex_ref_size, $ma, $ex_count_file, $pi, $mi, $bo );
216 rpms_rpkm_te( $TEs_count_NoM, $TEs_ref_size, $ma, $TEs_count_file_noM, $pi, $mi, $bo ); 216
217 rpms_rpkm_te( $TEs_count_M, $TEs_ref_size, $ma, $TEs_count_file_M, $pi, $mi, $bo ); 217 my ( $TEs_count, $TEs_ref_size, $TEs_count_NoM, $TEs_count_M ) = sam_count_mis ( $type_sam_TEs );
218 218 my $TEs_count_file = $type_dir.$type_prefix.'TEs_reads_counts.txt';
219 sam_to_bam_bg ( $type_sam_TEs, $scale, $grand_child ); 219 my $TEs_count_file_M = $type_dir.$type_prefix.'TEs_reads_counts_mismatches.txt';
220 sam_sorted_bam ( $type_sam_transcripts, $grand_child ); sam_sorted_bam ( $type_sam_uni_transcripts, $grand_child ); 220 my $TEs_count_file_noM = $type_dir.$type_prefix.'TEs_reads_counts_nomismatches.txt';
221 sam_sorted_bam ( $type_sam_uni_TEs, $grand_child ); 221 rpms_rpkm_te( $TEs_count, $TEs_ref_size, $ma, $TEs_count_file, $pi, $mi, $bo );
222 222 rpms_rpkm_te( $TEs_count_NoM, $TEs_ref_size, $ma, $TEs_count_file_noM, $pi, $mi, $bo );
223 my $Gviz_TEs = $type_dir.'Gviz_TEs/'; 223 rpms_rpkm_te( $TEs_count_M, $TEs_ref_size, $ma, $TEs_count_file_M, $pi, $mi, $bo );
224 mkdir $Gviz_TEs; 224
225 bg_to_png ( $group_dir.'TEs.fai', $type_dir.$type_prefix.'TEs_plus.bedgraph', $type_dir.$type_prefix.'TEs_minus.bedgraph', $Gviz_TEs, 'Kb' ); 225 sam_to_bam_bg ( $type_sam_TEs, $scale, $grand_child );
226 226 sam_sorted_bam ( $type_sam_transcripts, $grand_child ); sam_sorted_bam ( $type_sam_uni_transcripts, $grand_child );
227 my $Gviz_genome= $type_dir.'Gviz_genome/'; 227 sam_sorted_bam ( $type_sam_uni_TEs, $grand_child );
228 my $Gviz_genome_rand = $Gviz_genome.'rand/'; 228
229 my $Gviz_genome_uni = $Gviz_genome.'unique/'; 229 my $Gviz_TEs = $type_dir.'Gviz_TEs/';
230 mkdir $Gviz_genome; mkdir $Gviz_genome_uni; mkdir $Gviz_genome_rand; 230 mkdir $Gviz_TEs;
231 231 bg_to_png ( $group_dir.'TEs.fai', $type_dir.$type_prefix.'TEs_plus.bedgraph', $type_dir.$type_prefix.'TEs_minus.bedgraph', $Gviz_TEs, 'Kb' );
232 sam_to_bam_bg ( $type_sam_genome, $scale, $grand_child ); 232
233 sam_to_bam_bg ( $type_sam_uni_genome, $scale, $grand_child ); 233 my $Gviz_genome= $type_dir.'Gviz_genome/';
234 234 my $Gviz_genome_rand = $Gviz_genome.'rand/';
235 bg_to_png ( $fai_file, $type_dir.$type_prefix.'genome_unique_plus.bedgraph', $type_dir.$type_prefix.'genome_unique_minus.bedgraph', $Gviz_genome_uni, 'Mb' ); 235 my $Gviz_genome_uni = $Gviz_genome.'unique/';
236 bg_to_png ( $fai_file, $type_dir.$type_prefix.'genome_plus.bedgraph', $type_dir.$type_prefix.'genome_minus.bedgraph', $Gviz_genome_rand, 'Mb' ); 236 mkdir $Gviz_genome; mkdir $Gviz_genome_uni; mkdir $Gviz_genome_rand;
237 237
238 #HTML Details 238 sam_to_bam_bg ( $type_sam_genome, $scale, $grand_child );
239 my $prefix_details_pages = $dir.$fastq_n[$child].'-'.$types_names[$grand_child]; 239 sam_to_bam_bg ( $type_sam_uni_genome, $scale, $grand_child );
240 details_pages ( $type_dir, $prefix_details_pages, \@fastq_n, $fastq_n[$child], $misTE, $dir, $Pcheck ); 240
241 241 bg_to_png ( $fai_file, $type_dir.$type_prefix.'genome_unique_plus.bedgraph', $type_dir.$type_prefix.'genome_unique_minus.bedgraph', $Gviz_genome_uni, 'Mb' );
242 $pm2->finish(); 242 bg_to_png ( $fai_file, $type_dir.$type_prefix.'genome_plus.bedgraph', $type_dir.$type_prefix.'genome_minus.bedgraph', $Gviz_genome_rand, 'Mb' );
243 } 243
244 $pm2->wait_all_children; 244 #HTML Details
245 245 my $prefix_details_pages = $dir.$fastq_n[$child].'-'.$types_names[$grand_child];
246 if ( $Pcheck eq 'true' ) 246 details_pages ( $type_dir, $prefix_details_pages, \@fastq_n, $fastq_n[$child], $misTE, $dir, $Pcheck );
247 { 247
248 my $ppp = $group_dir.'PPPartners/'; mkdir $ppp; 248 $pm2->finish();
249 print $report "ping_pong_partners $group_dir/piRNAs/TEs.sam $ppp\n"; 249 }
250 ping_pong_partners ( $group_dir.'TEs.fai', $group_dir.'piRNAs/piRNAs-TEs_sorted.bam', $ppp, $pi_min ); 250 $pm2->wait_all_children;
251 my $ppp_page = $dir.$fastq_n[$child].'-piRNAs-PPP.html'; 251
252 ppp_page ( $group_dir, $ppp_page, \@fastq_n, $fastq_n[$child], $ppp, $dir ); 252 if ( $Pcheck eq 'true' )
253 } 253 {
254 254 my $ppp = $group_dir.'PPPartners/'; mkdir $ppp;
255 #HTML Main Webpage 255 print $report "ping_pong_partners $group_dir/piRNAs/TEs.sam $ppp\n";
256 my $index_page = $dir.$fastq_n[$child].'.html'; 256 ping_pong_partners ( $group_dir.'TEs.fai', $group_dir.'piRNAs/piRNAs-TEs_sorted.bam', $ppp, $pi_min );
257 main_page ( $gen_dir, $index_page, \@fastq_n, $fastq_n[$child], $ma, $ma_uni, $dir ); 257 my $ppp_page = $dir.$fastq_n[$child].'-piRNAs-PPP.html';
258 copy ($index_page, $html_out) if $child == 0; 258 ppp_page ( $group_dir, $ppp_page, \@fastq_n, $fastq_n[$child], $ppp, $dir );
259 #HTML Menu 259 }
260 my $menu_page = $dir.$fastq_n[$child].'-sub.html'; 260
261 menu_page ( $group_dir, $menu_page, \@fastq_n, $fastq_n[$child], $min, $max, $si_min, $si_max, $pi_min, $pi_max, $dir ); 261 #HTML Main Webpage
262 unlink glob "'$group_dir'*.sam"; unlink glob "'$group_dir'*.fastq"; 262 my $index_page = $dir.$fastq_n[$child].'.html';
263 $pm->finish(); # pass an exit code to finish 263 main_page ( $gen_dir, $index_page, \@fastq_n, $fastq_n[$child], $ma, $ma_uni, $dir );
264 copy ($index_page, $html_out) if $child == 0;
265 #HTML Menu
266 my $menu_page = $dir.$fastq_n[$child].'-sub.html';
267 menu_page ( $group_dir, $menu_page, \@fastq_n, $fastq_n[$child], $min, $max, $si_min, $si_max, $pi_min, $pi_max, $dir );
268 unlink glob "'$group_dir'*.sam"; unlink glob "'$group_dir'*.fastq";
269 $pm->finish(); # pass an exit code to finish
270 }
271 $pm->wait_all_children;
272 unlink glob "'$dir'"."dataset_*symlink.fa*";
273 print $report "Job done!\n";
274 close $report;
275 } else {
276 print "sRNAPipe v1.1\n";
264 } 277 }
265 $pm->wait_all_children;
266 unlink glob "'$dir'"."dataset_*symlink.fa*";
267 print $report "Job done!\n";
268 close $report;