Mercurial > repos > brasset_jensen > srnapipe
comparison bin/sRNAPipe.pl @ 61:9185ca0a7b43 draft
Updated package according to recommendations.
author | pierre.pouchin |
---|---|
date | Wed, 16 Jan 2019 08:18:13 -0500 |
parents | 9645d995fb3c |
children | 967512924317 |
comparison
equal
deleted
inserted
replaced
60:9645d995fb3c | 61:9185ca0a7b43 |
---|---|
14 use Rcall qw (pie_chart bg_to_png ); | 14 use Rcall qw (pie_chart bg_to_png ); |
15 use align qw ( to_build get_unique sam_count sam_count_mis sam_sorted_bam rpms_rpkm rpms_rpkm_te BWA_call get_fastq_seq extract_sam sam_to_bam_bg ); | 15 use align qw ( to_build get_unique sam_count sam_count_mis sam_sorted_bam rpms_rpkm rpms_rpkm_te BWA_call get_fastq_seq extract_sam sam_to_bam_bg ); |
16 use html qw ( main_page details_pages menu_page ppp_page ); | 16 use html qw ( main_page details_pages menu_page ppp_page ); |
17 use File::Copy; | 17 use File::Copy; |
18 | 18 |
19 my ( @fastq, @fastq_n, $dir, $min, $max, $mis, $misTE, $help, $Pcheck, $mapnumf, $html_out); | 19 if(@ARGV) { |
20 my ( $ref, $tRNAs, $rRNAs, $snRNAs, $miRNAs, $transcripts, $TE ); | 20 my ( @fastq, @fastq_n, $dir, $min, $max, $mis, $misTE, $help, $Pcheck, $mapnumf, $html_out); |
21 my ( $si_min, $si_max, $pi_min, $pi_max ); | 21 my ( $ref, $tRNAs, $rRNAs, $snRNAs, $miRNAs, $transcripts, $TE ); |
22 my ( $build_index, $build_tRNAs, $build_rRNAs, $build_snRNAs, $build_miRNAs, $build_transcripts, $build_TE ); | 22 my ( $si_min, $si_max, $pi_min, $pi_max ); |
23 my $max_procs = 8; | 23 my ( $build_index, $build_tRNAs, $build_rRNAs, $build_snRNAs, $build_miRNAs, $build_transcripts, $build_TE ); |
24 | 24 my $max_procs = 8; |
25 ( $build_index, $build_tRNAs, $build_rRNAs, $build_snRNAs, $build_miRNAs, $build_transcripts, $build_TE ) = (0,0,0,0,0,0,0); | 25 |
26 ( $min, $max, $mis, $misTE, $si_min, $si_max, $pi_min, $pi_max, $dir ) = ( 18, 29, 0, 3, 21, 21, 23, 29 ); | 26 ( $build_index, $build_tRNAs, $build_rRNAs, $build_snRNAs, $build_miRNAs, $build_transcripts, $build_TE ) = (0,0,0,0,0,0,0); |
27 $Pcheck ='true'; | 27 ( $min, $max, $mis, $misTE, $si_min, $si_max, $pi_min, $pi_max, $dir ) = ( 18, 29, 0, 3, 21, 21, 23, 29 ); |
28 | 28 $Pcheck ='true'; |
29 GetOptions ( | 29 |
30 "fastq=s" => \@fastq, | 30 GetOptions ( |
31 "fastq_n=s" => \@fastq_n, | 31 "fastq=s" => \@fastq, |
32 "dir=s" => \$dir, | 32 "fastq_n=s" => \@fastq_n, |
33 "min:i" => \$min, | 33 "dir=s" => \$dir, |
34 "max:i" => \$max, | 34 "min:i" => \$min, |
35 "si_min:i" => \$si_min, | 35 "max:i" => \$max, |
36 "si_max:i" => \$si_max, | 36 "si_min:i" => \$si_min, |
37 "pi_min:i" => \$pi_min, | 37 "si_max:i" => \$si_max, |
38 "pi_max:i" => \$pi_max, | 38 "pi_min:i" => \$pi_min, |
39 "mis:i" => \$mis, | 39 "pi_max:i" => \$pi_max, |
40 "misTE:i" => \$misTE, | 40 "mis:i" => \$mis, |
41 "html:s" => \$html_out, | 41 "misTE:i" => \$misTE, |
42 "PPPon:s" => \$Pcheck, | 42 "html:s" => \$html_out, |
43 "help" => \$help, | 43 "PPPon:s" => \$Pcheck, |
44 "ref:s" => \$ref, | 44 "help" => \$help, |
45 "tRNAs:s" => \$tRNAs, | 45 "ref:s" => \$ref, |
46 "rRNAs:s" => \$rRNAs, | 46 "tRNAs:s" => \$tRNAs, |
47 "snRNAs:s" => \$snRNAs, | 47 "rRNAs:s" => \$rRNAs, |
48 "miRNAs:s" => \$miRNAs, | 48 "snRNAs:s" => \$snRNAs, |
49 "transcripts:s" => \$transcripts, | 49 "miRNAs:s" => \$miRNAs, |
50 "TE:s" => \$TE, | 50 "transcripts:s" => \$transcripts, |
51 "build_index" => \$build_index, | 51 "TE:s" => \$TE, |
52 "build_tRNAs" => \$build_tRNAs, | 52 "build_index" => \$build_index, |
53 "build_snRNAs" => \$build_snRNAs, | 53 "build_tRNAs" => \$build_tRNAs, |
54 "build_miRNAs" => \$build_miRNAs, | 54 "build_snRNAs" => \$build_snRNAs, |
55 "build_transcripts" => \$build_transcripts, | 55 "build_miRNAs" => \$build_miRNAs, |
56 "build_rRNAs" => \$build_rRNAs, | 56 "build_transcripts" => \$build_transcripts, |
57 "build_TE" => \$build_TE | 57 "build_rRNAs" => \$build_rRNAs, |
58 ); | 58 "build_TE" => \$build_TE |
59 | 59 ); |
60 my $fq_collection = 'fastq_dir/'; | 60 |
61 mkdir $dir; mkdir $fq_collection; | 61 my $fq_collection = 'fastq_dir/'; |
62 $dir = $dir.'/' unless $dir =~ /\/$/; | 62 mkdir $dir; mkdir $fq_collection; |
63 mkdir $dir.'/css';mkdir $dir.'/js'; | 63 $dir = $dir.'/' unless $dir =~ /\/$/; |
64 dircopy( $FindBin::Bin.'/css', $dir.'/css' ); | 64 mkdir $dir.'/css';mkdir $dir.'/js'; |
65 dircopy( $FindBin::Bin.'/js', $dir.'/js' ); | 65 dircopy( $FindBin::Bin.'/css', $dir.'/css' ); |
66 | 66 dircopy( $FindBin::Bin.'/js', $dir.'/js' ); |
67 my $file = $dir.'report.txt'; | 67 |
68 open my $report, '>', $file or die "Cannot open $file $!\n"; | 68 my $file = $dir.'report.txt'; |
69 | 69 open my $report, '>', $file or die "Cannot open $file $!\n"; |
70 my @toBuild = ( [$build_index, \$ref], [$build_tRNAs, \$tRNAs], [$build_rRNAs, \$rRNAs], [$build_snRNAs, \$snRNAs], [$build_miRNAs, \$miRNAs], [$build_transcripts, \$transcripts], [$build_TE, \$TE] ); | 70 |
71 to_build ( \@toBuild, $report, $dir ); | 71 my @toBuild = ( [$build_index, \$ref], [$build_tRNAs, \$tRNAs], [$build_rRNAs, \$rRNAs], [$build_snRNAs, \$snRNAs], [$build_miRNAs, \$miRNAs], [$build_transcripts, \$transcripts], [$build_TE, \$TE] ); |
72 | 72 to_build ( \@toBuild, $report, $dir ); |
73 my $proc_child = ceil($max_procs / scalar(@fastq)); | 73 |
74 my $proc_grand_child = ceil($proc_child/4); | 74 my $proc_child = ceil($max_procs / scalar(@fastq)); |
75 my $pm = Parallel::ForkManager->new($max_procs); | 75 my $proc_grand_child = ceil($proc_child/4); |
76 my $pm2 = Parallel::ForkManager->new($proc_grand_child); | 76 my $pm = Parallel::ForkManager->new($max_procs); |
77 | 77 my $pm2 = Parallel::ForkManager->new($proc_grand_child); |
78 $pm->run_on_finish( sub { | 78 |
79 my ($pid, $exit_code, $ident) = @_; | 79 $pm->run_on_finish( sub { |
80 print $report "Fastq fork $ident just finished ". | 80 my ($pid, $exit_code, $ident) = @_; |
81 "with PID $pid and exit code: $exit_code\n"; | 81 print $report "Fastq fork $ident just finished ". |
82 die "Something went wrong!\n" if $exit_code != 0; | 82 "with PID $pid and exit code: $exit_code\n"; |
83 }); | 83 die "Something went wrong!\n" if $exit_code != 0; |
84 $pm->run_on_start( sub { | 84 }); |
85 my ($pid,$ident)=@_; | 85 |
86 print $report "Fastq fork : $ident started, pid: $pid\n"; | 86 |
87 }); | 87 $pm->run_on_start( sub { |
88 $pm2->run_on_finish( sub { | 88 my ($pid,$ident)=@_; |
89 my ($pid, $exit_code, $ident) = @_; | 89 print $report "Fastq fork : $ident started, pid: $pid\n"; |
90 print $report "** Subgroup fork $ident just finished ". | 90 }); |
91 "with PID $pid and exit code: $exit_code\n"; | 91 |
92 die "Something went wrong!\n" if $exit_code != 0; | 92 |
93 }); | 93 $pm2->run_on_finish( sub { |
94 $pm2->run_on_start( sub { | 94 my ($pid, $exit_code, $ident) = @_; |
95 my ($pid,$ident)=@_; | 95 print $report "** Subgroup fork $ident just finished ". |
96 print $report "** Subgroup fork $ident started, pid: $pid\n"; | 96 "with PID $pid and exit code: $exit_code\n"; |
97 }); | 97 die "Something went wrong!\n" if $exit_code != 0; |
98 | 98 }); |
99 | 99 |
100 foreach my $child ( 0 .. $#fastq ) | 100 |
101 { | 101 $pm2->run_on_start( sub { |
102 my @suffix = ('.fastq', '.fastq.gz,', '.fq', '.fq.gz', 'ref', '.dat', '.fa','.fas','.fasta', '.txt'); | 102 my ($pid,$ident)=@_; |
103 my ( $name, $path, $suffix ) = fileparse( $fastq[$child], @suffix ); | 103 print $report "** Subgroup fork $ident started, pid: $pid\n"; |
104 my ( $ref_name, $ref_path, $ref_suffix ) = fileparse( $ref, @suffix ); | 104 }); |
105 my ( $TE_name, $TE_path, $TE_suffix ) = fileparse( $TE, @suffix ); | 105 |
106 my ( $ex_name, $ex_path, $ex_suffix ) = fileparse( $transcripts, @suffix ); | 106 foreach my $child ( 0 .. $#fastq ) |
107 | 107 { |
108 $pm->start($fastq[$child]) and next; | 108 my @suffix = ('.fastq', '.fastq.gz,', '.fq', '.fq.gz', 'ref', '.dat', '.fa','.fas','.fasta', '.txt'); |
109 | 109 my ( $name, $path, $suffix ) = fileparse( $fastq[$child], @suffix ); |
110 my $dir_fq = $dir.$fastq_n[$child].'/'; | 110 my ( $ref_name, $ref_path, $ref_suffix ) = fileparse( $ref, @suffix ); |
111 mkdir $dir_fq; | 111 my ( $TE_name, $TE_path, $TE_suffix ) = fileparse( $TE, @suffix ); |
112 | 112 my ( $ex_name, $ex_path, $ex_suffix ) = fileparse( $transcripts, @suffix ); |
113 my $gen_dir = $dir_fq.'genome/'; | 113 |
114 mkdir $gen_dir; | 114 $pm->start($fastq[$child]) and next; |
115 | 115 |
116 my $size_dir = $dir_fq.'size/'; | 116 my $dir_fq = $dir.$fastq_n[$child].'/'; |
117 mkdir $size_dir; | 117 mkdir $dir_fq; |
118 | 118 |
119 my $fastq_resized = $dir_fq.$name.'_'.$min.'-'.$max.'.fastq'; | 119 my $gen_dir = $dir_fq.'genome/'; |
120 size_distribution ( $fastq[$child], $fastq_resized, $size_dir, $min, $max ); | 120 mkdir $gen_dir; |
121 | 121 |
122 my $sam_genome = $gen_dir.$fastq_n[$child].'_'.$min.'-'.$max.'.sam'; | 122 my $size_dir = $dir_fq.'size/'; |
123 my $sam_genome_unique = $gen_dir.$fastq_n[$child].'_'.$min.'-'.$max.'_unique.sam'; | 123 mkdir $size_dir; |
124 my $fastq_prefix = $gen_dir.$fastq_n[$child].'_'.$min.'-'.$max; | 124 |
125 | 125 my $fastq_resized = $dir_fq.$name.'_'.$min.'-'.$max.'.fastq'; |
126 BWA_call ( $ref, $fastq_resized, $sam_genome, $mis, $proc_child, $report ); | 126 size_distribution ( $fastq[$child], $fastq_resized, $size_dir, $min, $max ); |
127 my ( $fai_ref_hashP, $ma, $ma_uni ) = get_unique ( $sam_genome, $sam_genome_unique, $gen_dir, $fq_collection.$fastq_n[$child], 1, $report ); | 127 |
128 | 128 my $sam_genome = $gen_dir.$fastq_n[$child].'_'.$min.'-'.$max.'.sam'; |
129 die "No Reads mapped on the genome reference!\n" if $ma == 0; | 129 my $sam_genome_unique = $gen_dir.$fastq_n[$child].'_'.$min.'-'.$max.'_unique.sam'; |
130 my $scale = 1000000 / $ma; | 130 my $fastq_prefix = $gen_dir.$fastq_n[$child].'_'.$min.'-'.$max; |
131 sam_to_bam_bg ( $sam_genome_unique, $scale, $proc_child ); | 131 |
132 sam_to_bam_bg ( $sam_genome, $scale, $proc_child ); | 132 BWA_call ( $ref, $fastq_resized, $sam_genome, $mis, $proc_child, $report ); |
133 | 133 my ( $fai_ref_hashP, $ma, $ma_uni ) = get_unique ( $sam_genome, $sam_genome_unique, $gen_dir, $fq_collection.$fastq_n[$child], 1, $report ); |
134 my $Gviz_dir = $gen_dir.'Gviz/'; | 134 |
135 my $fai_file = $gen_dir.'fai'; | 135 die "No Reads mapped on the genome reference!\n" if $ma == 0; |
136 mkdir $Gviz_dir; | 136 my $scale = 1000000 / $ma; |
137 my $Gviz_dir_rand = $Gviz_dir.'rand/'; | 137 sam_to_bam_bg ( $sam_genome_unique, $scale, $proc_child ); |
138 mkdir $Gviz_dir_rand; | 138 sam_to_bam_bg ( $sam_genome, $scale, $proc_child ); |
139 my $Gviz_dir_uni = $Gviz_dir.'unique/'; | 139 |
140 mkdir $Gviz_dir_uni; | 140 my $Gviz_dir = $gen_dir.'Gviz/'; |
141 | 141 my $fai_file = $gen_dir.'fai'; |
142 open my $gfai, '>', $fai_file; | 142 mkdir $Gviz_dir; |
143 foreach my $k ( sort keys %{$fai_ref_hashP} ) | 143 my $Gviz_dir_rand = $Gviz_dir.'rand/'; |
144 { | 144 mkdir $Gviz_dir_rand; |
145 print $gfai "$k\t$fai_ref_hashP->{$k}\n"; | 145 my $Gviz_dir_uni = $Gviz_dir.'unique/'; |
146 } | 146 mkdir $Gviz_dir_uni; |
147 close $gfai; | 147 |
148 bg_to_png ( $fai_file, $fastq_prefix.'_unique_plus.bedgraph', $fastq_prefix.'_unique_minus.bedgraph', $Gviz_dir_uni, 'Mb' ); | 148 open my $gfai, '>', $fai_file; |
149 bg_to_png ( $fai_file, $fastq_prefix.'_plus.bedgraph', $fastq_prefix.'_minus.bedgraph', $Gviz_dir_rand, 'Mb' ); | 149 foreach my $k ( sort keys %{$fai_ref_hashP} ) |
150 | 150 { |
151 my $group_dir = $dir_fq.'subgroups/'; | 151 print $gfai "$k\t$fai_ref_hashP->{$k}\n"; |
152 my $fastq_uni = $fq_collection.$fastq_n[$child].'_unique_mappers.fastq'; | 152 } |
153 my $fastq_all = $fq_collection.$fastq_n[$child].'_all_mappers.fastq'; | 153 close $gfai; |
154 my ($bo, $mi, $pi) = subgroups ( $fastq_all, $group_dir, $mis, $misTE, $proc_child, $tRNAs, $rRNAs, $snRNAs, $miRNAs, $transcripts, $TE, $si_min, $si_max, $pi_min, $pi_max, $report); | 154 bg_to_png ( $fai_file, $fastq_prefix.'_unique_plus.bedgraph', $fastq_prefix.'_unique_minus.bedgraph', $Gviz_dir_uni, 'Mb' ); |
155 | 155 bg_to_png ( $fai_file, $fastq_prefix.'_plus.bedgraph', $fastq_prefix.'_minus.bedgraph', $Gviz_dir_rand, 'Mb' ); |
156 pie_chart($group_dir); | 156 |
157 | 157 my $group_dir = $dir_fq.'subgroups/'; |
158 open (my $dupnum, $gen_dir.'dup_mapnum.txt') || die "cannot open dup_mapnum.txt $!"; | 158 my $fastq_uni = $fq_collection.$fastq_n[$child].'_unique_mappers.fastq'; |
159 my %dupnum_genome; | 159 my $fastq_all = $fq_collection.$fastq_n[$child].'_all_mappers.fastq'; |
160 my $header = <$dupnum>; | 160 my ($bo, $mi, $pi) = subgroups ( $fastq_all, $group_dir, $mis, $misTE, $proc_child, $tRNAs, $rRNAs, $snRNAs, $miRNAs, $transcripts, $TE, $si_min, $si_max, $pi_min, $pi_max, $report); |
161 while (<$dupnum>) | 161 |
162 { | 162 pie_chart($group_dir); |
163 chomp $_; | 163 |
164 my @dupline = split /\t/, $_; | 164 open (my $dupnum, $gen_dir.'dup_mapnum.txt') || die "cannot open dup_mapnum.txt $!"; |
165 $dupnum_genome{$dupline[0]} = [$dupline[1], $dupline[2]]; | 165 my %dupnum_genome; |
166 } | 166 my $header = <$dupnum>; |
167 close $dupnum; | 167 while (<$dupnum>) |
168 | 168 { |
169 my $mi_sam = $group_dir.'miRNAs.sam'; | 169 chomp $_; |
170 mkdir $group_dir.'miRNAs/'; | 170 my @dupline = split /\t/, $_; |
171 my $mi_count_file = $group_dir.'miRNAs/miRNAs_reads_counts.txt'; | 171 $dupnum_genome{$dupline[0]} = [$dupline[1], $dupline[2]]; |
172 my ( $mi_count, $mi_ref_size ) = sam_count ( $mi_sam ); | 172 } |
173 | 173 close $dupnum; |
174 rpms_rpkm( $mi_count, $mi_ref_size, $ma, $mi_count_file, $pi, $mi, $bo ); | 174 |
175 | 175 my $mi_sam = $group_dir.'miRNAs.sam'; |
176 my ( $sam_transcripts, $sam_TEs ) = ( $group_dir.'transcripts.sam', $group_dir.'TEs.sam' ); | 176 mkdir $group_dir.'miRNAs/'; |
177 my @types = ($group_dir.'bonafide_reads.fastq', $group_dir.'miRNAs.fastq', $group_dir.'siRNAs.fastq', $group_dir.'piRNAs.fastq' ); | 177 my $mi_count_file = $group_dir.'miRNAs/miRNAs_reads_counts.txt'; |
178 my @types_names = ('bonafide_reads', 'miRNAs', 'siRNAs', 'piRNAs'); | 178 my ( $mi_count, $mi_ref_size ) = sam_count ( $mi_sam ); |
179 foreach my $grand_child ( 0 .. $#types ) | 179 |
180 { | 180 rpms_rpkm( $mi_count, $mi_ref_size, $ma, $mi_count_file, $pi, $mi, $bo ); |
181 my $type_dir = $group_dir.$types_names[$grand_child].'/'; | 181 |
182 my $type_prefix = $types_names[$grand_child].'-'; | 182 my ( $sam_transcripts, $sam_TEs ) = ( $group_dir.'transcripts.sam', $group_dir.'TEs.sam' ); |
183 mkdir $type_dir; | 183 my @types = ($group_dir.'bonafide_reads.fastq', $group_dir.'miRNAs.fastq', $group_dir.'siRNAs.fastq', $group_dir.'piRNAs.fastq' ); |
184 $pm2->start($types[$grand_child]) and next; | 184 my @types_names = ('bonafide_reads', 'miRNAs', 'siRNAs', 'piRNAs'); |
185 my ( $type_sam_genome, $type_sam_TEs, $type_sam_transcripts ) = ( $type_dir.$type_prefix.'genome.sam', $type_dir.$type_prefix.'TEs.sam', $type_dir.$type_prefix.'transcripts.sam' ); | 185 foreach my $grand_child ( 0 .. $#types ) |
186 my ( $type_sam_uni_genome, $type_sam_uni_TEs, $type_sam_uni_transcripts ) = ( $type_dir.$type_prefix.'genome_unique.sam', $type_dir.$type_prefix.'TEs_unique.sam', $type_dir.$type_prefix.'transcripts_unique.sam' ); | 186 { |
187 my ( $type_uni_genome_fastq, $type_uni_TEs_fastq, $type_uni_transcripts_fastq ) = ( $fq_collection.$fastq_n[$child].'-'.$type_prefix.'genome_uni.fastq', $fq_collection.$fastq_n[$child].'-'.$type_prefix.'TEs_uni.fastq', $fq_collection.$fastq_n[$child].'-'.$type_prefix.'transcripts_uni.fastq'); | 187 my $type_dir = $group_dir.$types_names[$grand_child].'/'; |
188 my ( $type_genome_fastq, $type_TEs_fastq, $type_transcripts_fastq ) = ( $fq_collection.$fastq_n[$child].'-'.$type_prefix.'genome.fastq', $fq_collection.$fastq_n[$child].'-'.$type_prefix.'TEs.fastq', $fq_collection.$fastq_n[$child].'-'.$type_prefix.'transcripts.fastq'); | 188 my $type_prefix = $types_names[$grand_child].'-'; |
189 my $type_sequence_hashP = get_fastq_seq ( $types[$grand_child] ); | 189 mkdir $type_dir; |
190 | 190 $pm2->start($types[$grand_child]) and next; |
191 if ( $grand_child == 1 ) | 191 my ( $type_sam_genome, $type_sam_TEs, $type_sam_transcripts ) = ( $type_dir.$type_prefix.'genome.sam', $type_dir.$type_prefix.'TEs.sam', $type_dir.$type_prefix.'transcripts.sam' ); |
192 { | 192 my ( $type_sam_uni_genome, $type_sam_uni_TEs, $type_sam_uni_transcripts ) = ( $type_dir.$type_prefix.'genome_unique.sam', $type_dir.$type_prefix.'TEs_unique.sam', $type_dir.$type_prefix.'transcripts_unique.sam' ); |
193 BWA_call ( $TE, $types[$grand_child], $type_sam_TEs, $misTE, $proc_child, $report ); | 193 my ( $type_uni_genome_fastq, $type_uni_TEs_fastq, $type_uni_transcripts_fastq ) = ( $fq_collection.$fastq_n[$child].'-'.$type_prefix.'genome_uni.fastq', $fq_collection.$fastq_n[$child].'-'.$type_prefix.'TEs_uni.fastq', $fq_collection.$fastq_n[$child].'-'.$type_prefix.'transcripts_uni.fastq'); |
194 BWA_call ( $transcripts, $types[$grand_child], $type_sam_transcripts, $mis, $proc_child, $report ); | 194 my ( $type_genome_fastq, $type_TEs_fastq, $type_transcripts_fastq ) = ( $fq_collection.$fastq_n[$child].'-'.$type_prefix.'genome.fastq', $fq_collection.$fastq_n[$child].'-'.$type_prefix.'TEs.fastq', $fq_collection.$fastq_n[$child].'-'.$type_prefix.'transcripts.fastq'); |
195 BWA_call ( $ref, $types[$grand_child], $type_sam_genome, $mis, $proc_child, $report ); | 195 my $type_sequence_hashP = get_fastq_seq ( $types[$grand_child] ); |
196 extract_sam ( undef, $type_sam_TEs, $type_sam_TEs, $type_sam_uni_TEs, $type_uni_TEs_fastq, $type_uni_TEs_fastq ); | 196 |
197 extract_sam ( undef, $type_sam_transcripts, $type_sam_transcripts, $type_sam_uni_transcripts, $type_transcripts_fastq, $type_uni_transcripts_fastq ); | 197 if ( $grand_child == 1 ) |
198 extract_sam ( undef, $type_sam_genome, $type_sam_genome, $type_sam_uni_genome, $type_genome_fastq, $type_uni_genome_fastq ); | 198 { |
199 } | 199 BWA_call ( $TE, $types[$grand_child], $type_sam_TEs, $misTE, $proc_child, $report ); |
200 else | 200 BWA_call ( $transcripts, $types[$grand_child], $type_sam_transcripts, $mis, $proc_child, $report ); |
201 { | 201 BWA_call ( $ref, $types[$grand_child], $type_sam_genome, $mis, $proc_child, $report ); |
202 extract_sam ( $type_sequence_hashP, $sam_TEs, $type_sam_TEs, $type_sam_uni_TEs, $type_TEs_fastq, $type_uni_TEs_fastq ); | 202 extract_sam ( undef, $type_sam_TEs, $type_sam_TEs, $type_sam_uni_TEs, $type_uni_TEs_fastq, $type_uni_TEs_fastq ); |
203 extract_sam ( $type_sequence_hashP, $sam_transcripts, $type_sam_transcripts, $type_sam_uni_transcripts, $type_transcripts_fastq, $type_uni_transcripts_fastq ); | 203 extract_sam ( undef, $type_sam_transcripts, $type_sam_transcripts, $type_sam_uni_transcripts, $type_transcripts_fastq, $type_uni_transcripts_fastq ); |
204 extract_sam ( $type_sequence_hashP, $sam_genome, $type_sam_genome, $type_sam_uni_genome, $type_genome_fastq, $type_uni_genome_fastq ); | 204 extract_sam ( undef, $type_sam_genome, $type_sam_genome, $type_sam_uni_genome, $type_genome_fastq, $type_uni_genome_fastq ); |
205 } | 205 } |
206 | 206 else |
207 my $ex_count_file = $type_dir.$type_prefix.'transcripts_reads_counts.txt'; | 207 { |
208 my ( $ex_count, $ex_ref_size ) = sam_count_mis ( $type_sam_transcripts ); | 208 extract_sam ( $type_sequence_hashP, $sam_TEs, $type_sam_TEs, $type_sam_uni_TEs, $type_TEs_fastq, $type_uni_TEs_fastq ); |
209 rpms_rpkm_te( $ex_count, $ex_ref_size, $ma, $ex_count_file, $pi, $mi, $bo ); | 209 extract_sam ( $type_sequence_hashP, $sam_transcripts, $type_sam_transcripts, $type_sam_uni_transcripts, $type_transcripts_fastq, $type_uni_transcripts_fastq ); |
210 | 210 extract_sam ( $type_sequence_hashP, $sam_genome, $type_sam_genome, $type_sam_uni_genome, $type_genome_fastq, $type_uni_genome_fastq ); |
211 my ( $TEs_count, $TEs_ref_size, $TEs_count_NoM, $TEs_count_M ) = sam_count_mis ( $type_sam_TEs ); | 211 } |
212 my $TEs_count_file = $type_dir.$type_prefix.'TEs_reads_counts.txt'; | 212 |
213 my $TEs_count_file_M = $type_dir.$type_prefix.'TEs_reads_counts_mismatches.txt'; | 213 my $ex_count_file = $type_dir.$type_prefix.'transcripts_reads_counts.txt'; |
214 my $TEs_count_file_noM = $type_dir.$type_prefix.'TEs_reads_counts_nomismatches.txt'; | 214 my ( $ex_count, $ex_ref_size ) = sam_count_mis ( $type_sam_transcripts ); |
215 rpms_rpkm_te( $TEs_count, $TEs_ref_size, $ma, $TEs_count_file, $pi, $mi, $bo ); | 215 rpms_rpkm_te( $ex_count, $ex_ref_size, $ma, $ex_count_file, $pi, $mi, $bo ); |
216 rpms_rpkm_te( $TEs_count_NoM, $TEs_ref_size, $ma, $TEs_count_file_noM, $pi, $mi, $bo ); | 216 |
217 rpms_rpkm_te( $TEs_count_M, $TEs_ref_size, $ma, $TEs_count_file_M, $pi, $mi, $bo ); | 217 my ( $TEs_count, $TEs_ref_size, $TEs_count_NoM, $TEs_count_M ) = sam_count_mis ( $type_sam_TEs ); |
218 | 218 my $TEs_count_file = $type_dir.$type_prefix.'TEs_reads_counts.txt'; |
219 sam_to_bam_bg ( $type_sam_TEs, $scale, $grand_child ); | 219 my $TEs_count_file_M = $type_dir.$type_prefix.'TEs_reads_counts_mismatches.txt'; |
220 sam_sorted_bam ( $type_sam_transcripts, $grand_child ); sam_sorted_bam ( $type_sam_uni_transcripts, $grand_child ); | 220 my $TEs_count_file_noM = $type_dir.$type_prefix.'TEs_reads_counts_nomismatches.txt'; |
221 sam_sorted_bam ( $type_sam_uni_TEs, $grand_child ); | 221 rpms_rpkm_te( $TEs_count, $TEs_ref_size, $ma, $TEs_count_file, $pi, $mi, $bo ); |
222 | 222 rpms_rpkm_te( $TEs_count_NoM, $TEs_ref_size, $ma, $TEs_count_file_noM, $pi, $mi, $bo ); |
223 my $Gviz_TEs = $type_dir.'Gviz_TEs/'; | 223 rpms_rpkm_te( $TEs_count_M, $TEs_ref_size, $ma, $TEs_count_file_M, $pi, $mi, $bo ); |
224 mkdir $Gviz_TEs; | 224 |
225 bg_to_png ( $group_dir.'TEs.fai', $type_dir.$type_prefix.'TEs_plus.bedgraph', $type_dir.$type_prefix.'TEs_minus.bedgraph', $Gviz_TEs, 'Kb' ); | 225 sam_to_bam_bg ( $type_sam_TEs, $scale, $grand_child ); |
226 | 226 sam_sorted_bam ( $type_sam_transcripts, $grand_child ); sam_sorted_bam ( $type_sam_uni_transcripts, $grand_child ); |
227 my $Gviz_genome= $type_dir.'Gviz_genome/'; | 227 sam_sorted_bam ( $type_sam_uni_TEs, $grand_child ); |
228 my $Gviz_genome_rand = $Gviz_genome.'rand/'; | 228 |
229 my $Gviz_genome_uni = $Gviz_genome.'unique/'; | 229 my $Gviz_TEs = $type_dir.'Gviz_TEs/'; |
230 mkdir $Gviz_genome; mkdir $Gviz_genome_uni; mkdir $Gviz_genome_rand; | 230 mkdir $Gviz_TEs; |
231 | 231 bg_to_png ( $group_dir.'TEs.fai', $type_dir.$type_prefix.'TEs_plus.bedgraph', $type_dir.$type_prefix.'TEs_minus.bedgraph', $Gviz_TEs, 'Kb' ); |
232 sam_to_bam_bg ( $type_sam_genome, $scale, $grand_child ); | 232 |
233 sam_to_bam_bg ( $type_sam_uni_genome, $scale, $grand_child ); | 233 my $Gviz_genome= $type_dir.'Gviz_genome/'; |
234 | 234 my $Gviz_genome_rand = $Gviz_genome.'rand/'; |
235 bg_to_png ( $fai_file, $type_dir.$type_prefix.'genome_unique_plus.bedgraph', $type_dir.$type_prefix.'genome_unique_minus.bedgraph', $Gviz_genome_uni, 'Mb' ); | 235 my $Gviz_genome_uni = $Gviz_genome.'unique/'; |
236 bg_to_png ( $fai_file, $type_dir.$type_prefix.'genome_plus.bedgraph', $type_dir.$type_prefix.'genome_minus.bedgraph', $Gviz_genome_rand, 'Mb' ); | 236 mkdir $Gviz_genome; mkdir $Gviz_genome_uni; mkdir $Gviz_genome_rand; |
237 | 237 |
238 #HTML Details | 238 sam_to_bam_bg ( $type_sam_genome, $scale, $grand_child ); |
239 my $prefix_details_pages = $dir.$fastq_n[$child].'-'.$types_names[$grand_child]; | 239 sam_to_bam_bg ( $type_sam_uni_genome, $scale, $grand_child ); |
240 details_pages ( $type_dir, $prefix_details_pages, \@fastq_n, $fastq_n[$child], $misTE, $dir, $Pcheck ); | 240 |
241 | 241 bg_to_png ( $fai_file, $type_dir.$type_prefix.'genome_unique_plus.bedgraph', $type_dir.$type_prefix.'genome_unique_minus.bedgraph', $Gviz_genome_uni, 'Mb' ); |
242 $pm2->finish(); | 242 bg_to_png ( $fai_file, $type_dir.$type_prefix.'genome_plus.bedgraph', $type_dir.$type_prefix.'genome_minus.bedgraph', $Gviz_genome_rand, 'Mb' ); |
243 } | 243 |
244 $pm2->wait_all_children; | 244 #HTML Details |
245 | 245 my $prefix_details_pages = $dir.$fastq_n[$child].'-'.$types_names[$grand_child]; |
246 if ( $Pcheck eq 'true' ) | 246 details_pages ( $type_dir, $prefix_details_pages, \@fastq_n, $fastq_n[$child], $misTE, $dir, $Pcheck ); |
247 { | 247 |
248 my $ppp = $group_dir.'PPPartners/'; mkdir $ppp; | 248 $pm2->finish(); |
249 print $report "ping_pong_partners $group_dir/piRNAs/TEs.sam $ppp\n"; | 249 } |
250 ping_pong_partners ( $group_dir.'TEs.fai', $group_dir.'piRNAs/piRNAs-TEs_sorted.bam', $ppp, $pi_min ); | 250 $pm2->wait_all_children; |
251 my $ppp_page = $dir.$fastq_n[$child].'-piRNAs-PPP.html'; | 251 |
252 ppp_page ( $group_dir, $ppp_page, \@fastq_n, $fastq_n[$child], $ppp, $dir ); | 252 if ( $Pcheck eq 'true' ) |
253 } | 253 { |
254 | 254 my $ppp = $group_dir.'PPPartners/'; mkdir $ppp; |
255 #HTML Main Webpage | 255 print $report "ping_pong_partners $group_dir/piRNAs/TEs.sam $ppp\n"; |
256 my $index_page = $dir.$fastq_n[$child].'.html'; | 256 ping_pong_partners ( $group_dir.'TEs.fai', $group_dir.'piRNAs/piRNAs-TEs_sorted.bam', $ppp, $pi_min ); |
257 main_page ( $gen_dir, $index_page, \@fastq_n, $fastq_n[$child], $ma, $ma_uni, $dir ); | 257 my $ppp_page = $dir.$fastq_n[$child].'-piRNAs-PPP.html'; |
258 copy ($index_page, $html_out) if $child == 0; | 258 ppp_page ( $group_dir, $ppp_page, \@fastq_n, $fastq_n[$child], $ppp, $dir ); |
259 #HTML Menu | 259 } |
260 my $menu_page = $dir.$fastq_n[$child].'-sub.html'; | 260 |
261 menu_page ( $group_dir, $menu_page, \@fastq_n, $fastq_n[$child], $min, $max, $si_min, $si_max, $pi_min, $pi_max, $dir ); | 261 #HTML Main Webpage |
262 unlink glob "'$group_dir'*.sam"; unlink glob "'$group_dir'*.fastq"; | 262 my $index_page = $dir.$fastq_n[$child].'.html'; |
263 $pm->finish(); # pass an exit code to finish | 263 main_page ( $gen_dir, $index_page, \@fastq_n, $fastq_n[$child], $ma, $ma_uni, $dir ); |
264 copy ($index_page, $html_out) if $child == 0; | |
265 #HTML Menu | |
266 my $menu_page = $dir.$fastq_n[$child].'-sub.html'; | |
267 menu_page ( $group_dir, $menu_page, \@fastq_n, $fastq_n[$child], $min, $max, $si_min, $si_max, $pi_min, $pi_max, $dir ); | |
268 unlink glob "'$group_dir'*.sam"; unlink glob "'$group_dir'*.fastq"; | |
269 $pm->finish(); # pass an exit code to finish | |
270 } | |
271 $pm->wait_all_children; | |
272 unlink glob "'$dir'"."dataset_*symlink.fa*"; | |
273 print $report "Job done!\n"; | |
274 close $report; | |
275 } else { | |
276 print "sRNAPipe v1.1\n"; | |
264 } | 277 } |
265 $pm->wait_all_children; | |
266 unlink glob "'$dir'"."dataset_*symlink.fa*"; | |
267 print $report "Job done!\n"; | |
268 close $report; |