annotate CoverageReport.pl @ 15:a24c8e81cee0 draft

Uploaded
author geert-vandeweyer
date Wed, 18 Jun 2014 08:14:43 -0400
parents 86df3f847a72
children 95062840f80f
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
1 #!/usr/bin/perl
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
2
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
3 # load modules
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
4 use Getopt::Std;
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
5 use File::Basename;
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
6 use Number::Format;
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
7
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
8 # number format
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
9 my $de = Number::Format->new(-thousands_sep =>',',-decimal_point => '.'); # direct class-method call; the indirect-object form 'new Class(...)' is ambiguous to the parser
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
10
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
11 ##########
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
12 ## opts ##
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
13 ##########
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
14 ## input files
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
15 # b : path to input (b)am file
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
16 # t : path to input (t)arget regions in BED format
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
17 ## output files
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
18 # o : report pdf (o)utput file
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
19 # z : all plots and tables in tar.g(z) format
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
20 ## entries in the report
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
21 # r : Coverage per (r)egion (boolean)
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
22 # s : (s)ubregion coverage if average < specified (plots for positions along target region) (boolean)
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
23 # S : (S)ubregion coverage for ALL failed exons => use either s OR S or you will have double plots.
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
24 # A : (A)ll exons will be plotted.
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
25 # L : (L)ist failed exons instead of plotting
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
26 # m : (m)inimal Coverage threshold
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
27 # f : fraction of average as threshold
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
28 # n : sample (n)ame.
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
29
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
30
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
31 getopts('b:t:o:z:rsSALm:n:f:', \%opts) ;
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
32
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
33 # make output directory in (tmp) working dir
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
34 our $wd = "/tmp/Coverage.".int(rand(1000));
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
35 while (-d $wd) {
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
36 $wd = "/tmp/Coverage.".int(rand(1000));
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
37 }
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
38 mkdir($wd) or die("Could not create working directory $wd: $!"); # built-in mkdir fails when the path already exists, so a directory that appears after the -d probe above is never silently shared; creation errors now abort instead of being ignored
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
39
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
40 ## variables
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
41 our %commandsrun = ();
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
42
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
43 if (!exists($opts{'b'}) || !-e $opts{'b'}) {
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
44 die('Bam File not found');
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
45 }
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
46 if (!exists($opts{'t'}) || !-e $opts{'t'}) {
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
47 die('Target File (BED) not found');
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
48 }
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
49
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
50 if (exists($opts{'m'})) {
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
51 $thresh = $opts{'m'};
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
52 }
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
53 else {
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
54 $thresh = 40;
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
55 }
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
56
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
57 if (exists($opts{'f'})) {
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
58 $frac = $opts{'f'};
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
59 }
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
60 else {
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
61 $frac = 0.2;
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
62 }
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
63
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
64 if (exists($opts{'o'})) {
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
65 $pdffile = $opts{'o'};
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
66 }
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
67 else {
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
68 $pdffile = "$wd/CoverageReport.pdf";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
69 }
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
70
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
71 if (exists($opts{'z'})) {
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
72 $tarfile = $opts{'z'};
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
73 }
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
74 else {
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
75 $tarfile = "$wd/Results.tar.gz";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
76 }
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
77
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
78 # 1. Global Summary => default
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
79 &GlobalSummary($opts{'b'}, $opts{'t'});
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
80
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
81 # 2. Coverage per position
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
82 &SubRegionCoverage($opts{'b'}, $opts{'t'});
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
83 our %filehash;
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
84 if (exists($opts{'s'}) || exists($opts{'S'}) || exists($opts{'A'}) || exists($opts{'L'})) {
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
85 system("mkdir $wd/SplitFiles");
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
86 ## get position coverages
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
87 ## split input files
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
88 open(IN, '<', "$wd/Targets.Position.Coverage") or die("Could not read $wd/Targets.Position.Coverage: $!"); # explicit read mode; abort rather than silently producing no split files
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
89 my $fileidx = 0;
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
90 my $currreg = '';
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
91 while (<IN>) {
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
92 my $line = $_;
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
93 chomp($line);
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
94 my @p = split(/\t/,$line);
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
95 my $reg = $p[0].'-'.$p[1].'-'.$p[2]; #.$p[3];
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
96 my $ex = $p[3];
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
97 if ($reg ne $currreg) {
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
98 ## new exon open new outfile
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
99 if ($currreg ne '') {
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
100 ## filehandle is open. close it
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
101 close OUT;
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
102 }
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
103 if (!exists($filehash{$reg})) {
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
104 $fileidx++;
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
105 $filehash{$reg}{'idx'} = $fileidx;
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
106 $filehash{$reg}{'exon'} = $ex;
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
107 open OUT, ">> $wd/SplitFiles/File_$fileidx.txt";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
108 $currreg = $reg;
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
109 }
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
110 else {
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
111 open OUT, ">> $wd/SplitFiles/File_".$filehash{$reg}{'idx'}.".txt";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
112 $currreg = $reg;
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
113 }
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
114 }
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
115 ## print the line to the open filehandle.
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
116 print OUT "$line\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
117 }
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
118 close OUT;
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
119 close IN;
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
120
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
121 }
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
122
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
123 ## sort output files according to targets file
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
124 if (exists($opts{'r'}) ) {
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
125 my %hash = ();
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
126 open(IN, '<', "$wd/Targets.Global.Coverage") or die("Could not read $wd/Targets.Global.Coverage: $!"); # this file is truncated and rewritten a few lines below, so an unreadable input must not go unnoticed
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
127 while (<IN>) {
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
128 my @p = split(/\t/,$_) ;
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
129 $hash{$p[3]} = $_;
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
130 }
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
131 close IN;
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
132 open OUT, ">$wd/Targets.Global.Coverage";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
133 open(IN, '<', $opts{'t'}) or die("Could not read target file $opts{'t'}: $!"); # three-argument form: the user-supplied path is never interpreted as an open mode or a pipe
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
134 while (<IN>) {
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
135 chomp; my @p = split(/\t/,$_) ; # strip the newline first: in a 4-column BED it would stay attached to the name in $p[3] and the %hash lookup on the next line would never match
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
136 print OUT $hash{$p[3]};
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
137 }
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
138 close IN;
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
139 close OUT;
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
140 }
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
141
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
142
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
143 ####################################
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
144 ## PROCESS RESULTS & CREATE PLOTS ##
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
145 ####################################
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
146 system("mkdir $wd/Report");
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
147
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
148 system("mkdir $wd/Rout");
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
149 system("mkdir $wd/Plots");
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
150
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
151 $samplename = $opts{'n'};
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
152 $samplename =~ s/_/\\_/g;
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
153
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
154 # 0. Preamble
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
155 ## compose preamble
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
156 open OUT, ">$wd/Report/Report.tex";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
157 print OUT '\documentclass[a4paper,10pt]{article}'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
158 print OUT '\usepackage[left=2cm,top=1.5cm,right=1.5cm,bottom=2.5cm,nohead]{geometry}'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
159 print OUT '\usepackage{longtable}'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
160 print OUT '\usepackage[T1]{fontenc}'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
161 print OUT '\usepackage{fancyhdr}'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
162 print OUT '\usepackage[latin9]{inputenc}'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
163 print OUT '\usepackage{color}'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
164 print OUT '\usepackage[pdftex]{graphicx}'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
165 print OUT '\definecolor{grey}{RGB}{160,160,160}'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
166 print OUT '\definecolor{darkgrey}{RGB}{100,100,100}'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
167 print OUT '\definecolor{red}{RGB}{255,0,0}'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
168 print OUT '\definecolor{orange}{RGB}{238,118,0}'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
169 print OUT '\setlength\LTleft{0pt}'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
170 print OUT '\setlength\LTright{0pt}'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
171 print OUT '\begin{document}'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
172 print OUT '\pagestyle{fancy}'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
173 print OUT '\fancyhead{}'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
174 print OUT '\renewcommand{\footrulewidth}{0.4pt}'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
175 print OUT '\renewcommand{\headrulewidth}{0pt}'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
176 print OUT '\fancyfoot[R]{\today\hspace{2cm}\thepage\ of \pageref{endofdoc}}'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
177 print OUT '\fancyfoot[C]{}'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
178 print OUT '\fancyfoot[L]{Coverage Report for ``'.$samplename.'"}'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
179 print OUT '\let\oldsubsubsection=\subsubsection'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
180 print OUT '\renewcommand{\subsubsection}{%'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
181 print OUT ' \filbreak'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
182 print OUT ' \oldsubsubsection'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
183 print OUT '}'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
184 # main title
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
185 print OUT '\section*{Coverage Report for ``'.$samplename.'"}'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
186 close OUT;
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
187
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
188 # 1. Summary Report
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
189 # Get samtools flagstat summary of BAM file
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
190 my $flagstat = do { open(my $fs, '-|', 'samtools', 'flagstat', $opts{'b'}) or die("Could not run samtools flagstat: $!"); local $/; <$fs> }; # list-form pipe: the BAM path is handed to samtools as a single argument and is not re-parsed by a shell; output is slurped as one string exactly like the backtick form
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
191 my @s = split(/\n/,$flagstat);
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
192 # Get number of reads mapped in total
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
193 ## updated on 2012-10-1 !!
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
194 ($totalmapped) = ((grep { /^\d+(?: \+ \d+)? mapped \(/ } @s), $s[2]); # pick the "mapped" row by content, since its position differs between samtools versions; falls back to the historical third line when no row matches
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
195 $totalmapped =~ s/^(\d+)(\s.+)/$1/;
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
196 # count columns
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
197 my $head = `head -n 1 $wd/Targets.Global.Coverage`;
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
198 chomp($head);
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
199 my @cols = split(/\t/,$head);
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
200 my $nrcols = scalar(@cols);
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
201 my $covcol = $nrcols - 3;
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
202 # get min/max/median/average coverage => values
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
203 my $covs = `cut -f $covcol $wd/Targets.Global.Coverage`;
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
204 my @coverages = split(/\n/,$covs);
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
205 my ($eavg,$med,$min,$max,$first,$third,$ontarget) = arraystats(@coverages);
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
206 my $spec = sprintf("%.1f",($ontarget / $totalmapped)*100);
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
207 # get min/max/median/average coverage => boxplot in R
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
208 open OUT, ">$wd/Rout/boxplot.R";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
209 print OUT 'coverage <- read.table("../Targets.Global.Coverage",as.is=TRUE,sep="\t",header=FALSE)'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
210 print OUT 'coverage <- coverage[,'.$covcol.']'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
211 print OUT 'png(file="../Plots/CoverageBoxPlot.png", bg="white", width=240, height=480)'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
212 print OUT 'boxplot(coverage,range=1.5,main="Target Region Coverage")'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
213 print OUT 'graphics.off()'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
214 close OUT;
12
86df3f847a72 Switched to R 3.0.2 from iuc, and moved bedtools to seperate tool_definition
geert-vandeweyer
parents: 11
diff changeset
215 system("cd $wd/Rout && Rscript boxplot.R");
1
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
216
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
217 ## global nt coverage plot
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
218 ## use perl to make histogram (lower memory)
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
219 open IN, "$wd/Targets.Position.Coverage";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
220 my %dens;
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
221 my $counter = 0;
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
222 my $sum = 0;
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
223 while (<IN>) {
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
224 chomp();
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
225 my @p = split(/\t/);
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
226 $sum += $p[-1];
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
227 $counter++;
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
228 if (defined($dens{$p[-1]})) {
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
229 $dens{$p[-1]}++;
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
230 }
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
231 else {
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
232 $dens{$p[-1]} = 1;
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
233 }
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
234 }
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
235 die("No per-base coverage rows found in $wd/Targets.Position.Coverage") if ($counter == 0); $avg = $sum/$counter; # explicit message instead of Perl's bare 'Illegal division by zero' when the position file is missing or empty
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
236 close IN;
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
237 open OUT, ">$wd/Rout/hist.txt";
3
39c374d4cba7 Uploaded
geert-vandeweyer
parents: 1
diff changeset
238 if (!defined($dens{'0'})) {
39c374d4cba7 Uploaded
geert-vandeweyer
parents: 1
diff changeset
239 $dens{'0'} = 0;
39c374d4cba7 Uploaded
geert-vandeweyer
parents: 1
diff changeset
240 }
1
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
241 foreach (keys(%dens)) {
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
242 print OUT "$_;$dens{$_}\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
243 }
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
244 close OUT;
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
245 open OUT, ">$wd/Rout/ntplot.R";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
246 # read coverage hist in R to plot
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
247 print OUT 'coverage <- read.table("hist.txt" , as.is = TRUE, header=FALSE,sep=";")'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
248 print OUT 'mincov <- '."$thresh \n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
249 print OUT "avg <- round($avg)\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
250 print OUT "colnames(coverage) <- c('cov','count')\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
251 print OUT 'coverage$cov <- coverage$cov / avg'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
252 print OUT 'rep <- which(coverage$cov > 1)'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
253 print OUT 'coverage[coverage$cov > 1,1] <- 1'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
254 print OUT 'values <- coverage[coverage$cov < 1,]'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
255 print OUT 'values <- rbind(values,c(1,sum(coverage[coverage$cov == 1,"count"])))'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
256 print OUT 'values <- values[order(values$cov),]'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
257 print OUT 'prevcount <- 0'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
258 # make cumulative count data frame
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
259 print OUT 'for (i in rev(values$cov)) {'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
260 print OUT ' values[values$cov == i,"count"] <- prevcount + values[values$cov == i,"count"]'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
261 print OUT ' prevcount <- values[values$cov == i,"count"]'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
262 print OUT '}'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
263 print OUT 'values$count <- values$count / (values[values$cov == 0,"count"] / 100)'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
264 # get some values to plot lines.
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
265 print OUT 'mincov.x <- mincov/avg'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
266 print OUT 'if (mincov/avg <= 1) {'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
267 print OUT ' ii <- which(values$cov == mincov.x)'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
268 print OUT ' if (length(ii) == 1) {'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
269 print OUT ' mincov.y <- values[ii[1],"count"]'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
270 print OUT ' } else {'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
271 print OUT ' i1 <- max(which(values$cov < mincov.x))'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
272 print OUT ' i2 <- min(which(values$cov > mincov.x))'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
273 print OUT ' mincov.y <- ((values[i2,"count"] - values[i1,"count"])/(values[i2,"cov"] - values[i1,"cov"]))*(mincov.x - values[i1,"cov"]) + values[i1,"count"]'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
274 print OUT ' }'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
275 print OUT '}'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
276 # open output image and create plot
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
277 print OUT 'png(file="../Plots/CoverageNtPlot.png", bg="white", width=540, height=480)'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
278 print OUT 'par(xaxs="i",yaxs="i")'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
279 print OUT 'plot(values$cov,values$count,ylim=c(0,100),pch=".",main="Cumulative Normalised Base-Coverage Plot",xlab="Normalised Coverage",ylab="Cumulative Nr. Of Bases")'."\n"; # x-axis label spelling corrected to match the plot title
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
280 print OUT 'lines(values$cov,values$count)'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
281 print OUT 'if (mincov.x <= 1) {'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
282 print OUT ' lines(c(mincov.x,mincov.x),c(0,mincov.y),lty=2,col="darkgreen")'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
283 print OUT ' lines(c(0,mincov.x),c(mincov.y,mincov.y),lty=2,col="darkgreen")'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
284 print OUT ' text(1,(95),pos=2,col="darkgreen",labels="Threshold: '.$thresh.'x")'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
285 print OUT ' text(1,(91),pos=2,col="darkgreen",labels=paste("%Bases: ",round(mincov.y,2),"%",sep=""))'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
286 print OUT '} else {'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
287 print OUT ' text(1,(95),pos=2,col="darkgreen",labels="Threshold ('.$thresh.'x) > Average")'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
288 print OUT ' text(1,(91),pos=2,col="darkgreen",labels="Plotting impossible")'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
289 print OUT '}'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
290 print OUT 'frac.x <- '."$frac\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
291 print OUT 'ii <- which(values$cov == frac.x)'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
292 print OUT 'if (length(ii) == 1) {'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
293 print OUT ' frac.y <- values[ii[1],"count"]'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
294 print OUT '} else {'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
295 print OUT ' i1 <- max(which(values$cov < frac.x))'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
296 print OUT ' i2 <- min(which(values$cov > frac.x))'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
297 print OUT ' frac.y <- ((values[i2,"count"] - values[i1,"count"])/(values[i2,"cov"] - values[i1,"cov"]))*(frac.x - values[i1,"cov"]) + values[i1,"count"]'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
298 print OUT '}'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
299 print OUT 'lines(c(frac.x,frac.x),c(0,frac.y),lty=2,col="red")'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
300 print OUT 'lines(c(0,frac.x),c(frac.y,frac.y),lty=2,col="red")'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
301 #print OUT 'text((frac.x+0.05),(frac.y - 2),pos=4,col="red",labels=paste(frac.x," x Avg.Cov : ",round(frac.x * avg,2),"x",sep="" ))'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
302 #print OUT 'text((frac.x+0.05),(frac.y-5),pos=4,col="red",labels=paste("%Bases: ",round(frac.y,2),"%",sep=""))'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
303 print OUT 'text(1,86,pos=2,col="red",labels=paste(frac.x," x Avg.Cov : ",round(frac.x * avg,2),"x",sep="" ))'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
304 print OUT 'text(1,82,pos=2,col="red",labels=paste("%Bases: ",round(frac.y,2),"%",sep=""))'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
305
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
306 print OUT 'graphics.off()'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
307
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
308 close OUT;
12
86df3f847a72 Switched to R 3.0.2 from iuc, and moved bedtools to seperate tool_definition
geert-vandeweyer
parents: 11
diff changeset
309 system("cd $wd/Rout && Rscript ntplot.R");
1
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
## PRINT TO .TEX FILE
# Appends the "Overall Summary" subsection to Report.tex:
#   - top row    : coverage boxplot (left) and CoverageNtPlot.png (right)
#   - bottom row : table built from the lines in @s (left) and the target
#                  region / target base coverage table (right)
# Reads $wd, @s, @coverages, $min, $first, $med, $third, $max, $eavg, $spec,
# $counter, $avg and %dens, all of which are filled in earlier in the script.
open(OUT, '>>', "$wd/Report/Report.tex") or die "Could not append to $wd/Report/Report.tex: $!";
# average coverage overviews
print OUT '\subsection*{Overall Summary}'."\n";
print OUT '{\small ';
# left : boxplot
print OUT '\begin{minipage}{0.3\linewidth}\centering'."\n";
print OUT '\includegraphics[width=\textwidth,keepaspectratio=true]{../Plots/CoverageBoxPlot.png}'."\n";
print OUT '\end{minipage}'."\n";
# right : cum.cov.plot
print OUT '\hspace{0.6cm}'."\n";
print OUT '\begin{minipage}{0.65\linewidth}\centering'."\n";
print OUT '\includegraphics[width=\textwidth,keepaspectratio=true]{../Plots/CoverageNtPlot.png}'."\n";
print OUT '\end{minipage} \\\\'."\n";
## next line
print OUT '\begin{minipage}{0.48\linewidth}'."\n";
print OUT '\vspace{-1.2em}'."\n";
print OUT '\begin{tabular}{ll}'."\n";
# bam statistics
print OUT '\multicolumn{2}{l}{\textbf{\underline{Samtools Flagstat Summary}}} \\\\'."\n";
foreach my $statline (@s) {
	# Each usable line is "<count> <description>". Skip anything else:
	# after a failed match $1/$2 still hold the captures of the previous
	# successful match, which would print the previous row a second time.
	next unless ($statline =~ m/^(\d+)\s(.+)$/);
	my $one = $1;	# the count
	my $two = $2;	# the description
	$two =~ s/\s\+\s0\s//;
	$two = ucfirst($two);
	$one =~ s/%/\\%/g;
	# remove '+ 0 ' from front
	$two =~ s/\+\s0\s//;
	# cut everything from the first ' +...' or ':...' onwards and put the
	# closing bracket back
	$two =~ s/(\s\+.*)|(:.*)/\)/;
	# escape for LaTeX
	$two =~ s/%/\\%/g;
	$two =~ s/>=/\$\\ge\$/g;
	$two = ucfirst($two);
	print OUT '\textbf{'.$two.'} & '.$one.' \\\\'."\n";
}
print OUT '\end{tabular}\end{minipage}'."\n";
print OUT '\hspace{1.5cm}'."\n";
# target coverage statistics
print OUT '\begin{minipage}{0.4\linewidth}'."\n";
#print OUT '\vspace{-4.8em}'."\n";
print OUT '\begin{tabular}{ll}'."\n";
print OUT '\multicolumn{2}{l}{\textbf{\underline{Target Region Coverage}}} \\\\'."\n";
print OUT '\textbf{Number of Target Regions} & '.scalar(@coverages).' \\\\'."\n";
print OUT '\textbf{Minimal Region Coverage} & '.$min.' \\\\'."\n";
print OUT '\textbf{25\% Region Coverage} & '.$first.' \\\\'. "\n";
print OUT '\textbf{50\% (Median) Region Coverage} & '.$med.' \\\\'. "\n";
print OUT '\textbf{75\% Region Coverage} & '.$third.' \\\\'. "\n";
print OUT '\textbf{Maximal Region Coverage} & '.$max.' \\\\'. "\n";
print OUT '\textbf{Average Region Coverage} & '.int($eavg).' \\\\'. "\n";
print OUT '\textbf{Mapped On Target} & '.$spec.' \\\\'."\n";
print OUT '\multicolumn{2}{l}{\textbf{\underline{Target Base Coverage }}} \\\\'."\n";
print OUT '\textbf{Number of Target Bases} & '.$counter.' \\\\'."\n";
print OUT '\textbf{Average Base Coverage} & '.int($avg).' \\\\'. "\n";
# show an explicit 0 instead of an empty cell when %dens has no entry for
# coverage 0
my $noncovered = defined($dens{'0'}) ? $dens{'0'} : 0;
print OUT '\textbf{Non-Covered Bases} & '.$noncovered.' \\\\'."\n";
#print OUT '\textbf{Bases Covered $ge$ '.$frac.'xAvg.Cov} & '.
print OUT '\end{tabular}\end{minipage}}'."\n";
close OUT;
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
368
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
# 2. GLOBAL COVERAGE OVERVIEW PER GENE
# Walks through Targets.Global.Coverage (tab separated, one target region per
# line, column 4 = "<gene>|<exon>" or "<gene> <exon>"), and
#   - records every region in @allregions / @allexons,
#   - records regions that fail in @failedregions / @failedexons
#     (last column < 1, or coverage column < $thresh),
#   - draws one log10 barplot per gene with R (Plots/Coverage_<gene>.png),
#   - appends the "Gene Summaries" subsection to Report.tex.
# Consecutive lines with the same gene are treated as one gene, so the input
# is expected to be grouped by gene.
# NOTE: the four statements below are no-ops kept from the original script;
# they only name the package arrays that this section fills.
@failedexons;
@allexons;
@allregions;
@failedregions;
if (exists($opts{'r'}) || exists($opts{'s'}) || exists($opts{'S'})) {
	# count columns
	my $head = `head -n 1 $wd/Targets.Global.Coverage`;
	chomp($head);
	my @cols = split(/\t/,$head);
	my $nrcols = scalar(@cols);	# last column is read as "covperc" in R
	my $covcol = $nrcols - 3;	# 1-based column holding the region coverage

	# Writes Rout/barplot.R for table rows $first..$last (1-based line
	# numbers of one gene), runs it, and queues the resulting image for the
	# report. Used for every gene, including the last one, so all plots get
	# the same colouring (grey / orange = incomplete / red = below threshold)
	# and the same label placement.
	my $plot_gene = sub {
		my ($gene, $first, $last) = @_;
		my $rows = 'c('.$first.':'.$last.')';
		open(OUT, '>', "$wd/Rout/barplot.R") or die "Could not write $wd/Rout/barplot.R: $!";
		print OUT 'coveragetable <- read.table("../Targets.Global.Coverage",as.is=TRUE,sep="\t",header=FALSE)'."\n";
		print OUT 'coverage <- coveragetable['.$rows.','.$covcol.']'."\n";
		print OUT 'entries <- coveragetable['.$rows.',4]'."\n";
		# strip the "<gene>|" prefix so only the exon part labels the bars
		print OUT 'entries <- sub("\\\\S+\\\\|","",entries,perl=TRUE)'."\n";
		# avoid log10(0)
		print OUT 'coverage[coverage < 1] <- 1'."\n";
		print OUT 'colors <- c(rep("grey",length(coverage)))'."\n";
		# coverage not whole target region => orange
		print OUT 'covperc <- coveragetable['.$rows.','.$nrcols.']'."\n";
		print OUT 'colors[covperc<1] <- "orange"'."\n";
		# coverage below threshold => red (assigned last, so it overrules orange)
		print OUT 'colors[coverage<'.$thresh.'] <- "red"'."\n";

		# genes with more than 21 regions get a double-size plot
		if ($last - $first > 20) {
			$scale = 2;
		}
		else {
			$scale = 1;
		}
		my $width = 480 * $scale;
		my $height = 240 * $scale;
		print OUT 'png(file="../Plots/Coverage_'.$gene.'.png", bg="white", width='.$width.', height='.$height.')'."\n";
		print OUT 'ylim = c(0,max(max(log10(coverage),log10('.($thresh+20).'))))'."\n";
		print OUT 'mp <- barplot(log10(coverage),col=colors,main="Exon Coverage for '.$gene.'",ylab="Log10(Coverage)",ylim=ylim)'."\n";
		print OUT 'text(mp, log10(coverage) + '.(0.4/$scale).',format(coverage),xpd = TRUE,srt=90)'."\n";
		print OUT 'text(mp,par("usr")[3]-0.05,labels=entries,srt=45,adj=1,xpd=TRUE)'."\n";
		print OUT 'abline(h=log10('.$thresh.'),lwd=4,col=rgb(255,0,0,100,maxColorValue=255))'."\n";
		print OUT 'graphics.off()'."\n";
		close OUT;
		system("cd $wd/Rout && Rscript barplot.R");
		# small plots are placed two per row in the report, large ones get
		# the full text width
		my $graphic = '\includegraphics[width=\textwidth,keepaspectratio=true]{../Plots/Coverage_'.$gene.'.png}';
		if ($scale == 1) {
			push(@small,$graphic);
		}
		else {
			push(@large,$graphic);
		}
	};

	# Coverage Plots for each gene => barplots in R, table here.
	open(IN, '<', "$wd/Targets.Global.Coverage") or die "Could not read $wd/Targets.Global.Coverage: $!";
	my $currgroup = '';
	my $startline = 0;
	my $stopline = 0;
	$linecounter = 0;
	while (my $line = <IN>) {
		$linecounter++;
		chomp($line);
		my @c = split(/\t/,$line);
		push(@allregions,$c[0].'-'.$c[1].'-'.$c[2]);
		my $group = $c[3];
		## coverage failure?
		# (the original indexed with the undefined $nrcol, which only hit
		# the last column because undef-1 evaluates to -1)
		if ($c[$nrcols-1] < 1 || $c[$covcol-1] < $thresh) {
			push(@failedexons,$group);
			push(@failedregions,$c[0].'-'.$c[1].'-'.$c[2]);
		}
		## store exon
		push(@allexons,$group);
		## extract and check gene
		$group =~ s/^(\S+)[\|\s](.+)/$1/;
		if ($group ne $currgroup ) {
			if ($currgroup ne '') {
				# new gene, make plot for the previous one;
				# $stopline still points at its last line.
				$plot_gene->($currgroup,$startline,$stopline);
			}
			$currgroup = $group;
			$startline = $linecounter;
		}
		$stopline = $linecounter;
	}
	close IN;
	if ($currgroup ne '') {
		# last gene, make plot.
		$plot_gene->($currgroup,$startline,$stopline);
	}
	## print to TEX
	open(OUT, '>>', "$wd/Report/Report.tex") or die "Could not append to $wd/Report/Report.tex: $!";
	print OUT '\subsection*{Gene Summaries}'."\n";
	print OUT '\underline{Legend:} \\\\'."\n";
	print OUT '{\color{red}\textbf{RED:} Coverage did not reach set threshold of '.$thresh.'} \\\\'."\n";
	print OUT '{\color{orange}\textbf{ORANGE:} Coverage was incomplete for the exon. Overruled by red.} \\\\' ."\n";
	# small plots: two half-width minipages per row, blank line between rows
	$col = 1;
	foreach my $graphic (@small) {
		if ($col > 2) {
			$col = 1;
			print OUT "\n";
		}
		print OUT '\begin{minipage}{0.5\linewidth}\centering'."\n";
		print OUT $graphic."\n";
		print OUT '\end{minipage}'."\n";
		$col++;
	}
	## new line
	# $col == 2 means the last row holds a single plot; end that row explicitly
	if ($col == 2) {
		print OUT '\\\\'." \n";
	}
	# large plots: one per row
	foreach my $graphic (@large) {
		print OUT $graphic."\n";
	}
	close OUT;

}
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
511
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
# 3. Detailed overview of failed exons (globally failed)
# With option -s: for every region in @failedregions, plot the per-position
# log10 coverage with R (Plots/Coverage_<exon>.png) and add the image to the
# "Failed Exon Plots" subsection of Report.tex, two plots per row.
# %filehash maps a region key to its exon name ('exon') and to the index
# ('idx') of its file SplitFiles/File_<idx>.txt.
if (exists($opts{'s'})) {
	# count columns
	my $head = `head -n 1 $wd/Targets.Position.Coverage`;
	chomp($head);
	my @cols = split(/\t/,$head);
	my $nrcols = scalar(@cols);
	my $covcol = $nrcols;		# coverage is read from the last column
	my $poscol = $nrcols -1;	# position from the one before it
	# tex section header
	open(TEX, '>>', "$wd/Report/Report.tex") or die "Could not append to $wd/Report/Report.tex: $!";
	print TEX '\subsection*{Failed Exon Plots}'."\n";
	$col = 1;
	print TEX '\underline{NOTE:} Only exons with global coverage $<$'.$thresh.' or incomplete coverage were plotted \\\\'."\n";
	foreach my $region (@failedregions) {
		# two plots per row, blank line between rows
		if ($col > 2) {
			$col = 1;
			print TEX "\n";
		}
		# which exon
		my $exon = $filehash{$region}{'exon'};
		# link exon to tmp file
		my $exonfile = "$wd/SplitFiles/File_".$filehash{$region}{'idx'}.".txt";
		## determine transcript orientation and location
		my $firstline = `head -n 1 $exonfile`;
		my @firstcols = split(/\t/,$firstline);
		my $orient = $firstcols[5];
		my $genomicchr = $firstcols[0];
		my $genomicstart = $firstcols[1];
		my $genomicstop = $firstcols[2];
		# the region length is the same for both orientations; only the
		# separator between start and stop differs ('+' for plus strand,
		# '-' for anything else)
		$bps = $genomicstop - $genomicstart + 1;
		my $separator = ($orient eq '+') ? '+' : '-';
		$subtitle = "Region 0-$bps: $genomicchr:".$de->format_number($genomicstart).$separator.$de->format_number($genomicstop);
		# print Rscript
		open(OUT, '>', "$wd/Rout/exonplot.R") or die "Could not write $wd/Rout/exonplot.R: $!";
		print OUT 'coveragetable <- read.table("'.$exonfile.'",as.is=TRUE,sep="\t",header=FALSE)'."\n";
		print OUT 'coverage <- coveragetable[,'.$covcol.']'."\n";
		# avoid log10(0)
		print OUT 'coverage[coverage < 1] <- 1'."\n";
		print OUT 'positions <- coveragetable[,'.$poscol.']'."\n";

		my $width = 480 ;
		my $height = 240 ;
		# $exonstr : file-name form (whitespace -> '_')
		# $exon    : title form ('_' and '|' -> ' ')
		my $exonstr = $exon;
		$exonstr =~ s/\s/_/g;
		$exon =~ s/_/ /g;
		$exon =~ s/\|/ /g;
		print OUT 'png(file="../Plots/Coverage_'.$exonstr.'.png", bg="white", width='.$width.', height='.$height.')'."\n";
		print OUT 'ylim = c(0,log10(max(max(coverage),'.($thresh+10).')))'."\n";
		if ($orient eq '-') {
			# minus strand: reverse the x axis so the plot reads in transcript direction
			print OUT 'plot(positions,log10(coverage),type="n",main="Coverage for '.$exon.'",ylab="log10(Coverage)",ylim=ylim,xlab="Position",xlim=rev(range(positions)),sub="(Transcribed from minus strand)")'."\n";
		}
		else {
			print OUT 'plot(positions,log10(coverage),type="n",main="Coverage for '.$exon.'",ylab="log10(Coverage)",ylim=ylim,xlab="Position",sub="(Transcribed from plus strand)")'."\n";
		}
		print OUT 'mtext("'.$subtitle.'")'."\n";
		print OUT 'lines(positions,log10(coverage))'."\n";
		print OUT 'abline(h=log10('.$thresh.'),lwd=4,col=rgb(255,0,0,100,maxColorValue=255))'."\n";
		# mark every position below the threshold with a red dot
		print OUT 'failedpos <- positions[coverage<'.$thresh.']'."\n";
		print OUT 'failedcov <- coverage[coverage<'.$thresh.']'."\n";
		print OUT 'points(failedpos,log10(failedcov),col="red",pch=19)'."\n";
		print OUT 'graphics.off()'."\n";
		close OUT;
		# run R script
		system("cd $wd/Rout && Rscript exonplot.R");
		# Add to .TEX
		print TEX '\begin{minipage}{0.5\linewidth}\centering'."\n";
		print TEX '\includegraphics[width=\textwidth,keepaspectratio=true]{../Plots/Coverage_'.$exonstr.'.png}'."\n";
		print TEX '\end{minipage}'."\n";
		$col++;
	}
	# Flush and release the report handle here: output still sitting in the
	# TEX buffer would otherwise be written only when the handle is reopened
	# or the script exits, i.e. possibly after text appended to Report.tex
	# through another handle.
	close TEX;
}
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
590
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
591 ## plot failed (subregion) or all exons
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
# Plot per-exon coverage: with -S only exons containing at least one position
# below $thresh, with -A every exon. For each plotted exon an R script is
# written to $wd/Rout/exonplot.R and run, and the resulting PNG is added to
# the LaTeX report, two plots per row.
# NOTE(review): $wd, %opts, %filehash, @allregions, $thresh, $de, $col, $bps
# and $subtitle are not declared in this block; they are assumed to be
# file-level variables set up earlier in the script.
if (exists($opts{'S'}) || exists($opts{'A'})) {
    # The coverage value is read from the last column and the position from
    # the one before it; derive both (1-based) indices from the first line
    # of the per-position coverage table.
    my $head = `head -n 1 $wd/Targets.Position.Coverage`;
    chomp($head);
    my @cols   = split(/\t/, $head);
    my $covcol = scalar(@cols);
    my $poscol = $covcol - 1;

    # tex section header
    # The bareword handle TEX is kept on purpose: re-opening it also closes
    # (and flushes) a handle of that name left open by earlier code.
    open(TEX, '>>', "$wd/Report/Report.tex")
        or die "Could not open $wd/Report/Report.tex for appending: $!\n";
    print TEX '\subsection*{Failed Exon Plots}'."\n";
    if (exists($opts{'S'})) {
        print TEX '\underline{NOTE:} ALL exons were tested for local coverage $<$'.$thresh.' \\\\'."\n";
    }
    elsif (exists($opts{'A'})) {
        print TEX '\underline{NOTE:} ALL exons are plotted, regardless of coverage \\\\'."\n";
    }

    $col = 1;
    foreach my $region (@allregions) {
        # two plots per row: an empty line starts a new row after the second
        if ($col > 2) {
            $col = 1;
            print TEX "\n";
        }

        # which exon, and the file holding its per-position coverage
        my $exon     = $filehash{$region}{'exon'};
        my $exonfile = "$wd/SplitFiles/File_".$filehash{$region}{'idx'}.".txt";

        ## determine transcript orientation and genomic coordinates
        ## from the first line of the exon file.
        my $firstline    = `head -n 1 $exonfile`;
        my @firstcols    = split(/\t/, $firstline);
        my $orient       = $firstcols[5];
        my $genomicchr   = $firstcols[0];
        my $genomicstart = $firstcols[1];
        my $genomicstop  = $firstcols[2];

        # the two strands differ only in the separator between start and stop
        my $strandsep = ($orient eq '+') ? '+' : '-';
        $bps      = $genomicstop - $genomicstart + 1;
        $subtitle = "Region 0-$bps: $genomicchr:".$de->format_number($genomicstart).$strandsep.$de->format_number($genomicstop);

        # with -S: skip exons whose lowest coverage reaches the threshold
        if (exists($opts{'S'})) {
            my $cs = `cut -f $covcol '$exonfile' `;
            my @c  = sort { $a <=> $b } split(/\n/, $cs);
            next if ($c[0] >= $thresh);
        }

        # print Rscript
        open(OUT, '>', "$wd/Rout/exonplot.R")
            or die "Could not write $wd/Rout/exonplot.R: $!\n";
        print OUT 'coveragetable <- read.table("'.$exonfile.'",as.is=TRUE,sep="\t",header=FALSE)'."\n";
        print OUT 'coverage <- coveragetable[,'.$covcol.']'."\n";
        # coverage is plotted as log10, so values below 1 are raised to 1
        print OUT 'coverage[coverage < 1] <- 1'."\n";
        print OUT 'positions <- coveragetable[,'.$poscol.']'."\n";
        my $width  = 480;
        my $height = 240;
        # $exonstr is used in the file name, $exon in the plot title
        my $exonstr = $exon;
        $exonstr =~ s/\s/_/g;
        $exon =~ s/_/ /g;
        $exon =~ s/\|/ /g;
        print OUT 'png(file="../Plots/Coverage_'.$exonstr.'.png", bg="white", width='.$width.', height='.$height.')'."\n";
        print OUT 'ylim = c(0,log10(max(max(coverage),'.($thresh+10).')))'."\n";
        # minus-strand exons are drawn with a reversed x axis
        my $plotcall = 'plot(positions,log10(coverage),type="n",main="Coverage for '.$exon.'",ylab="log10(Coverage)",ylim=ylim,xlab="Position",';
        if ($orient eq '-') {
            $plotcall .= 'xlim=rev(range(positions)),sub="(Transcribed from minus strand)")';
        }
        else {
            $plotcall .= 'sub="(Transcribed from plus strand)")';
        }
        print OUT $plotcall."\n";
        print OUT 'mtext("'.$subtitle.'")'."\n";

        print OUT 'lines(positions,log10(coverage))'."\n";
        print OUT 'abline(h=log10('.$thresh.'),lwd=4,col=rgb(255,0,0,100,maxColorValue=255))'."\n";
        print OUT 'failedpos <- positions[coverage<'.$thresh.']'."\n";
        print OUT 'failedcov <- coverage[coverage<'.$thresh.']'."\n";
        print OUT 'points(failedpos,log10(failedcov),col="red",pch=19)'."\n";
        print OUT 'graphics.off()'."\n";
        close OUT;

        # run R script; a failure is reported but does not stop the report
        system("cd $wd/Rout && Rscript exonplot.R") == 0
            or warn "Rscript failed for exon '$exon' (wait status $?)\n";

        # Add to .TEX
        print TEX '\begin{minipage}{0.5\linewidth}\centering'."\n";
        print TEX '\includegraphics[width=\textwidth,keepaspectratio=true]{../Plots/Coverage_'.$exonstr.'.png}'."\n";
        print TEX '\end{minipage}'."\n";
        $col++;
    }
    # Close TEX so the last plot is flushed to Report.tex now; otherwise it
    # can stay buffered and be written after the closing \end{document}.
    close TEX;
}
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
688 ## list failed exons
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
# With -L: append a LaTeX longtable to the report listing every target from
# Targets.Global.Coverage that has at least one position with coverage below
# $thresh, with its genomic position, average and minimum coverage.
# NOTE(review): $wd, %opts, %filehash, $thresh, $de, $col, $bps and $subtitle
# are not declared in this block; they are assumed to be file-level variables
# set up earlier in the script.
if (exists($opts{'L'})) {
    # the coverage value is read from the last column (1-based index for cut)
    my $head = `head -n 1 $wd/Targets.Position.Coverage`;
    chomp($head);
    my @cols   = split(/\t/, $head);
    my $covcol = scalar(@cols);

    # tex section header
    # The bareword handle TEX is kept on purpose: re-opening it also closes
    # (and flushes) a handle of that name left open by earlier code.
    open(TEX, '>>', "$wd/Report/Report.tex")
        or die "Could not open $wd/Report/Report.tex for appending: $!\n";
    print TEX '\subsection*{List of Failed Exons}'."\n";
    print TEX '\underline{NOTE:} ALL exons were tested for local coverage $<$'.$thresh.' \\\\'."\n";
    print TEX '{\footnotesize\begin{longtable}[l]{@{\extracolsep{\fill}}llll}'."\n".'\hline'."\n";
    print TEX '\textbf{Target Name} & \textbf{Genomic Position} & \textbf{Avg.Coverage} & \textbf{Min.Coverage} \\\\'."\n".'\hline'."\n";
    print TEX '\endhead'."\n";
    print TEX '\hline '."\n".'\multicolumn{4}{r}{{\textsl{\footnotesize Continued on next page}}} \\\\ '."\n".'\hline' ."\n". '\endfoot' . "\n". '\endlastfoot' . "\n";

    $col = 1;
    if (open(IN, '<', "$wd/Targets.Global.Coverage")) {
        while (my $line = <IN>) {
            chomp($line);
            my @p = split(/\t/, $line);
            # regions are keyed as chr-start-stop in %filehash
            my $region   = $p[0].'-'.$p[1].'-'.$p[2];
            my $exon     = $filehash{$region}{'exon'};
            my $exonfile = "$wd/SplitFiles/File_".$filehash{$region}{'idx'}.".txt";

            ## determine transcript orientation and genomic coordinates
            ## from the first line of the exon file.
            my $firstline    = `head -n 1 $exonfile`;
            my @firstcols    = split(/\t/, $firstline);
            my $orient       = $firstcols[5];
            my $genomicchr   = $firstcols[0];
            my $genomicstart = $firstcols[1];
            my $genomicstop  = $firstcols[2];

            # the two strands differ only in the separator between start and stop
            my $strandsep = ($orient eq '+') ? '+' : '-';
            $bps      = $genomicstop - $genomicstart + 1;
            $subtitle = "$genomicchr:".$de->format_number($genomicstart).$strandsep.$de->format_number($genomicstop);

            # check if failed
            my $cs = `cut -f $covcol '$exonfile' `;
            my @c  = split(/\n/, $cs);
            # arraystats returns (average, median, min, ...); only average
            # and minimum are needed. An exon without any coverage values is
            # reported with 0/0 instead of being passed to arraystats, which
            # would divide by zero on an empty list.
            my ($avg, $min) = @c ? (arraystats(@c))[0,2] : (0, 0);

            # lowest coverage >= threshold => not failed, skip
            next if ($min >= $thresh);

            # print to .tex table (long names are shortened to 30 characters)
            if (length($exon) > 30) {
                $exon = substr($exon,0,27) . '...';
            }
            $exon =~ s/_/ /g;
            $exon =~ s/\|/ /g;

            print TEX "$exon & $subtitle & ".int($avg)." & $min ".'\\\\'."\n";
        }
        close IN;
    }
    else {
        # without the input the table stays empty; say so instead of
        # silently suggesting that no exon failed
        warn "Could not read $wd/Targets.Global.Coverage: $! (list of failed exons left empty)\n";
    }
    print TEX '\hline'."\n";
    print TEX '\end{longtable}}'."\n";
    close TEX;
}
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
758
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
759
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
760 ## Close document
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
# Finish Report.tex, build the PDF, copy it to the requested output file,
# bundle plots/report/coverage tables into a tar.gz and remove the work dir.
# NOTE(review): $wd, $pdffile and $tarfile are not declared in this block;
# they are assumed to be file-level variables set up earlier in the script.

# If an earlier section left the TEX handle open, its buffered output would
# otherwise be appended to Report.tex after \end{document}; close it first.
close TEX if defined fileno(TEX);

open(OUT, '>>', "$wd/Report/Report.tex")
    or die "Could not open $wd/Report/Report.tex for appending: $!\n";
print OUT '\label{endofdoc}'."\n";
print OUT '\end{document}'."\n";
close OUT;

# pdflatex is run twice (second run only if the first one succeeds)
system("cd $wd/Report && pdflatex Report.tex > /dev/null 2>&1 && pdflatex Report.tex > /dev/null 2>&1 ") == 0
    or warn "pdflatex failed for $wd/Report/Report.tex (wait status $?)\n";

## mv report to output file
system("cp -f $wd/Report/Report.pdf '$pdffile'") == 0
    or warn "Could not copy $wd/Report/Report.pdf to '$pdffile' (wait status $?)\n";

##create tar.gz file
system("mkdir $wd/Results");
system("cp -Rf $wd/Plots $wd/Results/");
system("cp -Rf $wd/Report/ $wd/Results/");
# the coverage tables only exist when the matching analysis was run
if (-e "$wd/Targets.Global.Coverage") {
    system("cp -Rf $wd/Targets.Global.Coverage $wd/Results/");
}
if (-e "$wd/Targets.Position.Coverage") {
    system("cp -Rf $wd/Targets.Position.Coverage $wd/Results/");
}

system("cd $wd && tar czf '$tarfile' Results/") == 0
    or warn "Could not create '$tarfile' (wait status $?)\n";
## clean up (galaxy stores outside wd)
system("rm -Rf $wd");
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
783 ###############
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
784 ## FUNCTIONS ##
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
785 ###############
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
# arraystats(@values)
#
# Summary statistics for a list of numbers.
#
# Returns a seven-element list:
#   (average, median, minimum, maximum, first quartile, third quartile, sum)
# The quartiles are the sorted values at index int(n/4) and 3*int(n/4), i.e.
# rounded approximations. An empty list returns seven zeros instead of
# dying with "Illegal division by zero".
sub arraystats{
    my @array = @_;
    my $count = scalar(@array);
    # nothing to summarise: bail out before dividing by $count
    return (0,0,0,0,0,0,0) if ($count == 0);
    @array = sort { $a <=> $b } @array;
    # median: middle value, or mean of the two middle values for even counts
    my $median = 0;
    if ($count % 2) {
        $median = $array[int($count/2)];
    } else {
        $median = ($array[$count/2] + $array[$count/2 - 1]) / 2;
    }
    # average
    my $sum = 0;
    foreach (@array) { $sum += $_; }
    my $average = $sum / $count;
    # quantiles (rounded)
    my $quart = int($count/4) ;
    my $first = $array[$quart];
    my $third = $array[($quart*3)];
    my $min = $array[0];
    my $max = $array[($count-1)];
    return ($average,$median,$min,$max,$first,$third,$sum);
}
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
809
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
# GlobalSummary($bam, $targets)
#
# Run coverageBed for the BAM file against the target regions and write the
# result to $wd/Targets.Global.Coverage. A command that already ran
# successfully (tracked in %commandsrun, keyed by the exact command string)
# is not repeated. A failing command is reported on STDERR and is not
# recorded, so it is not mistaken for a completed run.
# NOTE(review): $wd and %commandsrun are not declared in this sub; they are
# assumed to be file-level variables.
# Returns 1 after a successful run, nothing otherwise.
sub GlobalSummary {
    my ($bam,$targets) = @_;

    # keep this string unchanged: it doubles as the key in %commandsrun
    my $command = "cd $wd && coverageBed -abam $bam -b $targets > $wd/Targets.Global.Coverage";
    return if (exists($commandsrun{$command}));

    my $status = system($command);
    if ($status != 0) {
        warn "GlobalSummary: command failed (wait status $status): $command\n";
        return;
    }
    $commandsrun{$command} = 1;
}
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
820
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
# CoveragePerRegion($bam, $targets)
#
# Run coverageBed for the BAM file against the target regions and write the
# result to $wd/Targets.Global.Coverage. A command that already ran
# successfully (tracked in %commandsrun, keyed by the exact command string)
# is not repeated. A failing command is reported on STDERR and is not
# recorded, so it is not mistaken for a completed run.
# NOTE(review): $wd and %commandsrun are not declared in this sub; they are
# assumed to be file-level variables.
# Returns 1 after a successful run, nothing otherwise.
sub CoveragePerRegion {
    my ($bam,$targets) = @_;

    # keep this string unchanged: it doubles as the key in %commandsrun
    my $command = "cd $wd && coverageBed -abam $bam -b $targets > $wd/Targets.Global.Coverage";
    return if (exists($commandsrun{$command}));

    my $status = system($command);
    if ($status != 0) {
        warn "CoveragePerRegion: command failed (wait status $status): $command\n";
        return;
    }
    $commandsrun{$command} = 1;
}
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
830
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
# SubRegionCoverage($bam, $targets)
#
# Run coverageBed with -d for the BAM file against the target regions and
# write the result to $wd/Targets.Position.Coverage. The command is always
# executed (there is no check against %commandsrun before running); it is
# recorded in %commandsrun only when it succeeds. A failing command is
# reported on STDERR.
# NOTE(review): $wd and %commandsrun are not declared in this sub; they are
# assumed to be file-level variables.
# Returns 1 after a successful run, nothing otherwise.
sub SubRegionCoverage {
    my ($bam,$targets) = @_;

    my $command = "cd $wd && coverageBed -abam $bam -b $targets -d > $wd/Targets.Position.Coverage";
    my $status = system($command);
    if ($status != 0) {
        warn "SubRegionCoverage: command failed (wait status $status): $command\n";
        return;
    }
    $commandsrun{$command} = 1;
}
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
837