Mercurial > repos > iuc > coverage_report
comparison CoverageReport.pl @ 0:30f8f85e3f98 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/coverage_report commit 1b647bb088f62c1369d963f030caecaa9ee003c7
| author | iuc |
|---|---|
| date | Wed, 25 Oct 2017 12:37:35 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:30f8f85e3f98 |
|---|---|
| 1 #!/usr/bin/env perl | |
| 2 # load modules | |
| 3 use warnings; | |
| 4 use Getopt::Std; | |
| 5 use File::Basename; | |
| 6 use Number::Format; | |
| 7 use Cwd; | |
| 8 | |
| 9 my %opts; | |
| 10 | |
| 11 # number format | |
| 12 my $de = new Number::Format(-thousands_sep =>',',-decimal_point => '.'); | |
| 13 | |
| 14 ########## | |
| 15 ## opts ## | |
| 16 ########## | |
| 17 ## input files | |
| 18 # b : path to input (b)am file | |
| 19 # t : path to input (t)arget regions in BED format | |
| 20 ## output files | |
| 21 # o : report pdf (o)utput file | |
| 22 ## entries in the report | |
| 23 # r : Coverage per (r)egion (boolean) | |
| 24 # s : (s)ubregion coverage if average < specified (plots for positions along target region) (boolean) | |
| 25 # S : (S)ubregion coverage for ALL failed exons => use either s OR S or you will have double plots. | |
| 26 # A : (A)ll exons will be plotted. | |
| 27 # L : (L)ist failed exons instead of plotting | |
| 28 # m : (m)inimal Coverage threshold | |
| 29 # f : fraction of average as threshold | |
| 30 # n : sample (n)ame. | |
| 31 | |
| 32 | |
| 33 getopts('b:t:o:rsSALm:n:f:', \%opts) ; | |
| 34 | |
| 35 my $tmp = getcwd(); | |
| 36 # make output directory in (tmp) working dir | |
| 37 our $wd = "$tmp/Coverage.".int(rand(1000)); | |
| 38 while (-d $wd) { | |
| 39 $wd = "$tmp/Coverage.".int(rand(1000)); | |
| 40 } | |
| 41 system("mkdir $wd"); | |
| 42 | |
| 43 ## variables | |
| 44 our %commandsrun = (); | |
| 45 my ($thresh,$frac,$pdffile,$samplename,$totalmapped); | |
| 46 | |
| 47 | |
| 48 if (!exists($opts{'b'}) || !-e $opts{'b'}) { | |
| 49 die('Bam File not found'); | |
| 50 } | |
| 51 if (!exists($opts{'t'}) || !-e $opts{'t'}) { | |
| 52 die('Target File (BED) not found'); | |
| 53 } | |
| 54 | |
| 55 if (exists($opts{'m'})) { | |
| 56 $thresh = $opts{'m'}; | |
| 57 } | |
| 58 else { | |
| 59 $thresh = 40; | |
| 60 } | |
| 61 | |
| 62 if (exists($opts{'f'})) { | |
| 63 $frac = $opts{'f'}; | |
| 64 } | |
| 65 else { | |
| 66 $frac = 0.2; | |
| 67 } | |
| 68 | |
| 69 if (exists($opts{'o'})) { | |
| 70 $pdffile = $opts{'o'}; | |
| 71 } | |
| 72 else { | |
| 73 $pdffile = "$wd/CoverageReport.pdf"; | |
| 74 } | |
| 75 | |
| 76 | |
| 77 # 1. Global Summary => default | |
| 78 &GlobalSummary($opts{'b'}, $opts{'t'}); | |
| 79 | |
| 80 # 2. Coverage per position | |
| 81 &SubRegionCoverage($opts{'b'}, $opts{'t'}); | |
| 82 our %filehash; | |
| 83 if (exists($opts{'s'}) || exists($opts{'S'}) || exists($opts{'A'}) || exists($opts{'L'})) { | |
| 84 system("mkdir $wd/SplitFiles"); | |
| 85 ## get position coverages | |
| 86 ## split input files | |
| 87 open IN, "$wd/Targets.Position.Coverage"; | |
| 88 my $fileidx = 0; | |
| 89 my $currreg = ''; | |
| 90 while (<IN>) { | |
| 91 my $line = $_; | |
| 92 chomp($line); | |
| 93 my @p = split(/\t/,$line); | |
| 94 my $reg = $p[0].'-'.$p[1].'-'.$p[2]; #.$p[3]; | |
| 95 my $ex = $p[3]; | |
| 96 if ($reg ne $currreg) { | |
| 97 ## new exon open new outfile | |
| 98 if ($currreg ne '') { | |
| 99 ## filehandle is open. close it | |
| 100 close OUT; | |
| 101 } | |
| 102 if (!exists($filehash{$reg})) { | |
| 103 $fileidx++; | |
| 104 $filehash{$reg}{'idx'} = $fileidx; | |
| 105 $filehash{$reg}{'exon'} = $ex; | |
| 106 open OUT, ">> $wd/SplitFiles/File_$fileidx.txt"; | |
| 107 $currreg = $reg; | |
| 108 } | |
| 109 else { | |
| 110 open OUT, ">> $wd/SplitFiles/File_".$filehash{$reg}{'idx'}.".txt"; | |
| 111 $currreg = $reg; | |
| 112 } | |
| 113 } | |
| 114 ## print the line to the open filehandle. | |
| 115 print OUT "$line\n"; | |
| 116 } | |
| 117 close OUT; | |
| 118 close IN; | |
| 119 | |
| 120 } | |
| 121 | |
| 122 ## sort output files according to targets file | |
| 123 if (exists($opts{'r'}) ) { | |
| 124 my %hash = (); | |
| 125 open IN, "$wd/Targets.Global.Coverage"; | |
| 126 while (<IN>) { | |
| 127 chomp; | |
| 128 my @p = split(/\t/,$_) ; | |
| 129 $hash{$p[3]} = $_; | |
| 130 } | |
| 131 close IN; | |
| 132 open OUT, ">$wd/Targets.Global.Coverage"; | |
| 133 open IN, $opts{'t'}; | |
| 134 while (<IN>) { | |
| 135 chomp; | |
| 136 my @p = split(/\t/,$_) ; | |
| 137 print OUT $hash{$p[3]} . "\n"; | |
| 138 } | |
| 139 close IN; | |
| 140 close OUT; | |
| 141 } | |
| 142 | |
| 143 | |
| 144 #################################### | |
| 145 ## PROCESS RESULTS & CREATE PLOTS ## | |
| 146 #################################### | |
| 147 system("mkdir $wd/Report"); | |
| 148 | |
| 149 system("mkdir $wd/Rout"); | |
| 150 system("mkdir $wd/Plots"); | |
| 151 | |
| 152 $samplename = $opts{'n'}; | |
| 153 $samplename =~ s/_/\\_/g; | |
| 154 $samplename =~ s/\..+$//g; | |
| 155 | |
| 156 # 0. Preamble | |
| 157 ## compose preamble | |
| 158 open OUT, ">$wd/Report/Report.tex"; | |
| 159 print OUT '\documentclass[a4paper,10pt]{article}'."\n"; | |
| 160 print OUT '\usepackage[left=2cm,top=1.5cm,right=1.5cm,bottom=2.5cm,nohead]{geometry}'."\n"; | |
| 161 print OUT '\usepackage{longtable}'."\n"; | |
| 162 print OUT '\usepackage{fancyhdr}'."\n"; | |
| 163 print OUT '\usepackage{fontspec}' . "\n"; | |
| 164 print OUT '\usepackage{color}'."\n"; | |
| 165 print OUT '\definecolor{grey}{RGB}{160,160,160}'."\n"; | |
| 166 print OUT '\definecolor{darkgrey}{RGB}{100,100,100}'."\n"; | |
| 167 print OUT '\definecolor{red}{RGB}{255,0,0}'."\n"; | |
| 168 print OUT '\definecolor{orange}{RGB}{238,118,0}'."\n"; | |
| 169 print OUT '\setlength\LTleft{0pt}'."\n"; | |
| 170 print OUT '\setlength\LTright{0pt}'."\n"; | |
| 171 print OUT '\begin{document}'."\n"; | |
| 172 print OUT '\pagestyle{fancy}'."\n"; | |
| 173 print OUT '\fancyhead{}'."\n"; | |
| 174 print OUT '\renewcommand{\footrulewidth}{0.4pt}'."\n"; | |
| 175 print OUT '\renewcommand{\headrulewidth}{0pt}'."\n"; | |
| 176 print OUT '\fancyfoot[R]{\today\hspace{2cm}\thepage\ of \pageref{endofdoc}}'."\n"; | |
| 177 print OUT '\fancyfoot[C]{}'."\n"; | |
| 178 print OUT '\fancyfoot[L]{Coverage Report for ``'.$samplename.'"}'."\n"; | |
| 179 print OUT '\let\oldsubsubsection=\subsubsection'."\n"; | |
| 180 print OUT '\renewcommand{\subsubsection}{%'."\n"; | |
| 181 print OUT ' \filbreak'."\n"; | |
| 182 print OUT ' \oldsubsubsection'."\n"; | |
| 183 print OUT '}'."\n"; | |
| 184 # main title | |
| 185 print OUT '\section*{Coverage Report for ``'.$samplename.'"}'."\n"; | |
| 186 close OUT; | |
| 187 | |
| 188 # 1. Summary Report | |
| 189 # Get samtools flagstat summary of BAM file | |
| 190 my $flagstat = `samtools flagstat $opts{'b'}`; | |
| 191 my @s = split(/\n/,$flagstat); | |
| 192 # Get number of reads mapped in total | |
| 193 ## updated on 2012-10-1 !! | |
| 194 $totalmapped = $s[2]; | |
| 195 $totalmapped =~ s/^(\d+)(\s.+)/$1/; | |
| 196 | |
| 197 if ( not $totalmapped){ | |
| 198 exit; | |
| 199 } | |
| 200 | |
| 201 # count columns | |
| 202 my $head = `head -n 1 $wd/Targets.Global.Coverage`; | |
| 203 chomp($head); | |
| 204 my @cols = split(/\t/,$head); | |
| 205 my $nrcols = scalar(@cols); | |
| 206 my $covcol = $nrcols - 3; | |
| 207 # get min/max/median/average coverage => values | |
| 208 my $covs = `cut -f $covcol $wd/Targets.Global.Coverage`; | |
| 209 my @coverages = split(/\n/,$covs); | |
| 210 my ($eavg,$med,$min,$max,$first,$third,$ontarget) = arraystats(@coverages); | |
| 211 my $spec = 0; | |
| 212 $spec=sprintf("%.1f",($ontarget / $totalmapped)*100); | |
| 213 # get min/max/median/average coverage => boxplot in R | |
| 214 open OUT, ">$wd/Rout/boxplot.R"; | |
| 215 print OUT 'coverage <- read.table("../Targets.Global.Coverage",as.is=TRUE,sep="\t",header=FALSE)'."\n"; | |
| 216 print OUT 'coverage <- coverage[,'.$covcol.']'."\n"; | |
| 217 print OUT 'png(file="../Plots/CoverageBoxPlot.png", bg="white", width=240, height=480)'."\n"; | |
| 218 print OUT 'boxplot(coverage,range=1.5,main="Target Region Coverage")'."\n"; | |
| 219 print OUT 'graphics.off()'."\n"; | |
| 220 close OUT; | |
| 221 system("cd $wd/Rout && Rscript boxplot.R") == 0 || die "Could not run boxplot.R with following error '$!'\n"; | |
| 222 | |
| 223 ## global nt coverage plot | |
| 224 ## use perl to make histogram (lower memory) | |
| 225 open IN, "$wd/Targets.Position.Coverage"; | |
| 226 my %dens; | |
| 227 my $counter = 0; | |
| 228 my $sum = 0; | |
| 229 my $avg = 0; | |
| 230 | |
| 231 while (<IN>) { | |
| 232 chomp(); | |
| 233 my @p = split(/\t/); | |
| 234 $sum += $p[-1]; | |
| 235 $counter++; | |
| 236 if (defined($dens{$p[-1]})) { | |
| 237 $dens{$p[-1]}++; | |
| 238 } | |
| 239 else { | |
| 240 $dens{$p[-1]} = 1; | |
| 241 } | |
| 242 } | |
| 243 $avg = $sum/$counter; | |
| 244 close IN; | |
| 245 open OUT, ">$wd/Rout/hist.txt"; | |
| 246 if (!defined($dens{'0'})) { | |
| 247 $dens{'0'} = 0; | |
| 248 } | |
| 249 foreach (keys(%dens)) { | |
| 250 print OUT "$_;$dens{$_}\n"; | |
| 251 } | |
| 252 close OUT; | |
| 253 open OUT, ">$wd/Rout/ntplot.R"; | |
| 254 # read coverage hist in R to plot | |
| 255 print OUT 'coverage <- read.table("hist.txt" , as.is = TRUE, header=FALSE,sep=";")'."\n"; | |
| 256 print OUT 'mincov <- '."$thresh \n"; | |
| 257 print OUT "avg <- round($avg,digits=8)\n"; | |
| 258 print OUT "colnames(coverage) <- c('cov','count')\n"; | |
| 259 print OUT 'coverage$cov <- coverage$cov / avg'."\n"; | |
| 260 print OUT 'rep <- which(coverage$cov > 1)'."\n"; | |
| 261 print OUT 'coverage[coverage$cov > 1,1] <- 1'."\n"; | |
| 262 print OUT 'values <- coverage[coverage$cov < 1,]'."\n"; | |
| 263 print OUT 'values <- rbind(values,c(1,sum(coverage[coverage$cov == 1,"count"])))'."\n"; | |
| 264 print OUT 'values <- values[order(values$cov),]'."\n"; | |
| 265 print OUT 'prevcount <- 0'."\n"; | |
| 266 # make cumulative count data frame | |
| 267 print OUT 'for (i in rev(values$cov)) {'."\n"; | |
| 268 print OUT ' values[values$cov == i,"count"] <- prevcount + values[values$cov == i,"count"]'."\n"; | |
| 269 print OUT ' prevcount <- values[values$cov == i,"count"]'."\n"; | |
| 270 print OUT '}'."\n"; | |
| 271 print OUT 'values$count <- values$count / (values[values$cov == 0,"count"] / 100)'."\n"; | |
| 272 # get some values to plot lines. | |
| 273 print OUT 'mincov.x <- mincov/avg'."\n"; | |
| 274 print OUT 'if (mincov/avg <= 1) {'."\n"; | |
| 275 print OUT ' ii <- which(values$cov == mincov.x)'."\n"; | |
| 276 print OUT ' if (length(ii) == 1) {'."\n"; | |
| 277 print OUT ' mincov.y <- values[ii[1],"count"]'."\n"; | |
| 278 print OUT ' } else {'."\n"; | |
| 279 print OUT ' i1 <- max(which(values$cov < mincov.x))'."\n"; | |
| 280 print OUT ' i2 <- min(which(values$cov > mincov.x))'."\n"; | |
| 281 print OUT ' mincov.y <- ((values[i2,"count"] - values[i1,"count"])/(values[i2,"cov"] - values[i1,"cov"]))*(mincov.x - values[i1,"cov"]) + values[i1,"count"]'."\n"; | |
| 282 print OUT ' }'."\n"; | |
| 283 print OUT '}'."\n"; | |
| 284 # open output image and create plot | |
| 285 print OUT 'png(file="../Plots/CoverageNtPlot.png", bg="white", width=540, height=480)'."\n"; | |
| 286 print OUT 'par(xaxs="i",yaxs="i")'."\n"; | |
| 287 print OUT 'plot(values$cov,values$count,ylim=c(0,100),pch=".",main="Cumulative Normalised Base-Coverage Plot",xlab="Normalizalised Coverage",ylab="Cumulative Nr. Of Bases")'."\n"; | |
| 288 print OUT 'lines(values$cov,values$count)'."\n"; | |
| 289 print OUT 'if (mincov.x <= 1) {'."\n"; | |
| 290 print OUT ' lines(c(mincov.x,mincov.x),c(0,mincov.y),lty=2,col="darkgreen")'."\n"; | |
| 291 print OUT ' lines(c(0,mincov.x),c(mincov.y,mincov.y),lty=2,col="darkgreen")'."\n"; | |
| 292 print OUT ' text(1,(95),pos=2,col="darkgreen",labels="Threshold: '.$thresh.'x")'."\n"; | |
| 293 print OUT ' text(1,(91),pos=2,col="darkgreen",labels=paste("%Bases: ",round(mincov.y,2),"%",sep=""))'."\n"; | |
| 294 print OUT '} else {'."\n"; | |
| 295 print OUT ' text(1,(95),pos=2,col="darkgreen",labels="Threshold ('.$thresh.'x) > Average")'."\n"; | |
| 296 print OUT ' text(1,(91),pos=2,col="darkgreen",labels="Plotting impossible")'."\n"; | |
| 297 print OUT '}'."\n"; | |
| 298 print OUT 'frac.x <- '."$frac\n"; | |
| 299 print OUT 'ii <- which(values$cov == frac.x)'."\n"; | |
| 300 print OUT 'if (length(ii) == 1) {'."\n"; | |
| 301 print OUT ' frac.y <- values[ii[1],"count"]'."\n"; | |
| 302 print OUT '} else {'."\n"; | |
| 303 print OUT ' i1 <- max(which(values$cov < frac.x))'."\n"; | |
| 304 print OUT ' i2 <- min(which(values$cov > frac.x))'."\n"; | |
| 305 print OUT ' frac.y <- ((values[i2,"count"] - values[i1,"count"])/(values[i2,"cov"] - values[i1,"cov"]))*(frac.x - values[i1,"cov"]) + values[i1,"count"]'."\n"; | |
| 306 print OUT '}'."\n"; | |
| 307 print OUT 'lines(c(frac.x,frac.x),c(0,frac.y),lty=2,col="red")'."\n"; | |
| 308 print OUT 'lines(c(0,frac.x),c(frac.y,frac.y),lty=2,col="red")'."\n"; | |
| 309 #iprint OUT 'text((frac.x+0.05),(frac.y - 2),pos=4,col="red",labels=paste(frac.x," x Avg.Cov : ",round(frac.x * avg,2),"x",sep="" ))'."\n"; | |
| 310 #print OUT 'text((frac.x+0.05),(frac.y-5),pos=4,col="red",labels=paste("%Bases: ",round(frac.y,2),"%",sep=""))'."\n"; | |
| 311 print OUT 'text(1,86,pos=2,col="red",labels=paste(frac.x," x Avg.Cov : ",round(frac.x * avg,2),"x",sep="" ))'."\n"; | |
| 312 print OUT 'text(1,82,pos=2,col="red",labels=paste("%Bases: ",round(frac.y,2),"%",sep=""))'."\n"; | |
| 313 | |
| 314 print OUT 'graphics.off()'."\n"; | |
| 315 | |
| 316 close OUT; | |
| 317 system("cd $wd/Rout && Rscript ntplot.R") == 0 || die "Could not run ntplot.R with following error '$!'\n"; | |
| 318 ## PRINT TO .TEX FILE | |
| 319 open OUT, ">>$wd/Report/Report.tex"; | |
| 320 # average coverage overviews | |
| 321 print OUT '\subsection*{Overall Summary}'."\n"; | |
| 322 print OUT '{\small '; | |
| 323 # left : boxplot | |
| 324 print OUT '\begin{minipage}{0.3\linewidth}\centering'."\n"; | |
| 325 print OUT '\includegraphics[width=\textwidth,keepaspectratio=true]{../Plots/CoverageBoxPlot.png}'."\n"; | |
| 326 print OUT '\end{minipage}'."\n"; | |
| 327 # right : cum.cov.plot | |
| 328 print OUT '\hspace{0.6cm}'."\n"; | |
| 329 print OUT '\begin{minipage}{0.65\linewidth}\centering'."\n"; | |
| 330 print OUT '\includegraphics[width=\textwidth,keepaspectratio=true]{../Plots/CoverageNtPlot.png}'."\n"; | |
| 331 print OUT '\end{minipage} \\\\'."\n"; | |
| 332 ## next line | |
| 333 print OUT '\begin{minipage}{0.48\linewidth}'."\n"; | |
| 334 print OUT '\vspace{-1.2em}'."\n"; | |
| 335 print OUT '\begin{tabular}{ll}'."\n"; | |
| 336 # bam statistics | |
| 337 print OUT '\multicolumn{2}{l}{\textbf{\underline{Samtools Flagstat Summary}}} \\\\'."\n"; | |
| 338 foreach (@s) { | |
| 339 $_ =~ m/^(\d+)\s(.+)$/; | |
| 340 my $one = $1; | |
| 341 my $two = $2; | |
| 342 $two =~ s/\s\+\s0\s//; | |
| 343 $two = ucfirst($two); | |
| 344 $one =~ s/%/\\%/g; | |
| 345 # remove '+ 0 ' from front | |
| 346 $two =~ s/\+\s0\s//; | |
| 347 # remove trailing from end | |
| 348 $two =~ s/(\s\+.*)|(:.*)/\)/; | |
| 349 $two =~ s/%/\\%/g; | |
| 350 $two =~ s/>=/\$\\ge\$/g; | |
| 351 $two = ucfirst($two); | |
| 352 print OUT '\textbf{'.$two.'} & '.$one.' \\\\'."\n"; | |
| 353 } | |
| 354 print OUT '\end{tabular}\end{minipage}'."\n"; | |
| 355 print OUT '\hspace{1.5cm}'."\n"; | |
| 356 # target coverage statistics | |
| 357 print OUT '\begin{minipage}{0.4\linewidth}'."\n"; | |
| 358 #print OUT '\vspace{-4.8em}'."\n"; | |
| 359 print OUT '\begin{tabular}{ll}'."\n"; | |
| 360 print OUT '\multicolumn{2}{l}{\textbf{\underline{Target Region Coverage}}} \\\\'."\n"; | |
| 361 print OUT '\textbf{Number of Target Regions} & '.scalar(@coverages).' \\\\'."\n"; | |
| 362 print OUT '\textbf{Minimal Region Coverage} & '.$min.' \\\\'."\n"; | |
| 363 print OUT '\textbf{25\% Region Coverage} & '.$first.' \\\\'. "\n"; | |
| 364 print OUT '\textbf{50\% (Median) Region Coverage} & '.$med.' \\\\'. "\n"; | |
| 365 print OUT '\textbf{75\% Region Coverage} & '.$third.' \\\\'. "\n"; | |
| 366 print OUT '\textbf{Maximal Region Coverage} & '.$max.' \\\\'. "\n"; | |
| 367 print OUT '\textbf{Average Region Coverage} & '.int($eavg).' \\\\'. "\n"; | |
| 368 print OUT '\textbf{Mapped On Target} & '.$spec.' \\\\'."\n"; | |
| 369 print OUT '\multicolumn{2}{l}{\textbf{\underline{Target Base Coverage }}} \\\\'."\n"; | |
| 370 print OUT '\textbf{Number of Target Bases} & '.$counter.' \\\\'."\n"; | |
| 371 print OUT '\textbf{Average Base Coverage} & '.int($avg).' \\\\'. "\n"; | |
| 372 print OUT '\textbf{Non-Covered Bases} & '.$dens{'0'}.' \\\\'."\n"; | |
| 373 #print OUT '\textbf{Bases Covered $ge$ '.$frac.'xAvg.Cov} & '. | |
| 374 print OUT '\end{tabular}\end{minipage}}'."\n"; | |
| 375 close OUT; | |
| 376 | |
| 377 # 2. GLOBAL COVERAGE OVERVIEW PER GENE | |
| 378 my @failedexons; | |
| 379 my @allexons; | |
| 380 my @allregions; | |
| 381 my @failedregions; | |
| 382 if (exists($opts{'r'}) || exists($opts{'s'}) || exists($opts{'S'})) { | |
| 383 # count columns | |
| 384 my $head = `head -n 1 $wd/Targets.Global.Coverage`; | |
| 385 chomp($head); | |
| 386 my @cols = split(/\t/,$head); | |
| 387 my $nrcols = scalar(@cols); | |
| 388 my $covcol = $nrcols - 3; | |
| 389 # Coverage Plots for each gene => barplots in R, table here. | |
| 390 open IN, "$wd/Targets.Global.Coverage"; | |
| 391 my $currgroup = ''; | |
| 392 my $startline = 0; | |
| 393 my $stopline = 0; | |
| 394 my $linecounter = 0; | |
| 395 my $nrcol=0; | |
| 396 while (<IN>) { | |
| 397 $linecounter++; | |
| 398 chomp($_); | |
| 399 my @c = split(/\t/,$_); | |
| 400 push(@allregions,$c[0].'-'.$c[1].'-'.$c[2]); | |
| 401 my $group = $c[3]; | |
| 402 ## coverage failure? | |
| 403 if ($c[$nrcol-1] < 1 || $c[$covcol-1] < $thresh) { | |
| 404 push(@failedexons,$group); | |
| 405 push(@failedregions,$c[0].'-'.$c[1].'-'.$c[2]); | |
| 406 } | |
| 407 ## store exon | |
| 408 push(@allexons,$group); | |
| 409 ## extract and check gene | |
| 410 $group =~ s/^(\S+)[\|\s](.+)/$1/; | |
| 411 | |
| 412 my $scale; | |
| 413 if ($group ne $currgroup ) { | |
| 414 if ($currgroup ne '') { | |
| 415 # new gene, make plot. | |
| 416 open OUT, ">$wd/Rout/barplot.R"; | |
| 417 print OUT 'coveragetable <- read.table("../Targets.Global.Coverage",as.is=TRUE,sep="\t",header=FALSE)'."\n"; | |
| 418 print OUT 'coverage <- coveragetable[c('.$startline.':'.$stopline.'),'.$covcol.']'."\n"; | |
| 419 print OUT 'entries <- coveragetable[c('.$startline.':'.$stopline.'),4]'."\n"; | |
| 420 print OUT 'entries <- sub("\\\\S+\\\\|","",entries,perl=TRUE)'."\n"; | |
| 421 print OUT 'coverage[coverage < 1] <- 1'."\n"; | |
| 422 print OUT 'colors <- c(rep("grey",length(coverage)))'."\n"; | |
| 423 # coverage not whole target region => orange | |
| 424 print OUT 'covperc <- coveragetable[c('.$startline.':'.$stopline.'),'.$nrcols.']'."\n"; | |
| 425 print OUT 'colors[covperc<1] <- "orange"'."\n"; | |
| 426 # coverage below threshold => red | |
| 427 print OUT 'colors[coverage<'.$thresh.'] <- "red"'."\n"; | |
| 428 | |
| 429 if ($stopline - $startline > 20) { | |
| 430 $scale = 2; | |
| 431 } | |
| 432 else { | |
| 433 $scale = 1; | |
| 434 } | |
| 435 my $width = 480 * $scale; | |
| 436 my $height = 240 * $scale; | |
| 437 print OUT 'png(file="../Plots/Coverage_'.$currgroup.'.png", bg="white", width='.$width.', height='.$height.')'."\n"; | |
| 438 print OUT 'ylim = c(0,max(max(log10(coverage),log10('.($thresh+20).'))))'."\n"; | |
| 439 print OUT 'mp <- barplot(log10(coverage),col=colors,main="Exon Coverage for '.$currgroup.'",ylab="Log10(Coverage)",ylim=ylim)'."\n"; | |
| 440 print OUT 'text(mp, log10(coverage) + '.(0.4/$scale).',format(coverage),xpd = TRUE,srt=90)'."\n"; | |
| 441 print OUT 'text(mp,par("usr")[3]-0.05,labels=entries,srt=45,adj=1,xpd=TRUE)'."\n"; | |
| 442 print OUT 'abline(h=log10('.$thresh.'),lwd=4,col=rgb(255,0,0,100,maxColorValue=255))'."\n"; | |
| 443 print OUT 'graphics.off()'."\n"; | |
| 444 close OUT; | |
| 445 system("cd $wd/Rout && Rscript barplot.R") == 0 || die "Could not run barplot.R with following error '$!'\n"; | |
| 446 if ($scale == 1) { | |
| 447 push(@small,'\includegraphics[width=\textwidth,keepaspectratio=true]{../Plots/Coverage_'.$currgroup.'.png}'); | |
| 448 } | |
| 449 else { | |
| 450 push(@large,'\includegraphics[width=\textwidth,keepaspectratio=true]{../Plots/Coverage_'.$currgroup.'.png}'); | |
| 451 } | |
| 452 | |
| 453 } | |
| 454 $currgroup = $group; | |
| 455 $startline = $linecounter; | |
| 456 } | |
| 457 $stopline = $linecounter; | |
| 458 } | |
| 459 close IN; | |
| 460 if ($currgroup ne '') { | |
| 461 # last gene, make plot. | |
| 462 open OUT, ">$wd/Rout/barplot.R"; | |
| 463 print OUT 'coveragetable <- read.table("../Targets.Global.Coverage",as.is=TRUE,sep="\t",header=FALSE)'."\n"; | |
| 464 print OUT 'coverage <- coveragetable[c('.$startline.':'.$stopline.'),'.$covcol.']'."\n"; | |
| 465 print OUT 'entries <- coveragetable[c('.$startline.':'.$stopline.'),4]'."\n"; | |
| 466 print OUT 'entries <- sub("\\\\S+\\\\|","",entries,perl=TRUE)'."\n"; | |
| 467 print OUT 'coverage[coverage < 1] <- 1'."\n"; | |
| 468 print OUT 'colors <- c(rep("grey",length(coverage)))'."\n"; | |
| 469 print OUT 'colors[coverage<'.$thresh.'] <- "red"'."\n"; | |
| 470 | |
| 471 if ($stopline - $startline > 20) { | |
| 472 $scale = 2; | |
| 473 } | |
| 474 else { | |
| 475 $scale = 1; | |
| 476 } | |
| 477 my $width = 480 * $scale; | |
| 478 my $height = 240 * $scale; | |
| 479 print OUT 'png(file="../Plots/Coverage_'.$currgroup.'.png", bg="white", width='.$width.', height='.$height.')'."\n"; | |
| 480 print OUT 'ylim = c(0,max(max(log10(coverage),log10('.($thresh+20).'))))'."\n"; | |
| 481 print OUT 'mp <- barplot(log10(coverage),col=colors,main="Exon Coverage for '.$currgroup.'",ylab="Log10(Coverage)", ylim=ylim)'."\n"; | |
| 482 print OUT 'text(mp, log10(coverage) + log10(2),format(coverage),xpd = TRUE,srt=90)'."\n"; | |
| 483 print OUT 'text(mp,par("usr")[3]-0.1,labels=entries,srt=45,adj=1,xpd=TRUE)'."\n"; | |
| 484 print OUT 'abline(h=log10('.$thresh.'),lwd=4,col=rgb(255,0,0,100,maxColorValue=255))'."\n"; | |
| 485 print OUT 'graphics.off()'."\n"; | |
| 486 close OUT; | |
| 487 system("cd $wd/Rout && Rscript barplot.R") == 0 || die "Could not run barplot.R with following error '$!'\n"; | |
| 488 if ($scale == 1) { | |
| 489 push(@small,'\includegraphics[width=\textwidth,keepaspectratio=true]{../Plots/Coverage_'.$currgroup.'.png}'); | |
| 490 } | |
| 491 else { | |
| 492 push(@large,'\includegraphics[width=\textwidth,keepaspectratio=true]{../Plots/Coverage_'.$currgroup.'.png}'); | |
| 493 } | |
| 494 } | |
| 495 ## print to TEX | |
| 496 open OUT, ">>$wd/Report/Report.tex"; | |
| 497 print OUT '\subsection*{Gene Summaries}'."\n"; | |
| 498 print OUT '\underline{Legend:} \\\\'."\n"; | |
| 499 print OUT '{\color{red}\textbf{RED:} Coverage did not reach set threshold of '.$thresh.'} \\\\'."\n"; | |
| 500 print OUT '{\color{orange}\textbf{ORANGE:} Coverage was incomplete for the exon. Overruled by red.} \\\\' ."\n"; | |
| 501 my $col = 1; | |
| 502 foreach (@small) { | |
| 503 if ($col > 2) { | |
| 504 $col = 1; | |
| 505 print OUT "\n"; | |
| 506 } | |
| 507 print OUT '\begin{minipage}{0.5\linewidth}\centering'."\n"; | |
| 508 print OUT $_."\n"; | |
| 509 print OUT '\end{minipage}'."\n"; | |
| 510 $col++; | |
| 511 } | |
| 512 ## new line | |
| 513 if ($col == 2) { | |
| 514 print OUT '\\\\'." \n"; | |
| 515 } | |
| 516 foreach(@large) { | |
| 517 print OUT $_."\n"; | |
| 518 } | |
| 519 close OUT; | |
| 520 | |
| 521 } | |
| 522 | |
| 523 # 3. Detailed overview of failed exons (globally failed) | |
| 524 if (exists($opts{'s'})) { | |
| 525 # count columns | |
| 526 my $head = `head -n 1 $wd/Targets.Position.Coverage`; | |
| 527 chomp($head); | |
| 528 my $subtitle; | |
| 529 my @cols = split(/\t/,$head); | |
| 530 my $nrcols = scalar(@cols); | |
| 531 my $covcol = $nrcols; | |
| 532 my $poscol = $nrcols -1; | |
| 533 # tex section header | |
| 534 open TEX, ">>$wd/Report/Report.tex"; | |
| 535 print TEX '\subsection*{Failed Exon Plots}'."\n"; | |
| 536 $col = 1; | |
| 537 print TEX '\underline{NOTE:} Only exons with global coverage $<$'.$thresh.' or incomplete coverage were plotted \\\\'."\n"; | |
| 538 foreach(@failedregions) { | |
| 539 if ($col > 2) { | |
| 540 $col = 1; | |
| 541 print TEX "\n"; | |
| 542 } | |
| 543 # which exon | |
| 544 my $region = $_; | |
| 545 my $exon = $filehash{$region}{'exon'}; | |
| 546 # link exon to tmp file | |
| 547 my $exonfile = "$wd/SplitFiles/File_".$filehash{$region}{'idx'}.".txt"; | |
| 548 ## determine transcript orientation and location | |
| 549 my $firstline = `head -n 1 $exonfile`; | |
| 550 my @firstcols = split(/\t/,$firstline); | |
| 551 my $orient = $firstcols[5]; | |
| 552 my $genomicchr = $firstcols[0]; | |
| 553 my $genomicstart = $firstcols[1]; | |
| 554 my $genomicstop = $firstcols[2]; | |
| 555 if ($orient eq '+') { | |
| 556 $bps = $genomicstop - $genomicstart + 1; | |
| 557 $subtitle = "Region 0-$bps: $genomicchr:".$de->format_number($genomicstart)."+".$de->format_number($genomicstop); | |
| 558 } | |
| 559 else { | |
| 560 $bps = $genomicstop - $genomicstart + 1; | |
| 561 $subtitle = "Region 0-$bps: $genomicchr:".$de->format_number($genomicstart)."-".$de->format_number($genomicstop); | |
| 562 } | |
| 563 # print Rscript | |
| 564 open OUT, ">$wd/Rout/exonplot.R"; | |
| 565 print OUT 'coveragetable <- read.table("'.$exonfile.'",as.is=TRUE,sep="\t",header=FALSE)'."\n"; | |
| 566 print OUT 'coverage <- coveragetable[,'.$covcol.']'."\n"; | |
| 567 print OUT 'coverage[coverage < 1] <- 1'."\n"; | |
| 568 print OUT 'positions <- coveragetable[,'.$poscol.']'."\n"; | |
| 569 | |
| 570 my $width = 480 ; | |
| 571 my $height = 240 ; | |
| 572 my $exonstr = $exon; | |
| 573 $exonstr =~ s/\s/_/g; | |
| 574 $exon =~ s/_/ /g; | |
| 575 $exon =~ s/\|/ /g; | |
| 576 print OUT 'png(file="../Plots/Coverage_'.$exonstr.'.png", bg="white", width='.$width.', height='.$height.')'."\n"; | |
| 577 print OUT 'ylim = c(0,log10(max(max(coverage),'.($thresh+10).')))'."\n"; | |
| 578 if ($orient eq '-') { | |
| 579 print OUT 'plot(positions,log10(coverage),type="n",main="Coverage for '.$exon.'",ylab="log10(Coverage)",ylim=ylim,xlab="Position",xlim=rev(range(positions)),sub="(Transcribed from minus strand)")'."\n"; | |
| 580 print OUT 'mtext("'.$subtitle.'")'."\n"; | |
| 581 } | |
| 582 else { | |
| 583 print OUT 'plot(positions,log10(coverage),type="n",main="Coverage for '.$exon.'",ylab="log10(Coverage)",ylim=ylim,xlab="Position",sub="(Transcribed from plus strand)")'."\n"; | |
| 584 print OUT 'mtext("'.$subtitle.'")'."\n"; | |
| 585 } | |
| 586 print OUT 'lines(positions,log10(coverage))'."\n"; | |
| 587 print OUT 'abline(h=log10('.$thresh.'),lwd=4,col=rgb(255,0,0,100,maxColorValue=255))'."\n"; | |
| 588 print OUT 'failedpos <- positions[coverage<'.$thresh.']'."\n"; | |
| 589 print OUT 'failedcov <- coverage[coverage<'.$thresh.']'."\n"; | |
| 590 print OUT 'points(failedpos,log10(failedcov),col="red",pch=19)'."\n"; | |
| 591 print OUT 'graphics.off()'."\n"; | |
| 592 close OUT; | |
| 593 # run R script | |
| 594 system("cd $wd/Rout && Rscript exonplot.R")== 0 || die "Could not run exonplot.R with following error '$!'\n"; | |
| 595 # Add to .TEX | |
| 596 print TEX '\begin{minipage}{0.5\linewidth}\centering'."\n"; | |
| 597 print TEX '\includegraphics[width=\textwidth,keepaspectratio=true]{../Plots/Coverage_'.$exonstr.'.png}'."\n"; | |
| 598 print TEX '\end{minipage}'."\n"; | |
| 599 $col++; | |
| 600 } | |
| 601 } | |
| 602 | |
| 603 ## plot failed (subregion) or all exons | |
| 604 if (exists($opts{'S'}) || exists($opts{'A'})) { | |
| 605 # count columns | |
| 606 my $head = `head -n 1 $wd/Targets.Position.Coverage`; | |
| 607 chomp($head); | |
| 608 my @cols = split(/\t/,$head); | |
| 609 my $nrcols = scalar(@cols); | |
| 610 my $covcol = $nrcols; | |
| 611 my $poscol = $nrcols -1; | |
| 612 # tex section header | |
| 613 open TEX, ">>$wd/Report/Report.tex"; | |
| 614 print TEX '\subsection*{Failed Exon Plots}'."\n"; | |
| 615 if (exists($opts{'S'})) { | |
| 616 print TEX '\underline{NOTE:} ALL exons were tested for local coverage $<$'.$thresh.' \\\\'."\n"; | |
| 617 } | |
| 618 elsif (exists($opts{'A'})) { | |
| 619 print TEX '\underline{NOTE:} ALL exons are plotted, regardless of coverage \\\\'."\n"; | |
| 620 } | |
| 621 $col = 1; | |
| 622 foreach(@allregions) { | |
| 623 if ($col > 2) { | |
| 624 $col = 1; | |
| 625 print TEX "\n"; | |
| 626 } | |
| 627 # which exon | |
| 628 my $region = $_; | |
| 629 my $exon = $filehash{$region}{'exon'}; | |
| 630 # grep exon to tmp file | |
| 631 my $exonfile = "$wd/SplitFiles/File_".$filehash{$region}{'idx'}.".txt"; | |
| 632 ## determine transcript orientation. | |
| 633 my $firstline = `head -n 1 $exonfile`; | |
| 634 my @firstcols = split(/\t/,$firstline); | |
| 635 my $orient = $firstcols[5]; | |
| 636 my $genomicchr = $firstcols[0]; | |
| 637 my $genomicstart = $firstcols[1]; | |
| 638 my $genomicstop = $firstcols[2]; | |
| 639 my $subtitle; | |
| 640 | |
| 641 if ($orient eq '+') { | |
| 642 my $bps = $genomicstop - $genomicstart + 1; | |
| 643 $subtitle = "Region 0-$bps: $genomicchr:".$de->format_number($genomicstart)."+".$de->format_number($genomicstop); | |
| 644 | |
| 645 } | |
| 646 else { | |
| 647 my $bps = $genomicstop - $genomicstart + 1; | |
| 648 $subtitle = "Region 0-$bps: $genomicchr:".$de->format_number($genomicstart)."-".$de->format_number($genomicstop); | |
| 649 | |
| 650 } | |
| 651 | |
| 652 # check if failed | |
| 653 if (exists($opts{'S'})) { | |
| 654 my $cs = `cut -f $covcol '$exonfile' `; | |
| 655 my @c = split(/\n/,$cs); | |
| 656 @c = sort { $a <=> $b } @c; | |
| 657 if ($c[0] >= $thresh) { | |
| 658 # lowest coverage > threshold => skip | |
| 659 next; | |
| 660 } | |
| 661 } | |
| 662 # print Rscript | |
| 663 open OUT, ">$wd/Rout/exonplot.R"; | |
| 664 print OUT 'coveragetable <- read.table("'.$exonfile.'",as.is=TRUE,sep="\t",header=FALSE)'."\n"; | |
| 665 print OUT 'coverage <- coveragetable[,'.$covcol.']'."\n"; | |
| 666 print OUT 'coverage[coverage < 1] <- 1'."\n"; | |
| 667 print OUT 'positions <- coveragetable[,'.$poscol.']'."\n"; | |
| 668 my $width = 480 ; | |
| 669 my $height = 240 ; | |
| 670 my $exonstr = $exon; | |
| 671 $exonstr =~ s/\s/_/g; | |
| 672 $exon =~ s/_/ /g; | |
| 673 $exon =~ s/\|/ /g; | |
| 674 print OUT 'png(file="../Plots/Coverage_'.$exonstr.'.png", bg="white", width='.$width.', height='.$height.')'."\n"; | |
| 675 print OUT 'ylim = c(0,log10(max(max(coverage),'.($thresh+10).')))'."\n"; | |
| 676 if ($orient eq '-') { | |
| 677 print OUT 'plot(positions,log10(coverage),type="n",main="Coverage for '.$exon.'",ylab="log10(Coverage)",ylim=ylim,xlab="Position",xlim=rev(range(positions)),sub="(Transcribed from minus strand)")'."\n"; | |
| 678 print OUT 'mtext("'.$subtitle.'")'."\n"; | |
| 679 } | |
| 680 else { | |
| 681 print OUT 'plot(positions,log10(coverage),type="n",main="Coverage for '.$exon.'",ylab="log10(Coverage)",ylim=ylim,xlab="Position",sub="(Transcribed from plus strand)")'."\n"; | |
| 682 print OUT 'mtext("'.$subtitle.'")'."\n"; | |
| 683 } | |
| 684 | |
| 685 print OUT 'lines(positions,log10(coverage))'."\n"; | |
| 686 print OUT 'abline(h=log10('.$thresh.'),lwd=4,col=rgb(255,0,0,100,maxColorValue=255))'."\n"; | |
| 687 print OUT 'failedpos <- positions[coverage<'.$thresh.']'."\n"; | |
| 688 print OUT 'failedcov <- coverage[coverage<'.$thresh.']'."\n"; | |
| 689 print OUT 'points(failedpos,log10(failedcov),col="red",pch=19)'."\n"; | |
| 690 print OUT 'graphics.off()'."\n"; | |
| 691 close OUT; | |
| 692 # run R script | |
| 693 system("cd $wd/Rout && Rscript exonplot.R") == 0 || die "Could not run exonplot.R with following error '$!'\n"; | |
| 694 # Add to .TEX | |
| 695 print TEX '\begin{minipage}{0.5\linewidth}\centering'."\n"; | |
| 696 print TEX '\includegraphics[width=\textwidth,keepaspectratio=true]{../Plots/Coverage_'.$exonstr.'.png}'."\n"; | |
| 697 print TEX '\end{minipage}'."\n"; | |
| 698 $col++; | |
| 699 } | |
| 700 } | |
| 701 ## list failed exons | |
| 702 if (exists($opts{'L'})) { | |
| 703 # count columns | |
| 704 my $subtitle; | |
| 705 my $head = `head -n 1 $wd/Targets.Position.Coverage`; | |
| 706 chomp($head); | |
| 707 my @cols = split(/\t/,$head); | |
| 708 my $nrcols = scalar(@cols); | |
| 709 my $covcol = $nrcols; | |
| 710 my $poscol = $nrcols -1; | |
| 711 ## hash to print | |
| 712 # tex section header | |
| 713 open TEX, ">>$wd/Report/Report.tex"; | |
| 714 print TEX '\subsection*{List of Failed Exons}'."\n"; | |
| 715 print TEX '\underline{NOTE:} ALL exons were tested for local coverage $<$'.$thresh.' \\\\'."\n"; | |
| 716 print TEX '{\footnotesize\begin{longtable}[l]{@{\extracolsep{\fill}}llll}'."\n".'\hline'."\n"; | |
| 717 print TEX '\textbf{Target Name} & \textbf{Genomic Position} & \textbf{Avg.Coverage} & \textbf{Min.Coverage} \\\\'."\n".'\hline'."\n"; | |
| 718 print TEX '\endhead'."\n"; | |
| 719 print TEX '\hline '."\n".'\multicolumn{4}{r}{{\textsl{\footnotesize Continued on next page}}} \\\\ '."\n".'\hline' ."\n". '\endfoot' . "\n". '\endlastfoot' . "\n"; | |
| 720 | |
| 721 $col = 1; | |
| 722 open IN, "$wd/Targets.Global.Coverage"; | |
| 723 while (<IN>) { | |
| 724 chomp($_); | |
| 725 my @p = split(/\t/,$_); | |
| 726 my $region = $p[0].'-'.$p[1].'-'.$p[2]; | |
| 727 my $exon = $filehash{$region}{'exon'}; | |
| 728 # grep exon to tmp file | |
| 729 my $exonfile = "$wd/SplitFiles/File_".$filehash{$region}{'idx'}.".txt"; | |
| 730 ## determine transcript orientation. | |
| 731 my $firstline = `head -n 1 $exonfile`; | |
| 732 my @firstcols = split(/\t/,$firstline); | |
| 733 my $orient = $firstcols[5]; | |
| 734 my $genomicchr = $firstcols[0]; | |
| 735 my $genomicstart = $firstcols[1]; | |
| 736 my $genomicstop = $firstcols[2]; | |
| 737 | |
| 738 if ($orient eq '+') { | |
| 739 my $bps = $genomicstop - $genomicstart + 1; | |
| 740 $subtitle = "$genomicchr:".$de->format_number($genomicstart)."+".$de->format_number($genomicstop); | |
| 741 | |
| 742 } | |
| 743 else { | |
| 744 my $bps = $genomicstop - $genomicstart + 1; | |
| 745 $subtitle = "$genomicchr:".$de->format_number($genomicstart)."-".$de->format_number($genomicstop); | |
| 746 } | |
| 747 | |
| 748 # check if failed | |
| 749 my $cs = `cut -f $covcol '$exonfile' `; | |
| 750 my @c = split(/\n/,$cs); | |
| 751 my ($avg,$med,$min,$max,$first,$third,$ontarget) = arraystats(@c); | |
| 752 | |
| 753 if ($min >= $thresh) { | |
| 754 # lowest coverage > threshold => skip | |
| 755 next; | |
| 756 } | |
| 757 | |
| 758 # print to .tex table | |
| 759 if (length($exon) > 30) { | |
| 760 $exon = substr($exon,0,27) . '...'; | |
| 761 } | |
| 762 $exon =~ s/_/ /g; | |
| 763 $exon =~ s/\|/ /g; | |
| 764 | |
| 765 print TEX "$exon & $subtitle & ".int($avg)." & $min ".'\\\\'."\n"; | |
| 766 } | |
| 767 close IN; | |
| 768 print TEX '\hline'."\n"; | |
| 769 print TEX '\end{longtable}}'."\n"; | |
| 770 close TEX; | |
| 771 } | |
| 772 | |
| 773 | |
| 774 ## Close document | |
| 775 open OUT, ">>$wd/Report/Report.tex"; | |
| 776 print OUT '\label{endofdoc}'."\n"; | |
| 777 print OUT '\end{document}'."\n"; | |
| 778 close OUT; | |
| 779 system("cd $wd/Report && tectonic Report.tex") == 0 || die "Could not run tectonic with following error '$!'\n"; | |
| 780 | |
| 781 ## mv report to output file | |
| 782 system("cp -f $wd/Report/Report.pdf '$pdffile'"); | |
| 783 ##create tar.gz file | |
| 784 system("mkdir $wd/Results"); | |
| 785 system("cp -Rf $wd/Plots $wd/Results/"); | |
| 786 system("cp -Rf $wd/Report/ $wd/Results/"); | |
| 787 if (-e "$wd/Targets.Global.Coverage") { | |
| 788 system("cp -Rf $wd/Targets.Global.Coverage $wd/Results/"); | |
| 789 } | |
| 790 if (-e "$wd/Targets.Position.Coverage") { | |
| 791 system("cp -Rf $wd/Targets.Position.Coverage $wd/Results/"); | |
| 792 } | |
| 793 | |
| 794 | |
| 795 exit; | |
| 796 | |
| 797 ############### | |
| 798 ## FUNCTIONS ## | |
| 799 ############### | |
| 800 sub arraystats{ | |
| 801 my @array = @_; | |
| 802 my $count = scalar(@array); | |
| 803 @array = sort { $a <=> $b } @array; | |
| 804 # median | |
| 805 my $median = 0; | |
| 806 if ($count % 2) { | |
| 807 $median = $array[int($count/2)]; | |
| 808 } else { | |
| 809 $median = ($array[$count/2] + $array[$count/2 - 1]) / 2; | |
| 810 } | |
| 811 # average | |
| 812 my $sum = 0; | |
| 813 foreach (@array) { $sum += $_; } | |
| 814 my $average = $sum / $count; | |
| 815 # quantiles (rounded) | |
| 816 my $quart = int($count/4) ; | |
| 817 my $first = $array[$quart]; | |
| 818 my $third = $array[($quart*3)]; | |
| 819 my $min = $array[0]; | |
| 820 my $max = $array[($count-1)]; | |
| 821 return ($average,$median,$min,$max,$first,$third,$sum); | |
| 822 } | |
| 823 | |
| 824 sub GlobalSummary { | |
| 825 my ($bam,$targets) = @_; | |
| 826 | |
| 827 my $command = "cd $wd && coverageBed -abam $bam -b $targets > $wd/Targets.Global.Coverage"; | |
| 828 if (exists($commandsrun{$command})) { | |
| 829 return; | |
| 830 } | |
| 831 system($command); | |
| 832 $commandsrun{$command} = 1; | |
| 833 } | |
| 834 | |
| 835 sub CoveragePerRegion { | |
| 836 my ($bam,$targets) = @_; | |
| 837 my $command = "cd $wd && coverageBed -abam $bam -b $targets > $wd/Targets.Global.Coverage"; | |
| 838 if (exists($commandsrun{$command})) { | |
| 839 return; | |
| 840 } | |
| 841 system($command); | |
| 842 $commandsrun{$command} = 1; | |
| 843 } | |
| 844 | |
| 845 sub SubRegionCoverage { | |
| 846 my ($bam,$targets) = @_; | |
| 847 my $command = "cd $wd && coverageBed -abam $bam -b $targets -d > $wd/Targets.Position.Coverage"; | |
| 848 system($command); | |
| 849 $commandsrun{$command} = 1; | |
| 850 } | |
| 851 |
