Mercurial > repos > geert-vandeweyer > coverage_report
annotate CoverageReport.pl @ 27:576bfc1586f9 draft default tip
bugfix
| author | geert-vandeweyer | 
|---|---|
| date | Wed, 29 Nov 2017 08:39:31 -0500 | 
| parents | 859999cb135b | 
| children | 
| rev | line source | 
|---|---|
| 1 | 1 #!/usr/bin/perl | 
| 2 | |
| 3 # load modules | |
| 4 use Getopt::Std; | |
| 5 use File::Basename; | |
| 6 use Number::Format; | |
| 7 | |
| 8 # number format | |
| 9 my $de = new Number::Format(-thousands_sep =>',',-decimal_point => '.'); | |
| 10 | |
| 11 ########## | |
| 12 ## opts ## | |
| 13 ########## | |
| 14 ## input files | |
| 15 # b : path to input (b)am file | |
| 16 # t : path to input (t)arget regions in BED format | |
| 17 ## output files | |
| 18 # o : report pdf (o)utput file | |
| 19 # z : all plots and tables in tar.g(z) format | |
| 20 ## entries in the report | |
| 21 # r : Coverage per (r)egion (boolean) | |
| 22 # s : (s)ubregion coverage if average < specified (plots for positions along target region) (boolean) | |
| 23 # S : (S)ubregion coverage for ALL failed exons => use either s OR S or you will have double plots. | |
| 24 # A : (A)ll exons will be plotted. | |
| 25 # L : (L)ist failed exons instead of plotting | |
| 26 # m : (m)inimal Coverage threshold | |
| 27 # f : fraction of average as threshold | |
| 28 # n : sample (n)ame. | |
| 26 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 29 # T : collapse overlapping Target regions. | 
| 1 | 30 | 
| 24 
fd788f9db899
Added (default) option to collapse repetitive bed files
 geert-vandeweyer parents: 
22diff
changeset | 31 getopts('b:t:o:z:rsSALm:n:f:T', \%opts) ; | 
| 1 | 32 | 
| 33 # make output directory in (tmp) working dir | |
| 34 our $wd = "/tmp/Coverage.".int(rand(1000)); | |
| 26 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 35 | 
| 1 | 36 while (-d $wd) { | 
| 37 $wd = "/tmp/Coverage.".int(rand(1000)); | |
| 24 
fd788f9db899
Added (default) option to collapse repetitive bed files
 geert-vandeweyer parents: 
22diff
changeset | 38 | 
| 1 | 39 } | 
| 40 system("mkdir $wd"); | |
| 41 ## variables | |
| 42 our %commandsrun = (); | |
| 43 | |
| 44 if (!exists($opts{'b'}) || !-e $opts{'b'}) { | |
| 45 die('Bam File not found'); | |
| 46 } | |
| 47 if (!exists($opts{'t'}) || !-e $opts{'t'}) { | |
| 48 die('Target File (BED) not found'); | |
| 49 } | |
| 50 | |
| 51 if (exists($opts{'m'})) { | |
| 52 $thresh = $opts{'m'}; | |
| 53 } | |
| 54 else { | |
| 55 $thresh = 40; | |
| 56 } | |
| 57 | |
| 58 if (exists($opts{'f'})) { | |
| 59 $frac = $opts{'f'}; | |
| 60 } | |
| 61 else { | |
| 62 $frac = 0.2; | |
| 63 } | |
| 64 | |
| 65 if (exists($opts{'o'})) { | |
| 66 $pdffile = $opts{'o'}; | |
| 67 } | |
| 68 else { | |
| 69 $pdffile = "$wd/CoverageReport.pdf"; | |
| 70 } | |
| 71 | |
| 72 if (exists($opts{'z'})) { | |
| 73 $tarfile = $opts{'z'}; | |
| 74 } | |
| 75 else { | |
| 76 $tarfile = "$wd/Results.tar.gz"; | |
| 77 } | |
| 78 | |
| 24 
fd788f9db899
Added (default) option to collapse repetitive bed files
 geert-vandeweyer parents: 
22diff
changeset | 79 ## 0. Collapse overlapping target regions. | 
| 
fd788f9db899
Added (default) option to collapse repetitive bed files
 geert-vandeweyer parents: 
22diff
changeset | 80 if (defined($opts{'T'})) { | 
| 25 
6cb012c8497a
Added BED format check before collapsing regions.
 geert-vandeweyer parents: 
24diff
changeset | 81 ## check BED format. Must have 6 cols if using this. | 
| 
6cb012c8497a
Added BED format check before collapsing regions.
 geert-vandeweyer parents: 
24diff
changeset | 82 my $head = `head -n 1 $opts{'t'}`; | 
| 
6cb012c8497a
Added BED format check before collapsing regions.
 geert-vandeweyer parents: 
24diff
changeset | 83 chomp; | 
| 
6cb012c8497a
Added BED format check before collapsing regions.
 geert-vandeweyer parents: 
24diff
changeset | 84 my @c = split(/\t/,$head); | 
| 
6cb012c8497a
Added BED format check before collapsing regions.
 geert-vandeweyer parents: 
24diff
changeset | 85 if (scalar(@c) < 6) { | 
| 
6cb012c8497a
Added BED format check before collapsing regions.
 geert-vandeweyer parents: 
24diff
changeset | 86 die("Targets BED file must be in 6-column format for collapsings. See tool documentation for more info.\n"); | 
| 
6cb012c8497a
Added BED format check before collapsing regions.
 geert-vandeweyer parents: 
24diff
changeset | 87 } | 
| 24 
fd788f9db899
Added (default) option to collapse repetitive bed files
 geert-vandeweyer parents: 
22diff
changeset | 88 my $targets = $opts{'t'}; | 
| 
fd788f9db899
Added (default) option to collapse repetitive bed files
 geert-vandeweyer parents: 
22diff
changeset | 89 my $tmptargets = "$wd/collapsedtargets.bed"; | 
| 
fd788f9db899
Added (default) option to collapse repetitive bed files
 geert-vandeweyer parents: 
22diff
changeset | 90 system("sort -k1,1 -k2,2n $targets > $wd/sorted.targets.bed"); | 
| 
fd788f9db899
Added (default) option to collapse repetitive bed files
 geert-vandeweyer parents: 
22diff
changeset | 91 system("bedtools merge -s -scores max -nms -i $wd/sorted.targets.bed > $tmptargets"); | 
| 26 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 92 open IN, $tmptargets; | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 93 open OUT, ">$wd/collapsed.targets.renamed.bed"; | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 94 # we assume that overlapping fragments come from isoforms, not from different genes. | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 95 my %counters = (); | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 96 my @genes = (); | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 97 while (<IN>) { | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 98 chomp; | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 99 my @p = split(/\t/,$_); | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 100 my @g = split(/,/,$p[3]); | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 101 $g[0] =~ m/(\S+)\(.*/; | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 102 my $gene = $1; | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 103 if (!defined($counters{$gene})) { | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 104 push(@genes,$gene); | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 105 $counters{$gene}{'lines'} = (); | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 106 $counters{$gene}{'orient'} = $p[5]; | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 107 } | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 108 $p[3] = $gene."(COLLAPSED)"; | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 109 push(@{$counters{$gene}{'lines'}},\@p); | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 110 } | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 111 close IN; | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 112 foreach my $gene (@genes) { | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 113 if ($counters{$gene}{'orient'} eq '-') { | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 114 my $idx = scalar(@{$counters{$gene}{'lines'}}) + 1; | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 115 foreach my $line (@{$counters{$gene}{'lines'}}) { | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 116 $idx--; | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 117 $line->[3] .= "|Region_$idx"; | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 118 print OUT join("\t",@$line)."\n"; | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 119 } | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 120 } | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 121 else { | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 122 my $idx = 0; | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 123 foreach my $line (@{$counters{$gene}{'lines'}}) { | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 124 $idx++; | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 125 $line->[3] .= "|Region_$idx"; | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 126 print OUT join("\t",@$line)."\n"; | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 127 } | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 128 } | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 129 } | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 130 close OUT; | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 131 $opts{'t'} = "$wd/collapsed.targets.renamed.bed"; | 
| 24 
fd788f9db899
Added (default) option to collapse repetitive bed files
 geert-vandeweyer parents: 
22diff
changeset | 132 } | 
| 26 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 133 # 1. Coverage per exon | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 134 # included in 2. | 
| 1 | 135 | 
| 136 # 2. Coverage per position | |
| 137 &SubRegionCoverage($opts{'b'}, $opts{'t'}); | |
| 138 our %filehash; | |
| 26 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 139 our $tcov; | 
| 1 | 140 if (exists($opts{'s'}) || exists($opts{'S'}) || exists($opts{'A'}) || exists($opts{'L'})) { | 
| 26 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 141 system("mkdir -p $wd/SplitFiles"); | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 142 system("rm $wd/SplitFiles/*"); | 
| 1 | 143 ## get position coverages | 
| 144 ## split input files | |
| 145 open IN, "$wd/Targets.Position.Coverage"; | |
| 26 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 146 open BCOVSUM, ">$wd/Results/".$opts{'n'}.".Average_Region_Coverage.txt"; | 
| 1 | 147 my $fileidx = 0; | 
| 148 my $currreg = ''; | |
| 26 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 149 my $elength = 0; | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 150 my $esum = 0; | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 151 my $eline = ""; | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 152 my %out = (); | 
| 1 | 153 while (<IN>) { | 
| 154 my $line = $_; | |
| 155 chomp($line); | |
| 156 my @p = split(/\t/,$line); | |
| 26 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 157 my $reg = $p[0].'-'.$p[1].'-'.$p[2]. ": $p[3]"; | 
| 1 | 158 my $ex = $p[3]; | 
| 26 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 159 my $epos = $p[1]; | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 160 # average exon coverage calculation. | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 161 if (!defined($out{$ex})) { | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 162 $out{$ex} = (); | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 163 } | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 164 if (!defined($out{$ex}{$p[0]})) { | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 165 $out{$ex}{$p[0]} = (); | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 166 } | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 167 # needs to be transcript specific ($ex) and position specific ($epos) to handle both isoforms and PAR/multiple mapping situations. | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 168 if (!defined($out{$ex}{$p[0]}{$epos})) { | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 169 $out{$ex}{$p[0]}{$epos}{'r'} = "$p[0]\t$p[1]\t$p[2]\t$p[3]\t$p[4]\t$p[5]"; | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 170 $out{$ex}{$p[0]}{$epos}{'c'} = (); | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 171 | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 172 } | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 173 push(@{$out{$ex}{$p[0]}{$epos}{'c'}},$p[-1]); | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 174 # splitting files | 
| 1 | 175 if ($reg ne $currreg) { | 
| 176 ## new exon open new outfile | |
| 177 if ($currreg ne '') { | |
| 26 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 178 print BCOVSUM "$eline\t".($esum/$elength)."\n"; | 
| 1 | 179 ## filehandle is open. close it | 
| 180 close OUT; | |
| 181 } | |
| 26 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 182 $eline = "$p[0]\t$p[1]\t$p[2]\t$p[3]\t$p[4]\t$p[5]"; #\t$p[6]\t$p[7]\t$p[8]"; | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 183 $esum = 0; | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 184 $elength = 0; | 
| 1 | 185 if (!exists($filehash{$reg})) { | 
| 186 $fileidx++; | |
| 187 $filehash{$reg}{'idx'} = $fileidx; | |
| 26 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 188 $filehash{$reg}{'exon'} = $reg; | 
| 1 | 189 open OUT, ">> $wd/SplitFiles/File_$fileidx.txt"; | 
| 190 $currreg = $reg; | |
| 191 } | |
| 192 else { | |
| 193 open OUT, ">> $wd/SplitFiles/File_".$filehash{$reg}{'idx'}.".txt"; | |
| 194 $currreg = $reg; | |
| 195 } | |
| 196 } | |
| 197 ## print the line to the open filehandle. | |
| 198 print OUT "$line\n"; | |
| 26 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 199 $esum += $p[-1]; | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 200 $elength++; | 
| 1 | 201 } | 
| 202 close OUT; | |
| 203 close IN; | |
| 26 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 204 if ($esum > 0) { | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 205 print BCOVSUM "$eline\t".($esum/$elength)."\n"; | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 206 } | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 207 close BCOVSUM; | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 208 open OUT, ">$wd/avg.tcov.txt"; | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 209 | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 210 foreach my $tr_ex (sort {$a cmp $b} keys(%out)) { | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 211 foreach my $chr (sort {$a cmp $b} keys(%{$out{$tr_ex}})) { | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 212 foreach(sort {$a <=> $b} keys(%{$out{$tr_ex}{$chr}})) { | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 213 my ($avg,$nr,$nrcov) = GetStats(\@{$out{$tr_ex}{$chr}{$_}{'c'}}); | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 214 my $frac = 0; | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 215 if ($nr > 0) { | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 216 $frac = ($nrcov / $nr); | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 217 } | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 218 print OUT $out{$tr_ex}{$chr}{$_}{'r'}."\t".$avg."\t".$nrcov."\t".$nr."\t".$frac."\n"; | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 219 } | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 220 } | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 221 } | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 222 close OUT; | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 223 $tcov = "$wd/avg.tcov.txt"; | 
| 1 | 224 | 
| 225 } | |
| 226 ## sort output files according to targets file | |
| 26 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 227 my %hash = (); | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 228 open IN, $tcov; | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 229 while (<IN>) { | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 230 my @p = split(/\t/,$_) ; | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 231 $hash{$p[3].':'.$p[1]} = $_; | 
| 1 | 232 } | 
| 26 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 233 close IN; | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 234 open OUT, ">$tcov"; | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 235 open IN, $opts{'t'}; | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 236 while (<IN>) { | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 237 my @p = split(/\t/,$_) ; | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 238 print OUT $hash{$p[3].':'.$p[1]}; | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 239 } | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 240 close IN; | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 241 close OUT; | 
| 1 | 242 | 
| 243 | |
| 244 #################################### | |
| 245 ## PROCESS RESULTS & CREATE PLOTS ## | |
| 246 #################################### | |
| 247 system("mkdir $wd/Report"); | |
| 248 | |
| 249 system("mkdir $wd/Rout"); | |
| 250 system("mkdir $wd/Plots"); | |
| 251 | |
| 252 $samplename = $opts{'n'}; | |
| 253 $samplename =~ s/_/\\_/g; | |
| 254 | |
| 255 # 0. Preamble | |
| 256 ## compose preamble | |
| 257 open OUT, ">$wd/Report/Report.tex"; | |
| 258 print OUT '\documentclass[a4paper,10pt]{article}'."\n"; | |
| 259 print OUT '\usepackage[left=2cm,top=1.5cm,right=1.5cm,bottom=2.5cm,nohead]{geometry}'."\n"; | |
| 260 print OUT '\usepackage{longtable}'."\n"; | |
| 261 print OUT '\usepackage[T1]{fontenc}'."\n"; | |
| 262 print OUT '\usepackage{fancyhdr}'."\n"; | |
| 263 print OUT '\usepackage[latin9]{inputenc}'."\n"; | |
| 264 print OUT '\usepackage{color}'."\n"; | |
| 265 print OUT '\usepackage[pdftex]{graphicx}'."\n"; | |
| 266 print OUT '\definecolor{grey}{RGB}{160,160,160}'."\n"; | |
| 267 print OUT '\definecolor{darkgrey}{RGB}{100,100,100}'."\n"; | |
| 268 print OUT '\definecolor{red}{RGB}{255,0,0}'."\n"; | |
| 269 print OUT '\definecolor{orange}{RGB}{238,118,0}'."\n"; | |
| 270 print OUT '\setlength\LTleft{0pt}'."\n"; | |
| 271 print OUT '\setlength\LTright{0pt}'."\n"; | |
| 272 print OUT '\begin{document}'."\n"; | |
| 273 print OUT '\pagestyle{fancy}'."\n"; | |
| 274 print OUT '\fancyhead{}'."\n"; | |
| 275 print OUT '\renewcommand{\footrulewidth}{0.4pt}'."\n"; | |
| 276 print OUT '\renewcommand{\headrulewidth}{0pt}'."\n"; | |
| 277 print OUT '\fancyfoot[R]{\today\hspace{2cm}\thepage\ of \pageref{endofdoc}}'."\n"; | |
| 278 print OUT '\fancyfoot[C]{}'."\n"; | |
| 279 print OUT '\fancyfoot[L]{Coverage Report for ``'.$samplename.'"}'."\n"; | |
| 280 print OUT '\let\oldsubsubsection=\subsubsection'."\n"; | |
| 281 print OUT '\renewcommand{\subsubsection}{%'."\n"; | |
| 282 print OUT ' \filbreak'."\n"; | |
| 283 print OUT ' \oldsubsubsection'."\n"; | |
| 284 print OUT '}'."\n"; | |
| 285 # main title | |
| 286 print OUT '\section*{Coverage Report for ``'.$samplename.'"}'."\n"; | |
| 287 close OUT; | |
| 288 | |
| 289 # 1. Summary Report | |
| 290 # Get samtools flagstat summary of BAM file | |
| 291 my $flagstat = `samtools flagstat $opts{'b'}`; | |
| 292 my @s = split(/\n/,$flagstat); | |
| 293 # Get number of reads mapped in total | |
| 294 ## updated on 2012-10-1 !! | |
| 295 $totalmapped = $s[2]; | |
| 296 $totalmapped =~ s/^(\d+)(\s.+)/$1/; | |
| 297 # count columns | |
| 26 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 298 my $head = `head -n 1 $tcov`; | 
| 1 | 299 chomp($head); | 
| 300 my @cols = split(/\t/,$head); | |
| 301 my $nrcols = scalar(@cols); | |
| 302 my $covcol = $nrcols - 3; | |
| 303 # get min/max/median/average coverage => values | |
| 26 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 304 my $covs = `cut -f $covcol $tcov`; | 
| 1 | 305 my @coverages = split(/\n/,$covs); | 
| 306 my ($eavg,$med,$min,$max,$first,$third,$ontarget) = arraystats(@coverages); | |
| 26 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 307 my $spec = ''; | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 308 if ($totalmapped != 0 && $totalmapped ne '') { | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 309 $spec = sprintf("%.1f",($ontarget / $totalmapped)*100); | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 310 } | 
| 1 | 311 # get min/max/median/average coverage => boxplot in R | 
| 312 open OUT, ">$wd/Rout/boxplot.R"; | |
| 26 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 313 print OUT 'coverage <- read.table("'.$tcov.'",as.is=TRUE,sep="\t",header=FALSE)'."\n"; | 
| 1 | 314 print OUT 'coverage <- coverage[,'.$covcol.']'."\n"; | 
| 22 
95062840f80f
Correction to png calls to use cairo instead of x11. thanks to Eric Enns for pointing  this out.
 geert-vandeweyer parents: 
12diff
changeset | 315 print OUT 'png(file="../Plots/CoverageBoxPlot.png", bg="white", width=240, height=480,type=c("cairo"))'."\n"; | 
| 1 | 316 print OUT 'boxplot(coverage,range=1.5,main="Target Region Coverage")'."\n"; | 
| 317 print OUT 'graphics.off()'."\n"; | |
| 318 close OUT; | |
| 26 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 319 print "Running boxplot.R : \n"; | 
| 12 
86df3f847a72
Switched to R 3.0.2 from iuc, and moved bedtools to seperate tool_definition
 geert-vandeweyer parents: 
11diff
changeset | 320 system("cd $wd/Rout && Rscript boxplot.R"); | 
| 1 | 321 | 
| 322 ## global nt coverage plot | |
| 323 ## use perl to make histogram (lower memory) | |
| 324 open IN, "$wd/Targets.Position.Coverage"; | |
| 325 my %dens; | |
| 326 my $counter = 0; | |
| 327 my $sum = 0; | |
| 328 while (<IN>) { | |
| 329 chomp(); | |
| 330 my @p = split(/\t/); | |
| 331 $sum += $p[-1]; | |
| 332 $counter++; | |
| 333 if (defined($dens{$p[-1]})) { | |
| 334 $dens{$p[-1]}++; | |
| 335 } | |
| 336 else { | |
| 337 $dens{$p[-1]} = 1; | |
| 338 } | |
| 339 } | |
| 340 $avg = $sum/$counter; | |
| 341 close IN; | |
| 342 open OUT, ">$wd/Rout/hist.txt"; | |
| 3 | 343 if (!defined($dens{'0'})) { | 
| 344 $dens{'0'} = 0; | |
| 345 } | |
| 1 | 346 foreach (keys(%dens)) { | 
| 347 print OUT "$_;$dens{$_}\n"; | |
| 348 } | |
| 349 close OUT; | |
| 350 open OUT, ">$wd/Rout/ntplot.R"; | |
| 351 # read coverage hist in R to plot | |
| 352 print OUT 'coverage <- read.table("hist.txt" , as.is = TRUE, header=FALSE,sep=";")'."\n"; | |
| 353 print OUT 'mincov <- '."$thresh \n"; | |
| 354 print OUT "avg <- round($avg)\n"; | |
| 355 print OUT "colnames(coverage) <- c('cov','count')\n"; | |
| 356 print OUT 'coverage$cov <- coverage$cov / avg'."\n"; | |
| 357 print OUT 'rep <- which(coverage$cov > 1)'."\n"; | |
| 358 print OUT 'coverage[coverage$cov > 1,1] <- 1'."\n"; | |
| 359 print OUT 'values <- coverage[coverage$cov < 1,]'."\n"; | |
| 360 print OUT 'values <- rbind(values,c(1,sum(coverage[coverage$cov == 1,"count"])))'."\n"; | |
| 361 print OUT 'values <- values[order(values$cov),]'."\n"; | |
| 362 print OUT 'prevcount <- 0'."\n"; | |
| 363 # make cumulative count data frame | |
| 364 print OUT 'for (i in rev(values$cov)) {'."\n"; | |
| 365 print OUT ' values[values$cov == i,"count"] <- prevcount + values[values$cov == i,"count"]'."\n"; | |
| 366 print OUT ' prevcount <- values[values$cov == i,"count"]'."\n"; | |
| 367 print OUT '}'."\n"; | |
| 368 print OUT 'values$count <- values$count / (values[values$cov == 0,"count"] / 100)'."\n"; | |
| 369 # get some values to plot lines. | |
| 370 print OUT 'mincov.x <- mincov/avg'."\n"; | |
| 371 print OUT 'if (mincov/avg <= 1) {'."\n"; | |
| 372 print OUT ' ii <- which(values$cov == mincov.x)'."\n"; | |
| 373 print OUT ' if (length(ii) == 1) {'."\n"; | |
| 374 print OUT ' mincov.y <- values[ii[1],"count"]'."\n"; | |
| 375 print OUT ' } else {'."\n"; | |
| 376 print OUT ' i1 <- max(which(values$cov < mincov.x))'."\n"; | |
| 377 print OUT ' i2 <- min(which(values$cov > mincov.x))'."\n"; | |
| 378 print OUT ' mincov.y <- ((values[i2,"count"] - values[i1,"count"])/(values[i2,"cov"] - values[i1,"cov"]))*(mincov.x - values[i1,"cov"]) + values[i1,"count"]'."\n"; | |
| 379 print OUT ' }'."\n"; | |
| 380 print OUT '}'."\n"; | |
| 381 # open output image and create plot | |
| 22 
95062840f80f
Correction to png calls to use cairo instead of x11. thanks to Eric Enns for pointing  this out.
 geert-vandeweyer parents: 
12diff
changeset | 382 print OUT 'png(file="../Plots/CoverageNtPlot.png", bg="white", width=540, height=480,type=c("cairo"))'."\n"; | 
| 1 | 383 print OUT 'par(xaxs="i",yaxs="i")'."\n"; | 
| 384 print OUT 'plot(values$cov,values$count,ylim=c(0,100),pch=".",main="Cumulative Normalised Base-Coverage Plot",xlab="Normalizalised Coverage",ylab="Cumulative Nr. Of Bases")'."\n"; | |
| 385 print OUT 'lines(values$cov,values$count)'."\n"; | |
| 386 print OUT 'if (mincov.x <= 1) {'."\n"; | |
| 387 print OUT ' lines(c(mincov.x,mincov.x),c(0,mincov.y),lty=2,col="darkgreen")'."\n"; | |
| 388 print OUT ' lines(c(0,mincov.x),c(mincov.y,mincov.y),lty=2,col="darkgreen")'."\n"; | |
| 389 print OUT ' text(1,(95),pos=2,col="darkgreen",labels="Threshold: '.$thresh.'x")'."\n"; | |
| 390 print OUT ' text(1,(91),pos=2,col="darkgreen",labels=paste("%Bases: ",round(mincov.y,2),"%",sep=""))'."\n"; | |
| 391 print OUT '} else {'."\n"; | |
| 392 print OUT ' text(1,(95),pos=2,col="darkgreen",labels="Threshold ('.$thresh.'x) > Average")'."\n"; | |
| 393 print OUT ' text(1,(91),pos=2,col="darkgreen",labels="Plotting impossible")'."\n"; | |
| 394 print OUT '}'."\n"; | |
| 395 print OUT 'frac.x <- '."$frac\n"; | |
| 396 print OUT 'ii <- which(values$cov == frac.x)'."\n"; | |
| 397 print OUT 'if (length(ii) == 1) {'."\n"; | |
| 398 print OUT ' frac.y <- values[ii[1],"count"]'."\n"; | |
| 399 print OUT '} else {'."\n"; | |
| 400 print OUT ' i1 <- max(which(values$cov < frac.x))'."\n"; | |
| 401 print OUT ' i2 <- min(which(values$cov > frac.x))'."\n"; | |
| 402 print OUT ' frac.y <- ((values[i2,"count"] - values[i1,"count"])/(values[i2,"cov"] - values[i1,"cov"]))*(frac.x - values[i1,"cov"]) + values[i1,"count"]'."\n"; | |
| 403 print OUT '}'."\n"; | |
| 404 print OUT 'lines(c(frac.x,frac.x),c(0,frac.y),lty=2,col="red")'."\n"; | |
| 405 print OUT 'lines(c(0,frac.x),c(frac.y,frac.y),lty=2,col="red")'."\n"; | |
| 406 #iprint OUT 'text((frac.x+0.05),(frac.y - 2),pos=4,col="red",labels=paste(frac.x," x Avg.Cov : ",round(frac.x * avg,2),"x",sep="" ))'."\n"; | |
| 407 #print OUT 'text((frac.x+0.05),(frac.y-5),pos=4,col="red",labels=paste("%Bases: ",round(frac.y,2),"%",sep=""))'."\n"; | |
| 408 print OUT 'text(1,86,pos=2,col="red",labels=paste(frac.x," x Avg.Cov : ",round(frac.x * avg,2),"x",sep="" ))'."\n"; | |
| 409 print OUT 'text(1,82,pos=2,col="red",labels=paste("%Bases: ",round(frac.y,2),"%",sep=""))'."\n"; | |
| 410 | |
| 411 print OUT 'graphics.off()'."\n"; | |
| 412 | |
| 413 close OUT; | |
| 26 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 414 print "Running ntplot.r\n"; | 
| 12 
86df3f847a72
Switched to R 3.0.2 from iuc, and moved bedtools to seperate tool_definition
 geert-vandeweyer parents: 
11diff
changeset | 415 system("cd $wd/Rout && Rscript ntplot.R"); | 
| 1 | 416 ## PRINT TO .TEX FILE | 
| 417 open OUT, ">>$wd/Report/Report.tex"; | |
| 418 # average coverage overviews | |
| 419 print OUT '\subsection*{Overall Summary}'."\n"; | |
| 420 print OUT '{\small '; | |
| 421 # left : boxplot | |
| 422 print OUT '\begin{minipage}{0.3\linewidth}\centering'."\n"; | |
| 423 print OUT '\includegraphics[width=\textwidth,keepaspectratio=true]{../Plots/CoverageBoxPlot.png}'."\n"; | |
| 424 print OUT '\end{minipage}'."\n"; | |
| 425 # right : cum.cov.plot | |
| 426 print OUT '\hspace{0.6cm}'."\n"; | |
| 427 print OUT '\begin{minipage}{0.65\linewidth}\centering'."\n"; | |
| 428 print OUT '\includegraphics[width=\textwidth,keepaspectratio=true]{../Plots/CoverageNtPlot.png}'."\n"; | |
| 429 print OUT '\end{minipage} \\\\'."\n"; | |
| 430 ## next line | |
| 431 print OUT '\begin{minipage}{0.48\linewidth}'."\n"; | |
| 432 print OUT '\vspace{-1.2em}'."\n"; | |
| 433 print OUT '\begin{tabular}{ll}'."\n"; | |
| 434 # bam statistics | |
| 435 print OUT '\multicolumn{2}{l}{\textbf{\underline{Samtools Flagstat Summary}}} \\\\'."\n"; | |
| 436 foreach (@s) { | |
| 437 $_ =~ m/^(\d+)\s(.+)$/; | |
| 438 my $one = $1; | |
| 439 my $two = $2; | |
| 440 $two =~ s/\s\+\s0\s//; | |
| 441 $two = ucfirst($two); | |
| 442 $one =~ s/%/\\%/g; | |
| 443 # remove '+ 0 ' from front | |
| 444 $two =~ s/\+\s0\s//; | |
| 445 # remove trailing from end | |
| 446 $two =~ s/(\s\+.*)|(:.*)/\)/; | |
| 447 $two =~ s/%/\\%/g; | |
| 448 $two =~ s/>=/\$\\ge\$/g; | |
| 449 $two = ucfirst($two); | |
| 450 print OUT '\textbf{'.$two.'} & '.$one.' \\\\'."\n"; | |
| 26 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 451 | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 452 | 
| 1 | 453 } | 
| 454 print OUT '\end{tabular}\end{minipage}'."\n"; | |
| 455 print OUT '\hspace{1.5cm}'."\n"; | |
| 456 # target coverage statistics | |
| 457 print OUT '\begin{minipage}{0.4\linewidth}'."\n"; | |
| 458 #print OUT '\vspace{-4.8em}'."\n"; | |
| 459 print OUT '\begin{tabular}{ll}'."\n"; | |
| 460 print OUT '\multicolumn{2}{l}{\textbf{\underline{Target Region Coverage}}} \\\\'."\n"; | |
| 461 print OUT '\textbf{Number of Target Regions} & '.scalar(@coverages).' \\\\'."\n"; | |
| 462 print OUT '\textbf{Minimal Region Coverage} & '.$min.' \\\\'."\n"; | |
| 463 print OUT '\textbf{25\% Region Coverage} & '.$first.' \\\\'. "\n"; | |
| 464 print OUT '\textbf{50\% (Median) Region Coverage} & '.$med.' \\\\'. "\n"; | |
| 465 print OUT '\textbf{75\% Region Coverage} & '.$third.' \\\\'. "\n"; | |
| 466 print OUT '\textbf{Maximal Region Coverage} & '.$max.' \\\\'. "\n"; | |
| 467 print OUT '\textbf{Average Region Coverage} & '.int($eavg).' \\\\'. "\n"; | |
| 468 print OUT '\textbf{Mapped On Target} & '.$spec.' \\\\'."\n"; | |
| 469 print OUT '\multicolumn{2}{l}{\textbf{\underline{Target Base Coverage }}} \\\\'."\n"; | |
| 470 print OUT '\textbf{Number of Target Bases} & '.$counter.' \\\\'."\n"; | |
| 471 print OUT '\textbf{Average Base Coverage} & '.int($avg).' \\\\'. "\n"; | |
| 472 print OUT '\textbf{Non-Covered Bases} & '.$dens{'0'}.' \\\\'."\n"; | |
| 473 #print OUT '\textbf{Bases Covered $ge$ '.$frac.'xAvg.Cov} & '. | |
| 474 print OUT '\end{tabular}\end{minipage}}'."\n"; | |
| 475 close OUT; | |
| 476 | |
| 477 # 2. GLOBAL COVERAGE OVERVIEW PER GENE | |
| 478 @failedexons; | |
| 479 @allexons; | |
| 480 @allregions; | |
| 481 @failedregions; | |
| 26 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 482 %failednames; | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 483 %allnames; | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 484 | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 485 if (exists($opts{'r'}) || exists($opts{'s'}) || exists($opts{'S'})|| exists($opts{'A'})) { | 
| 1 | 486 # count columns | 
| 26 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 487 my $head = `head -n 1 '$tcov'`; | 
| 1 | 488 chomp($head); | 
| 489 my @cols = split(/\t/,$head); | |
| 490 my $nrcols = scalar(@cols); | |
| 491 my $covcol = $nrcols - 3; | |
| 492 # Coverage Plots for each gene => barplots in R, table here. | |
| 26 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 493 open IN, "$tcov"; | 
| 1 | 494 my $currgroup = ''; | 
| 495 my $startline = 0; | |
| 496 my $stopline = 0; | |
| 497 $linecounter = 0; | |
| 498 while (<IN>) { | |
| 499 $linecounter++; | |
| 500 chomp($_); | |
| 501 my @c = split(/\t/,$_); | |
| 26 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 502 my $reg = $c[0].'-'.$c[1].'-'.$c[2]; | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 503 push(@allregions,$reg); | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 504 my $group = $reg .": ".$c[3]; | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 505 #my $gene = $c[3]; | 
| 1 | 506 ## coverage failure? | 
| 507 if ($c[$nrcol-1] < 1 || $c[$covcol-1] < $thresh) { | |
| 508 push(@failedexons,$group); | |
| 509 push(@failedregions,$c[0].'-'.$c[1].'-'.$c[2]); | |
| 26 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 510 $failednames{$group} = $c[0].'-'.$c[1].'-'.$c[2]; | 
| 1 | 511 } | 
| 512 ## store exon | |
| 513 push(@allexons,$group); | |
| 26 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 514 $allnames{$group} = $c[0].'-'.$c[1].'-'.$c[2]; | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 515 if (!exists($opts{'r'}) && !exists($opts{'s'}) && !exists($opts{'S'}) && exists($opts{'A'})) { | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 516 ## no need for barplots | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 517 next; | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 518 } | 
| 1 | 519 ## extract and check gene | 
| 26 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 520 my $gene = $group; | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 521 $gene =~ s/^chr\S+: (\S+)[\|\s](.+)/$1/; | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 522 if ($gene ne $currgroup ) { | 
| 1 | 523 if ($currgroup ne '') { | 
| 524 # new gene, make plot. | |
| 525 open OUT, ">$wd/Rout/barplot.R"; | |
| 26 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 526 print OUT 'coveragetable <- read.table("'.$tcov.'",as.is=TRUE,sep="\t",header=FALSE)'."\n"; | 
| 1 | 527 print OUT 'coverage <- coveragetable[c('.$startline.':'.$stopline.'),'.$covcol.']'."\n"; | 
| 528 print OUT 'entries <- coveragetable[c('.$startline.':'.$stopline.'),4]'."\n"; | |
| 529 print OUT 'entries <- sub("\\\\S+\\\\|","",entries,perl=TRUE)'."\n"; | |
| 530 print OUT 'coverage[coverage < 1] <- 1'."\n"; | |
| 531 print OUT 'colors <- c(rep("grey",length(coverage)))'."\n"; | |
| 532 # coverage not whole target region => orange | |
| 533 print OUT 'covperc <- coveragetable[c('.$startline.':'.$stopline.'),'.$nrcols.']'."\n"; | |
| 534 print OUT 'colors[covperc<1] <- "orange"'."\n"; | |
| 535 # coverage below threshold => red | |
| 536 print OUT 'colors[coverage<'.$thresh.'] <- "red"'."\n"; | |
| 537 | |
| 538 if ($stopline - $startline > 20) { | |
| 539 $scale = 2; | |
| 540 } | |
| 541 else { | |
| 542 $scale = 1; | |
| 543 } | |
| 544 my $width = 480 * $scale; | |
| 545 my $height = 240 * $scale; | |
| 22 
95062840f80f
Correction to png calls to use cairo instead of x11. thanks to Eric Enns for pointing  this out.
 geert-vandeweyer parents: 
12diff
changeset | 546 print OUT 'png(file="../Plots/Coverage_'.$currgroup.'.png", bg="white", width='.$width.', height='.$height.',type=c("cairo"))'."\n"; | 
| 1 | 547 print OUT 'ylim = c(0,max(max(log10(coverage),log10('.($thresh+20).'))))'."\n"; | 
| 548 print OUT 'mp <- barplot(log10(coverage),col=colors,main="Exon Coverage for '.$currgroup.'",ylab="Log10(Coverage)",ylim=ylim)'."\n"; | |
| 549 print OUT 'text(mp, log10(coverage) + '.(0.4/$scale).',format(coverage),xpd = TRUE,srt=90)'."\n"; | |
| 550 print OUT 'text(mp,par("usr")[3]-0.05,labels=entries,srt=45,adj=1,xpd=TRUE)'."\n"; | |
| 551 print OUT 'abline(h=log10('.$thresh.'),lwd=4,col=rgb(255,0,0,100,maxColorValue=255))'."\n"; | |
| 552 print OUT 'graphics.off()'."\n"; | |
| 553 close OUT; | |
| 12 
86df3f847a72
Switched to R 3.0.2 from iuc, and moved bedtools to seperate tool_definition
 geert-vandeweyer parents: 
11diff
changeset | 554 system("cd $wd/Rout && Rscript barplot.R"); | 
| 1 | 555 if ($scale == 1) { | 
| 556 push(@small,'\includegraphics[width=\textwidth,keepaspectratio=true]{../Plots/Coverage_'.$currgroup.'.png}'); | |
| 557 } | |
| 558 else { | |
| 559 push(@large,'\includegraphics[width=\textwidth,keepaspectratio=true]{../Plots/Coverage_'.$currgroup.'.png}'); | |
| 560 } | |
| 561 | |
| 562 } | |
| 26 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 563 $currgroup = $gene; | 
| 1 | 564 $startline = $linecounter; | 
| 565 } | |
| 566 $stopline = $linecounter; | |
| 567 } | |
| 568 close IN; | |
| 569 if ($currgroup ne '') { | |
| 570 # last gene, make plot. | |
| 571 open OUT, ">$wd/Rout/barplot.R"; | |
| 26 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 572 print OUT 'coveragetable <- read.table("'.$tcov.'",as.is=TRUE,sep="\t",header=FALSE)'."\n"; | 
| 1 | 573 print OUT 'coverage <- coveragetable[c('.$startline.':'.$stopline.'),'.$covcol.']'."\n"; | 
| 574 print OUT 'entries <- coveragetable[c('.$startline.':'.$stopline.'),4]'."\n"; | |
| 575 print OUT 'entries <- sub("\\\\S+\\\\|","",entries,perl=TRUE)'."\n"; | |
| 576 print OUT 'coverage[coverage < 1] <- 1'."\n"; | |
| 577 print OUT 'colors <- c(rep("grey",length(coverage)))'."\n"; | |
| 578 print OUT 'colors[coverage<'.$thresh.'] <- "red"'."\n"; | |
| 579 | |
| 580 if ($stopline - $startline > 20) { | |
| 581 $scale = 2; | |
| 582 } | |
| 583 else { | |
| 584 $scale = 1; | |
| 585 } | |
| 586 my $width = 480 * $scale; | |
| 587 my $height = 240 * $scale; | |
| 22 
95062840f80f
Correction to png calls to use cairo instead of x11. thanks to Eric Enns for pointing  this out.
 geert-vandeweyer parents: 
12diff
changeset | 588 print OUT 'png(file="../Plots/Coverage_'.$currgroup.'.png", bg="white", width='.$width.', height='.$height.',type=c("cairo"))'."\n"; | 
| 1 | 589 print OUT 'ylim = c(0,max(max(log10(coverage),log10('.($thresh+20).'))))'."\n"; | 
| 590 print OUT 'mp <- barplot(log10(coverage),col=colors,main="Exon Coverage for '.$currgroup.'",ylab="Log10(Coverage)", ylim=ylim)'."\n"; | |
| 591 print OUT 'text(mp, log10(coverage) + log10(2),format(coverage),xpd = TRUE,srt=90)'."\n"; | |
| 592 print OUT 'text(mp,par("usr")[3]-0.1,labels=entries,srt=45,adj=1,xpd=TRUE)'."\n"; | |
| 593 print OUT 'abline(h=log10('.$thresh.'),lwd=4,col=rgb(255,0,0,100,maxColorValue=255))'."\n"; | |
| 594 print OUT 'graphics.off()'."\n"; | |
| 595 close OUT; | |
| 12 
86df3f847a72
Switched to R 3.0.2 from iuc, and moved bedtools to seperate tool_definition
 geert-vandeweyer parents: 
11diff
changeset | 596 system("cd $wd/Rout && Rscript barplot.R"); | 
| 1 | 597 if ($scale == 1) { | 
| 598 push(@small,'\includegraphics[width=\textwidth,keepaspectratio=true]{../Plots/Coverage_'.$currgroup.'.png}'); | |
| 599 } | |
| 600 else { | |
| 601 push(@large,'\includegraphics[width=\textwidth,keepaspectratio=true]{../Plots/Coverage_'.$currgroup.'.png}'); | |
| 602 } | |
| 603 } | |
| 604 ## print to TEX | |
| 605 open OUT, ">>$wd/Report/Report.tex"; | |
| 606 print OUT '\subsection*{Gene Summaries}'."\n"; | |
| 607 print OUT '\underline{Legend:} \\\\'."\n"; | |
| 26 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 608 print OUT '{\color{red}\textbf{RED:} Average coverage did not reach set threshold of '.$thresh.'} \\\\'."\n"; | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 609 print OUT '{\color{orange}\textbf{ORANGE:} Coverage was incomplete for the exon (section with zero coverage found). Overruled by red.} \\\\' ."\n"; | 
| 1 | 610 $col = 1; | 
| 611 foreach (@small) { | |
| 612 if ($col > 2) { | |
| 613 $col = 1; | |
| 614 print OUT "\n"; | |
| 615 } | |
| 616 print OUT '\begin{minipage}{0.5\linewidth}\centering'."\n"; | |
| 617 print OUT $_."\n"; | |
| 618 print OUT '\end{minipage}'."\n"; | |
| 619 $col++; | |
| 620 } | |
| 621 ## new line | |
| 622 if ($col == 2) { | |
| 623 print OUT '\\\\'." \n"; | |
| 624 } | |
| 625 foreach(@large) { | |
| 626 print OUT $_."\n"; | |
| 627 } | |
| 628 close OUT; | |
| 629 | |
| 630 } | |
| 631 | |
| 632 # 3. Detailed overview of failed exons (globally failed) | |
| 633 if (exists($opts{'s'})) { | |
| 634 # count columns | |
| 635 my $head = `head -n 1 $wd/Targets.Position.Coverage`; | |
| 636 chomp($head); | |
| 637 my @cols = split(/\t/,$head); | |
| 638 my $nrcols = scalar(@cols); | |
| 639 my $covcol = $nrcols; | |
| 640 my $poscol = $nrcols -1; | |
| 641 # tex section header | |
| 642 open TEX, ">>$wd/Report/Report.tex"; | |
| 643 print TEX '\subsection*{Failed Exon Plots}'."\n"; | |
| 644 $col = 1; | |
| 645 print TEX '\underline{NOTE:} Only exons with global coverage $<$'.$thresh.' or incomplete coverage were plotted \\\\'."\n"; | |
| 26 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 646 foreach(sort(keys(%failednames)) ) { | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 647 #foreach(@failedregions) { | 
| 1 | 648 if ($col > 2) { | 
| 649 $col = 1; | |
| 650 print TEX "\n"; | |
| 651 } | |
| 652 # which exon | |
| 26 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 653 my $group = $_; | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 654 my ($region,$name) = split(/: /,$group); | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 655 #my $region = $failednames{$_}; | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 656 my $exon = $filehash{$group}{'exon'}; | 
| 1 | 657 # link exon to tmp file | 
| 26 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 658 my $exonfile = "$wd/SplitFiles/File_".$filehash{$group}{'idx'}.".txt"; | 
| 1 | 659 ## determine transcript orientation and location | 
| 660 my $firstline = `head -n 1 $exonfile`; | |
| 661 my @firstcols = split(/\t/,$firstline); | |
| 662 my $orient = $firstcols[5]; | |
| 663 my $genomicchr = $firstcols[0]; | |
| 664 my $genomicstart = $firstcols[1]; | |
| 665 my $genomicstop = $firstcols[2]; | |
| 666 if ($orient eq '+') { | |
| 667 $bps = $genomicstop - $genomicstart + 1; | |
| 668 $subtitle = "Region 0-$bps: $genomicchr:".$de->format_number($genomicstart)."+".$de->format_number($genomicstop); | |
| 669 } | |
| 670 else { | |
| 671 $bps = $genomicstop - $genomicstart + 1; | |
| 672 $subtitle = "Region 0-$bps: $genomicchr:".$de->format_number($genomicstart)."-".$de->format_number($genomicstop); | |
| 673 } | |
| 674 # print Rscript | |
| 675 open OUT, ">$wd/Rout/exonplot.R"; | |
| 676 print OUT 'coveragetable <- read.table("'.$exonfile.'",as.is=TRUE,sep="\t",header=FALSE)'."\n"; | |
| 677 print OUT 'coverage <- coveragetable[,'.$covcol.']'."\n"; | |
| 678 print OUT 'coverage[coverage < 1] <- 1'."\n"; | |
| 679 print OUT 'positions <- coveragetable[,'.$poscol.']'."\n"; | |
| 680 | |
| 681 my $width = 480 ; | |
| 682 my $height = 240 ; | |
| 683 my $exonstr = $exon; | |
| 684 $exonstr =~ s/\s/_/g; | |
| 26 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 685 $exonstr =~ s/:/_/g; | 
| 1 | 686 $exon =~ s/_/ /g; | 
| 687 $exon =~ s/\|/ /g; | |
| 26 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 688 $exon =~ s/chr.*: (.*)$/$1/; | 
| 22 
95062840f80f
Correction to png calls to use cairo instead of x11. thanks to Eric Enns for pointing  this out.
 geert-vandeweyer parents: 
12diff
changeset | 689 print OUT 'png(file="../Plots/Coverage_'.$exonstr.'.png", bg="white", width='.$width.', height='.$height.',type=c("cairo"))'."\n"; | 
| 1 | 690 print OUT 'ylim = c(0,log10(max(max(coverage),'.($thresh+10).')))'."\n"; | 
| 691 if ($orient eq '-') { | |
| 692 print OUT 'plot(positions,log10(coverage),type="n",main="Coverage for '.$exon.'",ylab="log10(Coverage)",ylim=ylim,xlab="Position",xlim=rev(range(positions)),sub="(Transcribed from minus strand)")'."\n"; | |
| 693 print OUT 'mtext("'.$subtitle.'")'."\n"; | |
| 694 } | |
| 695 else { | |
| 696 print OUT 'plot(positions,log10(coverage),type="n",main="Coverage for '.$exon.'",ylab="log10(Coverage)",ylim=ylim,xlab="Position",sub="(Transcribed from plus strand)")'."\n"; | |
| 697 print OUT 'mtext("'.$subtitle.'")'."\n"; | |
| 698 } | |
| 699 print OUT 'lines(positions,log10(coverage))'."\n"; | |
| 700 print OUT 'abline(h=log10('.$thresh.'),lwd=4,col=rgb(255,0,0,100,maxColorValue=255))'."\n"; | |
| 701 print OUT 'failedpos <- positions[coverage<'.$thresh.']'."\n"; | |
| 702 print OUT 'failedcov <- coverage[coverage<'.$thresh.']'."\n"; | |
| 703 print OUT 'points(failedpos,log10(failedcov),col="red",pch=19)'."\n"; | |
| 704 print OUT 'graphics.off()'."\n"; | |
| 705 close OUT; | |
| 706 # run R script | |
| 12 
86df3f847a72
Switched to R 3.0.2 from iuc, and moved bedtools to seperate tool_definition
 geert-vandeweyer parents: 
11diff
changeset | 707 system("cd $wd/Rout && Rscript exonplot.R"); | 
| 1 | 708 # Add to .TEX | 
| 709 print TEX '\begin{minipage}{0.5\linewidth}\centering'."\n"; | |
| 710 print TEX '\includegraphics[width=\textwidth,keepaspectratio=true]{../Plots/Coverage_'.$exonstr.'.png}'."\n"; | |
| 711 print TEX '\end{minipage}'."\n"; | |
| 712 $col++; | |
| 713 } | |
| 714 } | |
| 715 | |
| 716 ## plot failed (subregion) or all exons | |
| 717 if (exists($opts{'S'}) || exists($opts{'A'})) { | |
| 718 # count columns | |
| 719 my $head = `head -n 1 $wd/Targets.Position.Coverage`; | |
| 720 chomp($head); | |
| 721 my @cols = split(/\t/,$head); | |
| 722 my $nrcols = scalar(@cols); | |
| 723 my $covcol = $nrcols; | |
| 724 my $poscol = $nrcols -1; | |
| 725 # tex section header | |
| 726 open TEX, ">>$wd/Report/Report.tex"; | |
| 727 print TEX '\subsection*{Failed Exon Plots}'."\n"; | |
| 728 if (exists($opts{'S'})) { | |
| 729 print TEX '\underline{NOTE:} ALL exons were tested for local coverage $<$'.$thresh.' \\\\'."\n"; | |
| 730 } | |
| 731 elsif (exists($opts{'A'})) { | |
| 732 print TEX '\underline{NOTE:} ALL exons are plotted, regardless of coverage \\\\'."\n"; | |
| 733 } | |
| 734 $col = 1; | |
| 26 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 735 foreach(sort(keys(%allnames))) { | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 736 | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 737 #foreach(@allregions) { | 
| 1 | 738 if ($col > 2) { | 
| 739 $col = 1; | |
| 740 print TEX "\n"; | |
| 741 } | |
| 26 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 742 my $group = $_; | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 743 my ($region,$name) = split(/: /,$group); | 
| 1 | 744 # which exon | 
| 26 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 745 #my $region = $_; | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 746 #my $region = $allnames{$_}; | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 747 my $exon = $filehash{$group}{'exon'}; | 
| 1 | 748 # grep exon to tmp file | 
| 26 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 749 my $exonfile = "$wd/SplitFiles/File_".$filehash{$group}{'idx'}.".txt"; | 
| 1 | 750 ## determine transcript orientation. | 
| 751 my $firstline = `head -n 1 $exonfile`; | |
| 752 my @firstcols = split(/\t/,$firstline); | |
| 753 my $orient = $firstcols[5]; | |
| 754 my $genomicchr = $firstcols[0]; | |
| 755 my $genomicstart = $firstcols[1]; | |
| 756 my $genomicstop = $firstcols[2]; | |
| 757 | |
| 758 if ($orient eq '+') { | |
| 759 $bps = $genomicstop - $genomicstart + 1; | |
| 760 $subtitle = "Region 0-$bps: $genomicchr:".$de->format_number($genomicstart)."+".$de->format_number($genomicstop); | |
| 761 | |
| 762 } | |
| 763 else { | |
| 764 $bps = $genomicstop - $genomicstart + 1; | |
| 765 $subtitle = "Region 0-$bps: $genomicchr:".$de->format_number($genomicstart)."-".$de->format_number($genomicstop); | |
| 766 | |
| 767 } | |
| 768 | |
| 769 # check if failed | |
| 770 if (exists($opts{'S'})) { | |
| 771 my $cs = `cut -f $covcol '$exonfile' `; | |
| 772 my @c = split(/\n/,$cs); | |
| 773 @c = sort { $a <=> $b } @c; | |
| 774 if ($c[0] >= $thresh) { | |
| 775 # lowest coverage > threshold => skip | |
| 776 next; | |
| 777 } | |
| 778 } | |
| 779 # print Rscript | |
| 780 open OUT, ">$wd/Rout/exonplot.R"; | |
| 781 print OUT 'coveragetable <- read.table("'.$exonfile.'",as.is=TRUE,sep="\t",header=FALSE)'."\n"; | |
| 782 print OUT 'coverage <- coveragetable[,'.$covcol.']'."\n"; | |
| 783 print OUT 'coverage[coverage < 1] <- 1'."\n"; | |
| 784 print OUT 'positions <- coveragetable[,'.$poscol.']'."\n"; | |
| 785 my $width = 480 ; | |
| 786 my $height = 240 ; | |
| 787 my $exonstr = $exon; | |
| 788 $exonstr =~ s/\s/_/g; | |
| 26 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 789 $exonstr =~ s/:/_/g; | 
| 1 | 790 $exon =~ s/_/ /g; | 
| 791 $exon =~ s/\|/ /g; | |
| 26 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 792 $exon =~ s/^chr.*: (.*)$/$1/; | 
| 22 
95062840f80f
Correction to png calls to use cairo instead of x11. thanks to Eric Enns for pointing  this out.
 geert-vandeweyer parents: 
12diff
changeset | 793 print OUT 'png(file="../Plots/Coverage_'.$exonstr.'.png", bg="white", width='.$width.', height='.$height.',type=c("cairo"))'."\n"; | 
| 1 | 794 print OUT 'ylim = c(0,log10(max(max(coverage),'.($thresh+10).')))'."\n"; | 
| 795 if ($orient eq '-') { | |
| 796 print OUT 'plot(positions,log10(coverage),type="n",main="Coverage for '.$exon.'",ylab="log10(Coverage)",ylim=ylim,xlab="Position",xlim=rev(range(positions)),sub="(Transcribed from minus strand)")'."\n"; | |
| 797 print OUT 'mtext("'.$subtitle.'")'."\n"; | |
| 798 } | |
| 799 else { | |
| 800 print OUT 'plot(positions,log10(coverage),type="n",main="Coverage for '.$exon.'",ylab="log10(Coverage)",ylim=ylim,xlab="Position",sub="(Transcribed from plus strand)")'."\n"; | |
| 801 print OUT 'mtext("'.$subtitle.'")'."\n"; | |
| 802 } | |
| 803 | |
| 804 print OUT 'lines(positions,log10(coverage))'."\n"; | |
| 805 print OUT 'abline(h=log10('.$thresh.'),lwd=4,col=rgb(255,0,0,100,maxColorValue=255))'."\n"; | |
| 806 print OUT 'failedpos <- positions[coverage<'.$thresh.']'."\n"; | |
| 807 print OUT 'failedcov <- coverage[coverage<'.$thresh.']'."\n"; | |
| 808 print OUT 'points(failedpos,log10(failedcov),col="red",pch=19)'."\n"; | |
| 809 print OUT 'graphics.off()'."\n"; | |
| 810 close OUT; | |
| 811 # run R script | |
| 12 
86df3f847a72
Switched to R 3.0.2 from iuc, and moved bedtools to seperate tool_definition
 geert-vandeweyer parents: 
11diff
changeset | 812 system("cd $wd/Rout && Rscript exonplot.R"); | 
| 1 | 813 # Add to .TEX | 
| 814 print TEX '\begin{minipage}{0.5\linewidth}\centering'."\n"; | |
| 815 print TEX '\includegraphics[width=\textwidth,keepaspectratio=true]{../Plots/Coverage_'.$exonstr.'.png}'."\n"; | |
| 816 print TEX '\end{minipage}'."\n"; | |
| 817 $col++; | |
| 818 } | |
| 819 } | |
| 820 ## list failed exons | |
| 821 if (exists($opts{'L'})) { | |
| 822 # count columns | |
| 823 my $head = `head -n 1 $wd/Targets.Position.Coverage`; | |
| 824 chomp($head); | |
| 825 my @cols = split(/\t/,$head); | |
| 826 my $nrcols = scalar(@cols); | |
| 827 my $covcol = $nrcols; | |
| 828 my $poscol = $nrcols -1; | |
| 829 ## hash to print | |
| 830 # tex section header | |
| 831 open TEX, ">>$wd/Report/Report.tex"; | |
| 832 print TEX '\subsection*{List of Failed Exons}'."\n"; | |
| 833 print TEX '\underline{NOTE:} ALL exons were tested for local coverage $<$'.$thresh.' \\\\'."\n"; | |
| 834 print TEX '{\footnotesize\begin{longtable}[l]{@{\extracolsep{\fill}}llll}'."\n".'\hline'."\n"; | |
| 835 print TEX '\textbf{Target Name} & \textbf{Genomic Position} & \textbf{Avg.Coverage} & \textbf{Min.Coverage} \\\\'."\n".'\hline'."\n"; | |
| 836 print TEX '\endhead'."\n"; | |
| 837 print TEX '\hline '."\n".'\multicolumn{4}{r}{{\textsl{\footnotesize Continued on next page}}} \\\\ '."\n".'\hline' ."\n". '\endfoot' . "\n". '\endlastfoot' . "\n"; | |
| 838 | |
| 839 $col = 1; | |
| 840 open IN, "$wd/Targets.Global.Coverage"; | |
| 841 while (<IN>) { | |
| 842 chomp($_); | |
| 843 my @p = split(/\t/,$_); | |
| 844 my $region = $p[0].'-'.$p[1].'-'.$p[2]; | |
| 845 my $exon = $filehash{$region}{'exon'}; | |
| 846 # grep exon to tmp file | |
| 847 my $exonfile = "$wd/SplitFiles/File_".$filehash{$region}{'idx'}.".txt"; | |
| 848 ## determine transcript orientation. | |
| 849 my $firstline = `head -n 1 $exonfile`; | |
| 850 my @firstcols = split(/\t/,$firstline); | |
| 851 my $orient = $firstcols[5]; | |
| 852 my $genomicchr = $firstcols[0]; | |
| 853 my $genomicstart = $firstcols[1]; | |
| 854 my $genomicstop = $firstcols[2]; | |
| 855 | |
| 856 if ($orient eq '+') { | |
| 26 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 857 #$bps = $genomicstop - $genomicstart + 1; | 
| 1 | 858 $subtitle = "$genomicchr:".$de->format_number($genomicstart)."+".$de->format_number($genomicstop); | 
| 859 | |
| 860 } | |
| 861 else { | |
| 26 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 862 #$bps = $genomicstop - $genomicstart + 1; | 
| 1 | 863 $subtitle = "$genomicchr:".$de->format_number($genomicstart)."-".$de->format_number($genomicstop); | 
| 864 } | |
| 865 | |
| 866 # check if failed | |
| 867 my $cs = `cut -f $covcol '$exonfile' `; | |
| 868 my @c = split(/\n/,$cs); | |
| 869 my ($avg,$med,$min,$max,$first,$third,$ontarget) = arraystats(@c); | |
| 870 | |
| 871 if ($min >= $thresh) { | |
| 872 # lowest coverage > threshold => skip | |
| 873 next; | |
| 874 } | |
| 875 | |
| 876 # print to .tex table | |
| 877 if (length($exon) > 30) { | |
| 878 $exon = substr($exon,0,27) . '...'; | |
| 879 } | |
| 880 $exon =~ s/_/ /g; | |
| 881 $exon =~ s/\|/ /g; | |
| 882 | |
| 883 print TEX "$exon & $subtitle & ".int($avg)." & $min ".'\\\\'."\n"; | |
| 884 } | |
| 885 close IN; | |
| 886 print TEX '\hline'."\n"; | |
| 887 print TEX '\end{longtable}}'."\n"; | |
| 888 close TEX; | |
| 889 } | |
| 890 | |
| 891 | |
| 892 ## Close document | |
| 893 open OUT, ">>$wd/Report/Report.tex"; | |
| 894 print OUT '\label{endofdoc}'."\n"; | |
| 895 print OUT '\end{document}'."\n"; | |
| 896 close OUT; | |
| 897 system("cd $wd/Report && pdflatex Report.tex > /dev/null 2>&1 && pdflatex Report.tex > /dev/null 2>&1 "); | |
| 898 | |
| 899 ## mv report to output file | |
| 900 system("cp -f $wd/Report/Report.pdf '$pdffile'"); | |
| 901 ##create tar.gz file | |
| 902 system("mkdir $wd/Results"); | |
| 903 system("cp -Rf $wd/Plots $wd/Results/"); | |
| 904 system("cp -Rf $wd/Report/ $wd/Results/"); | |
| 905 if (-e "$wd/Targets.Global.Coverage") { | |
| 906 system("cp -Rf $wd/Targets.Global.Coverage $wd/Results/"); | |
| 907 } | |
| 908 if (-e "$wd/Targets.Position.Coverage") { | |
| 909 system("cp -Rf $wd/Targets.Position.Coverage $wd/Results/"); | |
| 910 } | |
| 911 | |
| 912 system("cd $wd && tar czf '$tarfile' Results/"); | |
| 913 ## clean up (galaxy stores outside wd) | |
| 27 | 914 system("rm -Rf $wd"); | 
| 1 | 915 ############### | 
| 916 ## FUNCTIONS ## | |
| 917 ############### | |
| 918 sub arraystats{ | |
| 919 my @array = @_; | |
| 920 my $count = scalar(@array); | |
| 26 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 921 if ($count == 0 ) { | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 922 return (0,0,0,0,0,0,0); | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 923 } | 
| 1 | 924 @array = sort { $a <=> $b } @array; | 
| 925 # median | |
| 926 my $median = 0; | |
| 927 if ($count % 2) { | |
| 928 $median = $array[int($count/2)]; | |
| 929 } else { | |
| 930 $median = ($array[$count/2] + $array[$count/2 - 1]) / 2; | |
| 931 } | |
| 932 # average | |
| 933 my $sum = 0; | |
| 934 foreach (@array) { $sum += $_; } | |
| 935 my $average = $sum / $count; | |
| 936 # quantiles (rounded) | |
| 937 my $quart = int($count/4) ; | |
| 938 my $first = $array[$quart]; | |
| 939 my $third = $array[($quart*3)]; | |
| 940 my $min = $array[0]; | |
| 941 my $max = $array[($count-1)]; | |
| 942 return ($average,$median,$min,$max,$first,$third,$sum); | |
| 943 } | |
| 944 | |
| 26 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 945 sub GetStats { | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 946 my ($aref) = @_; | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 947 if (scalar(@$aref) == 0) { | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 948 return qw/0 0/; | 
| 1 | 949 } | 
| 26 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 950 # median | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 951 my @s = sort {$a <=> $b } @$aref; | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 952 my $nrzero = 0; | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 953 my $len = scalar(@s); | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 954 for (my $i = 0; $i< $len;$i++) { | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 955 if ($s[$i] == 0) { | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 956 $nrzero++; | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 957 } | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 958 else { | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 959 last; | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 960 } | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 961 } | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 962 my $nrcov = $len - $nrzero; | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 963 # avg | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 964 my $avg = 0; | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 965 foreach (@s) { $avg += $_ }; | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 966 $avg = sprintf("%.1f",($avg / scalar(@s))); | 
| 
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
 geert-vandeweyer parents: 
25diff
changeset | 967 return($avg,$len,$nrcov); | 
| 1 | 968 } | 
| 969 | |
| 970 | |
| 971 sub SubRegionCoverage { | |
| 972 my ($bam,$targets) = @_; | |
| 973 my $command = "cd $wd && coverageBed -abam $bam -b $targets -d > $wd/Targets.Position.Coverage"; | |
| 974 system($command); | |
| 975 $commandsrun{$command} = 1; | |
| 976 } | |
| 977 | 
