0
|
1 #!/usr/bin/Rscript
|
|
2
|
|
3 infile=as.character(commandArgs()[6])
|
|
4 outdir=as.character(commandArgs()[7])
|
|
5 poor=as.numeric(commandArgs()[8])
|
|
6 high=as.numeric(commandArgs()[9])
|
|
7
|
|
8
|
|
9 outfile=paste(outdir,"/summary.html",sep="")
|
|
10 cutoff=c(poor,high)
|
|
11
|
|
12 xmlfiles<-bedfiles<-character(0)
|
|
13 indirs<-as.character(readLines(infile))
|
|
14 for(thisdir in indirs){
|
|
15 thisdir=paste(thisdir,"/data",sep="")
|
|
16 xmlfiles=c(xmlfiles,list.files(thisdir,pattern=".xml",full.names=TRUE))
|
|
17 bedfiles=c(bedfiles,list.files(thisdir,pattern=".bed",full.names=TRUE))
|
|
18 }
|
|
19 nsamples<-length(indirs)
|
|
20
|
|
21 samplenames<-character(0)
|
|
22 diffs<-matrix(nrow=5,ncol=nsamples)
|
|
23 averagecov<-stddevcov<-targetsize<-numreads_total<-numreads_target<-numeric(0)
|
|
24 sample1<-sample5<-sample10<-sample20<-sample30<-numeric(0)
|
|
25
|
|
26
|
|
27 ##Read XML input files
|
|
28 xmltag<-function(line){
|
|
29 return(strsplit(strsplit(line,">")[[1]][2],"</")[[1]][1])
|
|
30 }
|
|
31 i=1
|
|
32 nextfold=0
|
|
33 for(file in xmlfiles){
|
|
34 numreads<-numeric(0)
|
|
35 inlines<-readLines(file)
|
|
36 for(line in inlines){
|
|
37 if(length(grep("SampleName",line)>0)){samplenames<-c(samplenames,xmltag(line))}
|
|
38 if(length(grep("NumberReads",line)>0)){numreads<-c(numreads,as.numeric(xmltag(line)))}
|
|
39 if(length(grep("AvTargetCoverage",line)>0)){averagecov=c(averagecov,as.numeric(xmltag(line)))}
|
|
40 if(length(grep("SDTargetCoverage",line)>0)){stddevcov=c(stddevcov,as.numeric(xmltag(line)))}
|
|
41 if(length(grep("TargetSize",line)>0)){targetsize=as.numeric(xmltag(line))}
|
|
42 if(length(grep("<from1x>",line)>0)){nextfold=1}
|
|
43 else{
|
|
44 if((nextfold==1) && length(grep("PercBases",line))>0){
|
|
45 sample1<-c(sample1,as.numeric(xmltag(line)))
|
|
46 nextfold=0
|
|
47 }
|
|
48 }
|
|
49 if(length(grep("<from5x>",line)>0)){nextfold=5}
|
|
50 else{
|
|
51 if((nextfold==5) && length(grep("PercBases",line))>0){
|
|
52 sample5<-c(sample5,as.numeric(xmltag(line)))
|
|
53 nextfold=0
|
|
54 }
|
|
55 }
|
|
56 if(length(grep("<from10x>",line)>0)){nextfold=10}
|
|
57 else{
|
|
58 if((nextfold==10) && length(grep("PercBases",line))>0){
|
|
59 sample10<-c(sample10,as.numeric(xmltag(line)))
|
|
60 nextfold=0
|
|
61 }
|
|
62 }
|
|
63 if(length(grep("<from20x>",line)>0)){nextfold=20}
|
|
64 else{
|
|
65 if((nextfold==20) && length(grep("PercBases",line))>0){
|
|
66 sample20<-c(sample20,as.numeric(xmltag(line)))
|
|
67 nextfold=0
|
|
68 }
|
|
69 }
|
|
70 if(length(grep("<from30x>",line)>0)){nextfold=30}
|
|
71 else{
|
|
72 if((nextfold==30) && length(grep("PercBases",line))>0){
|
|
73 sample30<-c(sample30,as.numeric(xmltag(line)))
|
|
74 nextfold=0
|
|
75 }
|
|
76 }
|
|
77 }
|
|
78 numreads_total<-c(numreads_total,numreads[1])
|
|
79 numreads_target<-c(numreads_target,numreads[2])
|
|
80 diffs[,i]<-c(sample30[i],sample20[i]-sample30[i],sample10[i]-sample20[i],sample5[i]-sample10[i],sample1[i]-sample5[i])
|
|
81 i=i+1
|
|
82 }
|
|
83
|
|
84
|
|
85 ##Create barplot
|
|
86 rownames(diffs)<-c("Covered 30x","Covered 20x","Covered 10x","Covered 5x","Covered 1x")
|
|
87 plot_samplenames<-samplenames
|
|
88 s=0
|
|
89 for(sname in plot_samplenames){
|
|
90 s=s+1
|
|
91 if(nchar(sname)>10){plot_samplenames[s]<-paste("Sample ",s,"\n(table above)",sep="")}
|
|
92 }
|
|
93 colnames(diffs)<-plot_samplenames
|
|
94 png(file=paste(outdir,"/summary_plot.png",sep=""),width=66*(4+nsamples),height=400)
|
|
95 par(oma=c(1,0,0,1))
|
|
96 barplot(diffs,
|
|
97 legend=TRUE,
|
|
98 col=c("darkblue","steelblue1","tomato1","tomato3","tomato4"),
|
|
99 xlim=c(0,4+nsamples),ylim=c(0,100),
|
|
100 ylab="Percentage",las=2 ) #,
|
|
101 # args.legend=list(x=nsamples+4,y=80))
|
|
102 dev.off()
|
|
103
|
|
104
|
|
105
|
|
106 ##Read BED files
|
|
107 sametarget=TRUE
|
|
108 thisbed<-read.table(bedfiles[1],stringsAsFactors=FALSE)
|
|
109 genes<-levels(as.factor(thisbed$V4))
|
|
110 meancov<-matrix(nrow=length(genes),ncol=length(bedfiles))
|
|
111 chr<-start<-end<-character(0)
|
|
112
|
|
113 i=1
|
|
114 for(file in bedfiles){
|
|
115 thisbed<-read.table(file,stringsAsFactors=FALSE)
|
|
116 genes_thissample<-levels(as.factor(thisbed$V4))
|
|
117 if(length(genes_thissample)!=length(genes)){sametarget=FALSE}
|
|
118 for(j in 1:length(genes)){
|
|
119 thisgene<-thisbed[thisbed$V4==genes[j],]
|
|
120 chr[j]<-thisgene[1,1]
|
|
121 start[j]<-thisgene[1,2]
|
|
122 end[j]<-thisgene[1,3]
|
|
123 meancov[j,i]<-as.numeric(mean(thisgene[,5]))
|
|
124 }
|
|
125 i=i+1
|
|
126 }
|
|
127
|
|
128 meancov<-data.frame(chr,start,end,genes,meancov)
|
|
129 for(i in 1:nrow(meancov)){
|
|
130 print(i)
|
|
131 meancov$ave[i]<-round(mean(as.numeric(meancov[i,4+(1:nsamples)])),4)
|
|
132 meancov$stddev[i]<-round(sd(as.numeric(meancov[i,4+(1:nsamples)])),4)
|
|
133 meancov$mini[i]<-round(min(as.numeric(meancov[i,4+(1:nsamples)])),4)
|
|
134 meancov$maxi[i]<-round(max(as.numeric(meancov[i,4+(1:nsamples)])),4)
|
|
135 }
|
|
136 meancov_sorted<-meancov[order(meancov$ave),]
|
|
137 poorly_all<-meancov_sorted[meancov_sorted$max<cutoff[1],]
|
|
138
|
|
139
|
|
140 ##Write HTML output
|
|
141 cat(file=outfile,paste(
|
|
142 "<html>\n",
|
|
143 "<head>\n",
|
|
144 "<title>Enrichment Performance</title>\n",
|
|
145 "<style type=\"text/css\">\n",
|
|
146 "body{font-family:sans-serif;}\n",
|
|
147 "h2,h3{color: darkblue;}\n",
|
|
148 "a{color: darkblue;}\n",
|
|
149 " table.output td{\n",
|
|
150 " padding: 4px; background-color: lightskyblue;\n",
|
|
151 " border: 1px solid #000; border-color: darkblue;\n",
|
|
152 "}\n",
|
|
153 "</Style>\n",
|
|
154 "</head>\n",
|
|
155 "<body>\n",
|
|
156 "<h2>Target Enrichment Performance</h2><br/>\n",
|
|
157 "<table class=\"output\">\n",
|
|
158 " <tr>\n",
|
|
159 " <td><b>Sample</b></td><td><b># Reads</b></td><td><b># On Target</b></td><td><b>Coverage Mean</b></td><td><b>Coverage Std Dev</b></td><td><b>Covered 1x</b></td><td><b>Covered 5x</b></td><td><b>Covered 10x</b></td><td><b>Covered 20x</b></td><td><b>Covered 30x</b></td><td><b>Details</b></td>\n",
|
|
160 "</tr>\n",sep=""))
|
|
161 for(i in 1:nsamples){
|
|
162 cat(file=outfile,paste(
|
|
163 "<tr><td>",samplenames[i],"</td><td>",numreads_total[i],"</td>",
|
|
164 "<td>",numreads_target[i],"</td><td>",averagecov[i],"</td>",
|
|
165 "<td>",stddevcov[i],"</td><td>",sample1[i],"</td><td>",sample5[i],"</td>",
|
|
166 "<td>",sample10[i],"</td><td>",sample20[i],"</td><td>",sample30[i],"</td>",
|
|
167 "<td><a href=\"",rev(strsplit(indirs[i],"/")[[1]])[1],"/",samplenames[i],"_enrichment.html\">Show</a></td></tr>\n",
|
|
168 sep=""),append=TRUE)
|
|
169 }
|
|
170 cat(file=outfile,paste(
|
|
171 "</table>\n",
|
|
172 "<br/><br/>\n",
|
|
173 "<h2>Target Base Percentage Covered</h2>\n",
|
|
174 "<img src=\"summary_plot.png\"></img>\n",sep=""),append=TRUE)
|
|
175 if(!sametarget){
|
|
176 warning("Target regions are not the same for all samples.")
|
|
177 } else{
|
|
178 cat(file=outfile,paste(
|
|
179 "<h2>Poorly Covered in All Samples (Cutoff: ",cutoff[1],"x)</h2>\n",sep=""),append=TRUE)
|
|
180 if(nrow(poorly_all)==0){
|
|
181 cat(file=outfile,"<p>Nothing found for this cutoff.</p>",append=TRUE)
|
|
182 } else{
|
|
183 cat(file=outfile,paste(
|
|
184 "<table class=\"output\">\n",
|
|
185 "<tr>\n",
|
|
186 "<td><b>Target Region</b></td><td><b>Gene</b></td><td><b>Mean Across Samples</b></td><td><b>Std Dev Across Samples</b></td>\n",sep=""),append=TRUE)
|
|
187 for(i in 1:nrow(poorly_all)){
|
|
188 cat(file=outfile,paste(
|
|
189 "<tr>\n",
|
|
190 "<td>",poorly_all$chr[i],":",poorly_all$start[i],"-",poorly_all$end[i],"</td>\n",
|
|
191 "<td>",poorly_all$genes[i],"</td>\n",
|
|
192 "<td>",poorly_all$ave[i],"</td>\n",
|
|
193 "<td>",poorly_all$stddev[i],"</td>\n",
|
|
194 "<tr>\n",sep=""),append=TRUE)
|
|
195 }
|
|
196 cat(file=outfile,"</table>\n",append=TRUE)
|
|
197 }
|
|
198
|
|
199 meancov_sorted<-meancov[order(meancov$ave,decreasing=TRUE),] ##Highly Covered
|
|
200 highly_all<-meancov_sorted[meancov_sorted$min>cutoff[2],]
|
|
201 cat(file=outfile,paste(
|
|
202 "<h2>Highly Covered in All Samples (Cutoff: ",cutoff[2],"x)</h2>\n",sep=""),append=TRUE)
|
|
203 if(nrow(highly_all)==0){
|
|
204 cat(file=outfile,"<p>Nothing found for this cutoff.</p>",append=TRUE)
|
|
205 } else{
|
|
206 cat(file=outfile,paste("<table class=\"output\">\n",
|
|
207 "<tr>\n",
|
|
208 "<td><b>Target Region</b></td><td><b>Gene</b></td><td><b>Mean Across Sample</b></td><td><b>Std Dev Across Samples</b></td>\n",sep=""),append=TRUE)
|
|
209 for(i in 1:nrow(highly_all)){
|
|
210 cat(file=outfile,paste(
|
|
211 "<tr>\n",
|
|
212 "<td>",highly_all$chr[i],":",highly_all$start[i],"-",highly_all$end[i],"</td>\n",
|
|
213 "<td>",highly_all$genes[i],"</td>\n",
|
|
214 "<td>",highly_all$ave[i],"</td>\n",
|
|
215 "<td>",highly_all$stddev[i],"</td>\n",
|
|
216 "<tr>\n",sep=""),append=TRUE)
|
|
217 }
|
|
218 cat(file=outfile,"</table>\n",append=TRUE)
|
|
219 }
|
|
220
|
|
221 }
|
|
222
|
|
223 cat(file=outfile,"</body>\n</html>\n",append=TRUE)
|