annotate ezBAMQC/test-data/output/data/smp_correlation.r @ 15:28cebcc7f774

Uploaded
author cshl-bsr
date Wed, 30 Mar 2016 12:15:18 -0400
parents dfa3745e5fd8
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1 library(corrplot)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2 srcfiles = c("test1/data/smp0.geneAbundance.txt","test1/data/smp1.geneAbundance.txt","test1/data/smp2.geneAbundance.txt")
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3 destfile = "/sonas-hs/bsr/hpc/data/yjin/test_BAMqc/exp/test1/figs/smp_corr.png"
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4 f1 = read.delim(srcfiles[1],header=T)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
5 MM=matrix(nrow=length(f1[,1]),ncol=length(srcfiles))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
6 rownames(MM)=f1[,1]
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
7 MM[,1]=f1[,2]
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
8 for (i in 2:length(srcfiles)){
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
9 f = read.delim(srcfiles[i],header=T)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
10 MM[,i] = f[,2] }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
11 colnames(MM)=c("smp0","smp1","smp2")
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
12 libSize<-colSums(MM)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
13 MM<-t(t(MM)*1000000/libSize)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
14 ss<-rowSums(MM)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
15 M1<-MM[ss>0,]
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
16 MM_s<-t(scale(t(M1)))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
17 M.cor<-cor(MM_s,method='sp')
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
18 M.cor[is.na(M.cor)]<- 0
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
19 png(destfile,width=500,height=500,units='px')
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
20 corrplot(M.cor,is.corr=T,order='FPC',method='color',type='full',add=F,diag=T)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
21 dev.state = dev.off()
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
22 nz_genes = length(M1[,1])
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
23 destfile = "/sonas-hs/bsr/hpc/data/yjin/test_BAMqc/exp/test1/figs/smp_reproducibility.png"
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
24 if(nz_genes >0) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
25 png(destfile,width=500,height=500,units='px')
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
26 nz_gene_mm = rep(0,length(M1[1,]))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
27 for(i in 1:length(M1[1,])) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
28 nz_gene_mm[i] = length(which(M1[,i]>0))/nz_genes * 100 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
29 bplt <- barplot(nz_gene_mm,beside=T,border='NA',space=1.5,ylim=c(0,100),ylab='Genes reproducibly detected (%)',col='blue',names.arg=colnames(MM))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
30 text(y= nz_gene_mm+2, x= bplt, labels=paste(as.character(round(nz_gene_mm,digits=1)),'%',sep=''), xpd=TRUE)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
31 dev.state = dev.off()}
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
32 destfile = "/sonas-hs/bsr/hpc/data/yjin/test_BAMqc/exp/test1/figs/smp_var.png"
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
33 png(destfile,width=500,height=500,units='px')
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
34 mad = rep(0,length(M1[,1]))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
35 nz_gene_median = rep(0,length(M1[,1]))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
36 for(i in 1:length(M1[,1])) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
37 nz_gene_median[i] = median(M1[i,])
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
38 mad[i] = median(abs(M1[i,]-nz_gene_median[i])) }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
39 mad2 = mad[nz_gene_median >0]
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
40 nz_gene_median2 = nz_gene_median[nz_gene_median>0]
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
41 mad_vs_median = mad2/nz_gene_median2
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
42 nz_gene_median3 = log(nz_gene_median2, base=2)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
43 dd<-data.frame(nz_gene_median3,mad_vs_median)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
44 x = densCols(nz_gene_median3,mad_vs_median, colramp=colorRampPalette(c('black', 'white')))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
45 dd$dens <- col2rgb(x)[1,] + 1L
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
46 cols <- colorRampPalette(c("#000099", "#00FEFF", "#45FE4F", "#FCFF00", "#FF9400", "#FF3100"))(256)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
47 dd$col <- cols[dd$dens]
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
48 plot(mad_vs_median ~ nz_gene_median3,data=dd[order(dd$dens),], col=col, pch=20,xlab="Gene expression (median RPM log2)",ylab="Median absolute deviation/median")
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
49 dev.state = dev.off()
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
50 destfile = "/sonas-hs/bsr/hpc/data/yjin/test_BAMqc/exp/test1/figs/smp_cov.png"
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
51 png(destfile,width=500,height=500,units='px')
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
52 xname=c("<0.5","0.5-10","10-100",">=100")
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
53 Fn_mm = matrix(0,nrow=length(xname),ncol=length(M1[1,]))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
54 rownames(Fn_mm) = xname
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
55 colnames(Fn_mm) = c("smp0","smp1","smp2")
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
56 for(i in 1:length(M1[1,])) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
57 Fn_mm[1,i] = length(which(M1[,i]<0.5))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
58 Fn_mm[2,i] = length(which(M1[,i]>=0.5 & M1[,i]<10))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
59 Fn_mm[3,i] = length(which(M1[,i]>=10 & M1[,i]<100))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
60 Fn_mm[4,i] = length(which(M1[,i]>=100)) }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
61 barplot(Fn_mm,main="Gene abundance (RPM)",xlab="Sample",ylab="Frequency",col=c("green","blue","red","yellow"),legend=xname)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
62 dev.state = dev.off()
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
63 destfile3 = "/sonas-hs/bsr/hpc/data/yjin/test_BAMqc/exp/test1/figs/smp_qual.png"
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
64 srcfiles3 = c("test1/data/smp0.mapq_profile.xls","test1/data/smp1.mapq_profile.xls","test1/data/smp2.mapq_profile.xls")
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
65 png(destfile3,width=500,height=500,units='px')
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
66 xname=c("<3","3-10","10-20","20-30",">=30")
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
67 Fn_mm = matrix(0,nrow=length(xname),ncol=length(srcfiles3))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
68 rownames(Fn_mm) = xname
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
69 colnames(Fn_mm) = c("smp0","smp1","smp2")
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
70 for(i in 1:length(srcfiles3)) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
71 f = read.delim(srcfiles3[i],header=T)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
72 if(length(which(f[,1]<3)) >0){ Fn_mm[1,i] = sum(f[which(f[,1]<3),3])/f[1,2]}
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
73 if(length(which(f[,1]>=3 & f[,1]<10)) >0) {Fn_mm[2,i] = sum(f[which(f[,1]<10 & f[,1]>=3),3])/f[1,2]}
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
74 if(length(which(f[,1]>=10 & f[,1]<20)) >0) {Fn_mm[3,i] = sum(f[which(f[,1]<20 & f[,1]>=10),3])/f[1,2] }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
75 if(length(which(f[,1]>=20 & f[,1]<30)) >0) {Fn_mm[4,i] = sum(f[which(f[,1]<30 & f[,1]>=20),3])/f[1,2]}
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
76 if(length(which(f[,1]>=30)) >0) {Fn_mm[5,i] = sum(f[which(f[,1]>=30),3])/f[1,2] }}
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
77 barplot(Fn_mm,xlab="Sample",main="Mapping Quality",ylim=c(0,1),ylab="Frequency",col=c("blue","green","yellow","orange","red"),legend=xname)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
78 dev.state = dev.off()