Mercurial > repos > bornea > dotplot_runner
comparison Dotplot_Release/Step4_biclustering.R @ 0:dfa3436beb67 draft
Uploaded
| author | bornea |
|---|---|
| date | Fri, 29 Jan 2016 09:56:02 -0500 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:dfa3436beb67 |
|---|---|
| 1 #!/usr/bin/env Rscript | |
| 2 | |
| 3 args <- commandArgs(trailingOnly = TRUE) | |
| 4 | |
| 5 d = read.delim(args[1], header=T, sep="\t", as.is=T, row.names=1) | |
| 6 | |
| 7 clusters = read.delim("Clusters", header=T, sep="\t", as.is=T)[,-1] | |
| 8 clusters = data.frame(Bait=colnames(clusters), Cluster=as.numeric(clusters[1,])) | |
| 9 nested.clusters = read.delim("NestedClusters", header=F, sep="\t", as.is=T)[1:dim(d)[1],] | |
| 10 nested.phi = read.delim("NestedMu", header=F, sep="\t", as.is=T)[1:dim(d)[1],] | |
| 11 nested.phi2 = read.delim("NestedSigma2", header=F, sep="\t", as.is=T)[1:dim(d)[1],] | |
| 12 mcmc = read.delim("MCMCparameters", header=F, sep="\t", as.is=T) | |
| 13 | |
| 14 ### distance between bait using phi (also reorder cluster names) | |
| 15 ### report nested clusters with positive counts only | |
| 16 ### rearrange rows and columns of the raw data matrix according to the back-tracking algorithm | |
| 17 | |
| 18 recursivePaste = function(x) { | |
| 19 n = length(x) | |
| 20 x = x[order(x)] | |
| 21 y = x[1] | |
| 22 if(n > 1) { | |
| 23 for(i in 2:n) y = paste(y, x[i], sep="-") | |
| 24 } | |
| 25 y | |
| 26 } | |
| 27 | |
| 28 calcDist = function(x, y) { | |
| 29 if(length(x) != length(y)) stop("different length\n") | |
| 30 else res = sum(abs(x-y)) | |
| 31 res | |
| 32 } | |
| 33 | |
| 34 | |
| 35 #clusters, nested.clusters, nested.phi, d | |
| 36 | |
| 37 bcl = clusters | |
| 38 pcl = nested.clusters | |
| 39 phi = nested.phi | |
| 40 phi2 = nested.phi2 | |
| 41 dat = d | |
| 42 | |
| 43 | |
| 44 ## bipartite graph | |
| 45 make.graphlet = function(b,p,s) { | |
| 46 g = NULL | |
| 47 g$b = b | |
| 48 g$p = p | |
| 49 g$s = as.numeric(s) | |
| 50 g | |
| 51 } | |
| 52 | |
| 53 make.hub = function(b,p) { | |
| 54 g = NULL | |
| 55 g$b = b | |
| 56 g$p = p | |
| 57 g | |
| 58 } | |
| 59 | |
| 60 jaccard = function(x,y) { | |
| 61 j = length(intersect(x,y)) / length(union(x,y)) | |
| 62 j | |
| 63 } | |
| 64 | |
| 65 merge.graphlets = function(x, y) { | |
| 66 g = NULL | |
| 67 g$b = union(x$b, y$b) | |
| 68 g$p = union(x$p, y$p) | |
| 69 g$s1 = rep(0,length(g$p)) | |
| 70 g$s2 = rep(0,length(g$p)) | |
| 71 g$s1[match(x$p, g$p)] = x$s | |
| 72 g$s2[match(y$p, g$p)] = y$s | |
| 73 g$s = apply(cbind(g$s1, g$s2), 1, max) | |
| 74 g | |
| 75 } | |
| 76 | |
| 77 summarizeDP = function(bcl, pcl, phi, phi2, dat, hub.size=0.5, ...) { | |
| 78 pcl = as.matrix(pcl) | |
| 79 phi = as.matrix(phi) | |
| 80 phi2 = as.matrix(phi2) | |
| 81 dat = as.matrix(dat) | |
| 82 rownames(phi) = rownames(dat) | |
| 83 rownames(phi2) = rownames(dat) | |
| 84 | |
| 85 ubcl = unique(as.numeric(bcl$Cluster)) | |
| 86 n = length(ubcl) | |
| 87 pcl = pcl[,ubcl] | |
| 88 phi = phi[,ubcl] | |
| 89 phi2 = phi2[,ubcl] | |
| 90 phi[phi < 0.05] = 0 | |
| 91 | |
| 92 bcl$Cluster = match(as.numeric(bcl$Cluster), ubcl) | |
| 93 colnames(pcl) = colnames(phi) = colnames(phi2) = paste("CL", 1:n, sep="") | |
| 94 | |
| 95 ## remove non-reproducible mean values | |
| 96 nprey = dim(dat)[1]; nbait = dim(dat)[2] | |
| 97 preys = rownames(dat); baits = colnames(dat) | |
| 98 n = length(unique(bcl$Cluster)) | |
| 99 for(j in 1:n) { | |
| 100 id = c(1:nbait)[bcl$Cluster == j] | |
| 101 for(k in 1:nprey) { | |
| 102 do.it = ifelse(mean(as.numeric(dat[k,id]) > 0) <= 0.5,TRUE,FALSE) | |
| 103 if(do.it) { | |
| 104 phi[k,j] = 0 | |
| 105 } | |
| 106 } | |
| 107 } | |
| 108 | |
| 109 ## create bipartite graphs (graphlets) | |
| 110 gr = NULL | |
| 111 for(j in 1:n) { | |
| 112 id = c(1:nbait)[bcl$Cluster == j] | |
| 113 id2 = c(1:nprey)[phi[,j] > 0] | |
| 114 gr[[j]] = make.graphlet(baits[id], preys[id2], phi[id2,j]) | |
| 115 } | |
| 116 | |
| 117 ## intersecting preys between graphlets | |
| 118 gr2 = NULL | |
| 119 cur = 1 | |
| 120 for(i in 1:n) { | |
| 121 for(j in 1:n) { | |
| 122 if(i != j) { | |
| 123 combine = jaccard(gr[[i]]$p, gr[[j]]$p) >= 0.75 | |
| 124 if(combine) { | |
| 125 gr2[[cur]] = merge.graphlets(gr[[i]], gr[[j]]) | |
| 126 cur = cur + 1 | |
| 127 } | |
| 128 } | |
| 129 } | |
| 130 } | |
| 131 | |
| 132 old.phi = phi | |
| 133 phi = phi[, bcl$Cluster] | |
| 134 phi2 = phi2[, bcl$Cluster] | |
| 135 ## find hub preys | |
| 136 proceed = apply(old.phi, 1, function(x) sum(x>0) >= 2) | |
| 137 h = NULL | |
| 138 cur = 1 | |
| 139 for(k in 1:nprey) { | |
| 140 if(proceed[k]) { | |
| 141 id = as.numeric(phi[k,]) > 0 | |
| 142 if(mean(id) >= hub.size) { | |
| 143 h[[cur]] = make.hub(baits[id], preys[k]) | |
| 144 cur = cur + 1 | |
| 145 } | |
| 146 } | |
| 147 } | |
| 148 nhub = cur - 1 | |
| 149 | |
| 150 res = list(data=dat, baitCL=bcl, phi=phi, phi2=phi2, gr = gr, gr2 = gr2, hub = h) | |
| 151 res | |
| 152 } | |
| 153 | |
| 154 res = summarizeDP(clusters, nested.clusters, nested.phi, nested.phi2, d) | |
| 155 | |
| 156 write.table(res$baitCL[order(res$baitCL$Cluster),], "baitClusters", sep="\t", quote=F, row.names=F) | |
| 157 write.table(res$data, "clusteredData", sep="\t", quote=F) | |
| 158 | |
| 159 ##### SOFT | |
| 160 library(gplots) | |
| 161 tmpd = res$data | |
| 162 tmpm = res$phi | |
| 163 colnames(tmpm) = paste(colnames(res$data), colnames(tmpm)) | |
| 164 | |
| 165 pdf("estimated.pdf", height=25, width=8) | |
| 166 my.hclust<-hclust(dist(tmpd)) | |
| 167 my.dend<-as.dendrogram(my.hclust) | |
| 168 tmp.res = heatmap.2(tmpm, Rowv=my.dend, Colv=T, trace="n", col=rev(heat.colors(10)), breaks=seq(0,.5,by=0.05), margins=c(10,10), keysize=0.8, cexRow=0.4) | |
| 169 #tmp.res = heatmap.2(tmpm, Rowv=T, Colv=T, trace="n", col=rev(heat.colors(10)), breaks=seq(0,.5,by=0.05), margins=c(10,10), keysize=0.8, cexRow=0.4) | |
| 170 tmpd = tmpd[rev(tmp.res$rowInd),tmp.res$colInd] | |
| 171 write.table(tmpd, "clustered_matrix.txt", sep="\t", quote=F) | |
| 172 heatmap.2(tmpd, Rowv=F, Colv=F, trace="n", col=rev(heat.colors(10)), breaks=seq(0,.5,by=0.05), margins=c(10,10), keysize=0.8, cexRow=0.4) | |
| 173 dev.off() | |
| 174 | |
| 175 | |
| 176 ### Statistical Plots | |
| 177 dd = dist(1-cor((res$phi), method="pearson")) | |
| 178 dend = as.dendrogram(hclust(dd, "ave")) | |
| 179 #plot(dend) | |
| 180 | |
| 181 pdf("bait2bait.pdf") | |
| 182 tmp = res$phi | |
| 183 colnames(tmp) = paste(colnames(res$phi), res$baitCL$Bait, sep="_") | |
| 184 | |
| 185 ###dd = cor(tmp[,-26]) ### This line is only for Chris' data (one bait has all zeros in the estimated parameters) | |
| 186 dd = cor(tmp) ### This line is only for Chris' data (one bait has all zeros in the estimated parameters) | |
| 187 | |
| 188 write.table(dd, "bait2bait_matrix.txt", sep="\t", quote=F) | |
| 189 heatmap.2(as.matrix(dd), trace="n", breaks=seq(-1,1,by=0.1), col=(greenred(20)), cexRow=0.7, cexCol=0.7) | |
| 190 dev.off() | |
| 191 | |
| 192 tmp = mcmc[,2] | |
| 193 ymax = max(tmp) | |
| 194 ymin = min(tmp) | |
| 195 pdf("stats.pdf", height=12, width=12) | |
| 196 | |
| 197 plot(mcmc[mcmc[,4]=="G",3], type="s", xlab="Iterations", ylab="Number of Clusters", main="") | |
| 198 plot(mcmc[,2], type="l", xlab="Iterations", ylab="Log-Likelihood", main="", ylim=c(ymin,ymax)) | |
| 199 | |
| 200 dev.off() | |
| 201 |
