| 3 | 1 #!/usr/bin/env Rscript | 
|  | 2 | 
|  | 3 args <- commandArgs(trailingOnly = TRUE) | 
|  | 4 | 
|  | 5 d = read.delim(args[1], header=T, sep="\t", as.is=T, row.names=1) | 
|  | 6 | 
|  | 7 clusters = read.delim("Clusters", header=T, sep="\t", as.is=T)[,-1] | 
|  | 8 clusters = data.frame(Bait=colnames(clusters), Cluster=as.numeric(clusters[1,])) | 
|  | 9 nested.clusters = read.delim("NestedClusters", header=F, sep="\t", as.is=T)[1:dim(d)[1],] | 
|  | 10 nested.phi = read.delim("NestedMu", header=F, sep="\t", as.is=T)[1:dim(d)[1],] | 
|  | 11 nested.phi2 = read.delim("NestedSigma2", header=F, sep="\t", as.is=T)[1:dim(d)[1],] | 
|  | 12 mcmc = read.delim("MCMCparameters", header=F, sep="\t", as.is=T) | 
|  | 13 | 
|  | 14 ### distance between bait using phi (also reorder cluster names) | 
|  | 15 ### report nested clusters with positive counts only | 
|  | 16 ### rearrange rows and columns of the raw data matrix according to the back-tracking algorithm | 
|  | 17 | 
|  | 18 recursivePaste = function(x) { | 
|  | 19   n = length(x) | 
|  | 20   x = x[order(x)] | 
|  | 21   y = x[1] | 
|  | 22   if(n > 1) { | 
|  | 23     for(i in 2:n) y = paste(y, x[i], sep="-") | 
|  | 24   } | 
|  | 25   y | 
|  | 26 } | 
|  | 27 | 
|  | 28 calcDist = function(x, y) { | 
|  | 29   if(length(x) != length(y)) stop("different length\n") | 
|  | 30   else res = sum(abs(x-y)) | 
|  | 31   res | 
|  | 32 } | 
|  | 33 | 
|  | 34 | 
|  | 35 #clusters, nested.clusters, nested.phi, d | 
|  | 36 | 
|  | 37 bcl = clusters | 
|  | 38 pcl = nested.clusters | 
|  | 39 phi = nested.phi | 
|  | 40 phi2 = nested.phi2 | 
|  | 41 dat = d | 
|  | 42 | 
|  | 43 | 
|  | 44 ## bipartite graph | 
|  | 45 make.graphlet = function(b,p,s) { | 
|  | 46   g = NULL | 
|  | 47   g$b = b | 
|  | 48   g$p = p | 
|  | 49   g$s = as.numeric(s) | 
|  | 50   g | 
|  | 51 } | 
|  | 52 | 
|  | 53 make.hub = function(b,p) { | 
|  | 54   g = NULL | 
|  | 55   g$b = b | 
|  | 56   g$p = p | 
|  | 57   g | 
|  | 58 } | 
|  | 59 | 
|  | 60 jaccard = function(x,y) { | 
|  | 61   j = length(intersect(x,y)) / length(union(x,y)) | 
|  | 62   j | 
|  | 63 } | 
|  | 64 | 
|  | 65 merge.graphlets = function(x, y) { | 
|  | 66   g = NULL | 
|  | 67   g$b = union(x$b, y$b) | 
|  | 68   g$p = union(x$p, y$p) | 
|  | 69   g$s1 = rep(0,length(g$p)) | 
|  | 70   g$s2 = rep(0,length(g$p)) | 
|  | 71   g$s1[match(x$p, g$p)] = x$s | 
|  | 72   g$s2[match(y$p, g$p)] = y$s | 
|  | 73   g$s = apply(cbind(g$s1, g$s2), 1, max) | 
|  | 74   g | 
|  | 75 } | 
|  | 76 | 
|  | 77 summarizeDP = function(bcl, pcl, phi, phi2, dat, hub.size=0.5, ...) { | 
|  | 78   pcl = as.matrix(pcl) | 
|  | 79   phi = as.matrix(phi) | 
|  | 80   phi2 = as.matrix(phi2) | 
|  | 81   dat = as.matrix(dat) | 
|  | 82   rownames(phi) = rownames(dat) | 
|  | 83   rownames(phi2) = rownames(dat) | 
|  | 84 | 
|  | 85   ubcl = unique(as.numeric(bcl$Cluster)) | 
|  | 86   n = length(ubcl) | 
|  | 87   pcl = pcl[,ubcl] | 
|  | 88   phi = phi[,ubcl] | 
|  | 89   phi2 = phi2[,ubcl] | 
|  | 90   phi[phi < 0.05] = 0 | 
|  | 91 | 
|  | 92   bcl$Cluster = match(as.numeric(bcl$Cluster), ubcl) | 
|  | 93   colnames(pcl) = colnames(phi) = colnames(phi2) = paste("CL", 1:n, sep="") | 
|  | 94 | 
|  | 95   ## remove non-reproducible mean values | 
|  | 96   nprey = dim(dat)[1]; nbait = dim(dat)[2] | 
|  | 97   preys = rownames(dat); baits = colnames(dat) | 
|  | 98   n = length(unique(bcl$Cluster)) | 
|  | 99   for(j in 1:n) { | 
|  | 100     id = c(1:nbait)[bcl$Cluster == j] | 
|  | 101     for(k in 1:nprey) { | 
|  | 102       do.it = ifelse(mean(as.numeric(dat[k,id]) > 0) <= 0.5,TRUE,FALSE) | 
|  | 103       if(do.it) { | 
|  | 104         phi[k,j] = 0 | 
|  | 105       } | 
|  | 106     } | 
|  | 107   } | 
|  | 108 | 
|  | 109   ## create bipartite graphs (graphlets) | 
|  | 110   gr = NULL | 
|  | 111   for(j in 1:n) { | 
|  | 112     id = c(1:nbait)[bcl$Cluster == j] | 
|  | 113     id2 = c(1:nprey)[phi[,j] > 0] | 
|  | 114     gr[[j]] = make.graphlet(baits[id], preys[id2], phi[id2,j]) | 
|  | 115   } | 
|  | 116 | 
|  | 117   ## intersecting preys between graphlets | 
|  | 118   gr2 = NULL | 
|  | 119   cur = 1 | 
|  | 120   for(i in 1:n) { | 
|  | 121     for(j in 1:n) { | 
|  | 122       if(i != j) { | 
|  | 123         combine = jaccard(gr[[i]]$p, gr[[j]]$p) >= 0.75 | 
|  | 124         if(combine) { | 
|  | 125           gr2[[cur]] = merge.graphlets(gr[[i]], gr[[j]]) | 
|  | 126           cur = cur + 1 | 
|  | 127         } | 
|  | 128       } | 
|  | 129     } | 
|  | 130   } | 
|  | 131 | 
|  | 132   old.phi = phi | 
|  | 133   phi = phi[, bcl$Cluster] | 
|  | 134   phi2 = phi2[, bcl$Cluster] | 
|  | 135   ## find hub preys | 
|  | 136   proceed = apply(old.phi, 1, function(x) sum(x>0) >= 2) | 
|  | 137   h = NULL | 
|  | 138   cur = 1 | 
|  | 139   for(k in 1:nprey) { | 
|  | 140     if(proceed[k]) { | 
|  | 141       id = as.numeric(phi[k,]) > 0 | 
|  | 142       if(mean(id) >= hub.size) { | 
|  | 143         h[[cur]] = make.hub(baits[id], preys[k]) | 
|  | 144         cur = cur + 1 | 
|  | 145       } | 
|  | 146     } | 
|  | 147   } | 
|  | 148   nhub = cur - 1 | 
|  | 149 | 
|  | 150   res = list(data=dat, baitCL=bcl, phi=phi, phi2=phi2, gr = gr, gr2 = gr2, hub = h) | 
|  | 151   res | 
|  | 152 } | 
|  | 153 | 
|  | 154 res = summarizeDP(clusters, nested.clusters, nested.phi, nested.phi2, d) | 
|  | 155 | 
|  | 156 write.table(res$baitCL[order(res$baitCL$Cluster),], "baitClusters", sep="\t", quote=F, row.names=F) | 
|  | 157 write.table(res$data, "clusteredData", sep="\t", quote=F) | 
|  | 158 | 
|  | 159 ##### SOFT | 
|  | 160 library(gplots) | 
|  | 161 tmpd = res$data | 
|  | 162 tmpm = res$phi | 
|  | 163 colnames(tmpm) = paste(colnames(res$data), colnames(tmpm)) | 
|  | 164 | 
|  | 165 pdf("estimated.pdf", height=25, width=8) | 
|  | 166 my.hclust<-hclust(dist(tmpd)) | 
|  | 167 my.dend<-as.dendrogram(my.hclust) | 
|  | 168 tmp.res = heatmap.2(tmpm, Rowv=my.dend, Colv=T, trace="n", col=rev(heat.colors(10)), breaks=seq(0,.5,by=0.05), margins=c(10,10), keysize=0.8, cexRow=0.4) | 
|  | 169 #tmp.res = heatmap.2(tmpm, Rowv=T, Colv=T, trace="n", col=rev(heat.colors(10)), breaks=seq(0,.5,by=0.05), margins=c(10,10), keysize=0.8, cexRow=0.4) | 
|  | 170 tmpd = tmpd[rev(tmp.res$rowInd),tmp.res$colInd] | 
|  | 171 write.table(tmpd, "clustered_matrix.txt", sep="\t", quote=F) | 
|  | 172 heatmap.2(tmpd, Rowv=F, Colv=F, trace="n", col=rev(heat.colors(10)), breaks=seq(0,.5,by=0.05), margins=c(10,10), keysize=0.8, cexRow=0.4) | 
|  | 173 dev.off() | 
|  | 174 | 
|  | 175 | 
|  | 176 ### Statistical Plots | 
|  | 177 dd = dist(1-cor((res$phi), method="pearson")) | 
|  | 178 dend = as.dendrogram(hclust(dd, "ave")) | 
|  | 179 #plot(dend) | 
|  | 180 | 
|  | 181 pdf("bait2bait.pdf") | 
|  | 182 tmp = res$phi | 
|  | 183 colnames(tmp) = paste(colnames(res$phi), res$baitCL$Bait, sep="_") | 
|  | 184 | 
|  | 185 ###dd = cor(tmp[,-26])    ### This line is only for Chris' data (one bait has all zeros in the estimated parameters) | 
|  | 186 dd = cor(tmp)    ### This line is only for Chris' data (one bait has all zeros in the estimated parameters) | 
|  | 187 | 
|  | 188 write.table(dd, "bait2bait_matrix.txt", sep="\t", quote=F) | 
|  | 189 heatmap.2(as.matrix(dd), trace="n", breaks=seq(-1,1,by=0.1), col=(greenred(20)), cexRow=0.7, cexCol=0.7) | 
|  | 190 dev.off() | 
|  | 191 | 
|  | 192 tmp = mcmc[,2] | 
|  | 193 ymax = max(tmp) | 
|  | 194 ymin = min(tmp) | 
|  | 195 pdf("stats.pdf", height=12, width=12) | 
|  | 196 | 
|  | 197 plot(mcmc[mcmc[,4]=="G",3], type="s", xlab="Iterations", ylab="Number of Clusters", main="") | 
|  | 198 plot(mcmc[,2], type="l", xlab="Iterations", ylab="Log-Likelihood", main="", ylim=c(ymin,ymax)) | 
|  | 199 | 
|  | 200 dev.off() | 
|  | 201 |