| 
0
 | 
     1 #!/usr/bin/env Rscript
 | 
| 
 | 
     2 
 | 
| 
 | 
     3 args <- commandArgs(trailingOnly = TRUE)
 | 
| 
 | 
     4 
 | 
| 
 | 
     5 d = read.delim(args[1], header=T, sep="\t", as.is=T, row.names=1)
 | 
| 
 | 
     6 
 | 
| 
 | 
     7 clusters = read.delim("Clusters", header=T, sep="\t", as.is=T)[,-1]
 | 
| 
 | 
     8 clusters = data.frame(Bait=colnames(clusters), Cluster=as.numeric(clusters[1,]))
 | 
| 
 | 
     9 nested.clusters = read.delim("NestedClusters", header=F, sep="\t", as.is=T)[1:dim(d)[1],]
 | 
| 
 | 
    10 nested.phi = read.delim("NestedMu", header=F, sep="\t", as.is=T)[1:dim(d)[1],]
 | 
| 
 | 
    11 nested.phi2 = read.delim("NestedSigma2", header=F, sep="\t", as.is=T)[1:dim(d)[1],]
 | 
| 
 | 
    12 mcmc = read.delim("MCMCparameters", header=F, sep="\t", as.is=T)
 | 
| 
 | 
    13 
 | 
| 
 | 
    14 ### distance between bait using phi (also reorder cluster names) 
 | 
| 
 | 
    15 ### report nested clusters with positive counts only
 | 
| 
 | 
    16 ### rearrange rows and columns of the raw data matrix according to the back-tracking algorithm
 | 
| 
 | 
    17 
 | 
| 
 | 
    18 recursivePaste = function(x) {
 | 
| 
 | 
    19   n = length(x)
 | 
| 
 | 
    20   x = x[order(x)]
 | 
| 
 | 
    21   y = x[1]
 | 
| 
 | 
    22   if(n > 1) {
 | 
| 
 | 
    23     for(i in 2:n) y = paste(y, x[i], sep="-")
 | 
| 
 | 
    24   }
 | 
| 
 | 
    25   y
 | 
| 
 | 
    26 }
 | 
| 
 | 
    27 
 | 
| 
 | 
    28 calcDist = function(x, y) {
 | 
| 
 | 
    29   if(length(x) != length(y)) stop("different length\n")
 | 
| 
 | 
    30   else res = sum(abs(x-y))
 | 
| 
 | 
    31   res
 | 
| 
 | 
    32 }
 | 
| 
 | 
    33 
 | 
| 
 | 
    34 
 | 
| 
 | 
    35 #clusters, nested.clusters, nested.phi, d
 | 
| 
 | 
    36 
 | 
| 
 | 
    37 bcl = clusters
 | 
| 
 | 
    38 pcl = nested.clusters
 | 
| 
 | 
    39 phi = nested.phi
 | 
| 
 | 
    40 phi2 = nested.phi2
 | 
| 
 | 
    41 dat = d
 | 
| 
 | 
    42 
 | 
| 
 | 
    43 
 | 
| 
 | 
    44 ## bipartite graph
 | 
| 
 | 
    45 make.graphlet = function(b,p,s) {
 | 
| 
 | 
    46   g = NULL
 | 
| 
 | 
    47   g$b = b
 | 
| 
 | 
    48   g$p = p
 | 
| 
 | 
    49   g$s = as.numeric(s)
 | 
| 
 | 
    50   g
 | 
| 
 | 
    51 }
 | 
| 
 | 
    52 
 | 
| 
 | 
    53 make.hub = function(b,p) {
 | 
| 
 | 
    54   g = NULL
 | 
| 
 | 
    55   g$b = b
 | 
| 
 | 
    56   g$p = p
 | 
| 
 | 
    57   g
 | 
| 
 | 
    58 }
 | 
| 
 | 
    59 
 | 
| 
 | 
    60 jaccard = function(x,y) {
 | 
| 
 | 
    61   j = length(intersect(x,y)) / length(union(x,y))
 | 
| 
 | 
    62   j
 | 
| 
 | 
    63 }
 | 
| 
 | 
    64 
 | 
| 
 | 
    65 merge.graphlets = function(x, y) {
 | 
| 
 | 
    66   g = NULL
 | 
| 
 | 
    67   g$b = union(x$b, y$b)
 | 
| 
 | 
    68   g$p = union(x$p, y$p)
 | 
| 
 | 
    69   g$s1 = rep(0,length(g$p))
 | 
| 
 | 
    70   g$s2 = rep(0,length(g$p))
 | 
| 
 | 
    71   g$s1[match(x$p, g$p)] = x$s
 | 
| 
 | 
    72   g$s2[match(y$p, g$p)] = y$s
 | 
| 
 | 
    73   g$s = apply(cbind(g$s1, g$s2), 1, max)
 | 
| 
 | 
    74   g
 | 
| 
 | 
    75 }
 | 
| 
 | 
    76 
 | 
| 
 | 
    77 summarizeDP = function(bcl, pcl, phi, phi2, dat, hub.size=0.5, ...) {
 | 
| 
 | 
    78   pcl = as.matrix(pcl)
 | 
| 
 | 
    79   phi = as.matrix(phi)
 | 
| 
 | 
    80   phi2 = as.matrix(phi2)
 | 
| 
 | 
    81   dat = as.matrix(dat)
 | 
| 
 | 
    82   rownames(phi) = rownames(dat)
 | 
| 
 | 
    83   rownames(phi2) = rownames(dat)
 | 
| 
 | 
    84 
 | 
| 
 | 
    85   ubcl = unique(as.numeric(bcl$Cluster))
 | 
| 
 | 
    86   n = length(ubcl)
 | 
| 
 | 
    87   pcl = pcl[,ubcl]
 | 
| 
 | 
    88   phi = phi[,ubcl]
 | 
| 
 | 
    89   phi2 = phi2[,ubcl]
 | 
| 
 | 
    90   phi[phi < 0.05] = 0
 | 
| 
 | 
    91 
 | 
| 
 | 
    92   bcl$Cluster = match(as.numeric(bcl$Cluster), ubcl)
 | 
| 
 | 
    93   colnames(pcl) = colnames(phi) = colnames(phi2) = paste("CL", 1:n, sep="")
 | 
| 
 | 
    94 
 | 
| 
 | 
    95   ## remove non-reproducible mean values
 | 
| 
 | 
    96   nprey = dim(dat)[1]; nbait = dim(dat)[2]
 | 
| 
 | 
    97   preys = rownames(dat); baits = colnames(dat)
 | 
| 
 | 
    98   n = length(unique(bcl$Cluster))
 | 
| 
 | 
    99   for(j in 1:n) {
 | 
| 
 | 
   100     id = c(1:nbait)[bcl$Cluster == j]
 | 
| 
 | 
   101     for(k in 1:nprey) {
 | 
| 
 | 
   102       do.it = ifelse(mean(as.numeric(dat[k,id]) > 0) <= 0.5,TRUE,FALSE)
 | 
| 
 | 
   103       if(do.it) {
 | 
| 
 | 
   104         phi[k,j] = 0
 | 
| 
 | 
   105       }
 | 
| 
 | 
   106     }
 | 
| 
 | 
   107   }
 | 
| 
 | 
   108 
 | 
| 
 | 
   109   ## create bipartite graphs (graphlets)
 | 
| 
 | 
   110   gr = NULL
 | 
| 
 | 
   111   for(j in 1:n) {
 | 
| 
 | 
   112     id = c(1:nbait)[bcl$Cluster == j]
 | 
| 
 | 
   113     id2 = c(1:nprey)[phi[,j] > 0]
 | 
| 
 | 
   114     gr[[j]] = make.graphlet(baits[id], preys[id2], phi[id2,j])
 | 
| 
 | 
   115   }
 | 
| 
 | 
   116 
 | 
| 
 | 
   117   ## intersecting preys between graphlets
 | 
| 
 | 
   118   gr2 = NULL
 | 
| 
 | 
   119   cur = 1
 | 
| 
 | 
   120   for(i in 1:n) {
 | 
| 
 | 
   121     for(j in 1:n) {
 | 
| 
 | 
   122       if(i != j) {
 | 
| 
 | 
   123         combine = jaccard(gr[[i]]$p, gr[[j]]$p) >= 0.75
 | 
| 
 | 
   124         if(combine) {
 | 
| 
 | 
   125           gr2[[cur]] = merge.graphlets(gr[[i]], gr[[j]])
 | 
| 
 | 
   126           cur = cur + 1
 | 
| 
 | 
   127         }
 | 
| 
 | 
   128       }
 | 
| 
 | 
   129     }
 | 
| 
 | 
   130   }
 | 
| 
 | 
   131 
 | 
| 
 | 
   132   old.phi = phi
 | 
| 
 | 
   133   phi = phi[, bcl$Cluster]
 | 
| 
 | 
   134   phi2 = phi2[, bcl$Cluster]
 | 
| 
 | 
   135   ## find hub preys
 | 
| 
 | 
   136   proceed = apply(old.phi, 1, function(x) sum(x>0) >= 2)
 | 
| 
 | 
   137   h = NULL
 | 
| 
 | 
   138   cur = 1
 | 
| 
 | 
   139   for(k in 1:nprey) {
 | 
| 
 | 
   140     if(proceed[k]) {
 | 
| 
 | 
   141       id = as.numeric(phi[k,]) > 0
 | 
| 
 | 
   142       if(mean(id) >= hub.size) {
 | 
| 
 | 
   143         h[[cur]] = make.hub(baits[id], preys[k])
 | 
| 
 | 
   144         cur = cur + 1
 | 
| 
 | 
   145       }
 | 
| 
 | 
   146     }
 | 
| 
 | 
   147   }
 | 
| 
 | 
   148   nhub = cur - 1
 | 
| 
 | 
   149 
 | 
| 
 | 
   150   res = list(data=dat, baitCL=bcl, phi=phi, phi2=phi2, gr = gr, gr2 = gr2, hub = h)
 | 
| 
 | 
   151   res
 | 
| 
 | 
   152 }
 | 
| 
 | 
   153 
 | 
| 
 | 
   154 res = summarizeDP(clusters, nested.clusters, nested.phi, nested.phi2, d)
 | 
| 
 | 
   155 
 | 
| 
 | 
   156 write.table(res$baitCL[order(res$baitCL$Cluster),], "baitClusters", sep="\t", quote=F, row.names=F)
 | 
| 
 | 
   157 write.table(res$data, "clusteredData", sep="\t", quote=F)
 | 
| 
 | 
   158 
 | 
| 
 | 
   159 ##### SOFT
 | 
| 
 | 
   160 library(gplots)
 | 
| 
 | 
   161 tmpd = res$data
 | 
| 
 | 
   162 tmpm = res$phi
 | 
| 
 | 
   163 colnames(tmpm) = paste(colnames(res$data), colnames(tmpm))
 | 
| 
 | 
   164 
 | 
| 
 | 
   165 pdf("estimated.pdf", height=25, width=8)
 | 
| 
 | 
   166 my.hclust<-hclust(dist(tmpd))
 | 
| 
 | 
   167 my.dend<-as.dendrogram(my.hclust)
 | 
| 
 | 
   168 tmp.res = heatmap.2(tmpm, Rowv=my.dend, Colv=T, trace="n", col=rev(heat.colors(10)), breaks=seq(0,.5,by=0.05), margins=c(10,10), keysize=0.8, cexRow=0.4)
 | 
| 
 | 
   169 #tmp.res = heatmap.2(tmpm, Rowv=T, Colv=T, trace="n", col=rev(heat.colors(10)), breaks=seq(0,.5,by=0.05), margins=c(10,10), keysize=0.8, cexRow=0.4)
 | 
| 
 | 
   170 tmpd = tmpd[rev(tmp.res$rowInd),tmp.res$colInd]
 | 
| 
 | 
   171 write.table(tmpd, "clustered_matrix.txt", sep="\t", quote=F)
 | 
| 
 | 
   172 heatmap.2(tmpd, Rowv=F, Colv=F, trace="n", col=rev(heat.colors(10)), breaks=seq(0,.5,by=0.05), margins=c(10,10), keysize=0.8, cexRow=0.4)
 | 
| 
 | 
   173 dev.off()
 | 
| 
 | 
   174 
 | 
| 
 | 
   175 
 | 
| 
 | 
   176 ### Statistical Plots 
 | 
| 
 | 
   177 dd = dist(1-cor((res$phi), method="pearson"))
 | 
| 
 | 
   178 dend = as.dendrogram(hclust(dd, "ave"))
 | 
| 
 | 
   179 #plot(dend) 
 | 
| 
 | 
   180 
 | 
| 
 | 
   181 pdf("bait2bait.pdf")
 | 
| 
 | 
   182 tmp = res$phi
 | 
| 
 | 
   183 colnames(tmp) = paste(colnames(res$phi), res$baitCL$Bait, sep="_")
 | 
| 
 | 
   184 
 | 
| 
 | 
   185 ###dd = cor(tmp[,-26])    ### This line is only for Chris' data (one bait has all zeros in the estimated parameters)
 | 
| 
 | 
   186 dd = cor(tmp)    ### This line is only for Chris' data (one bait has all zeros in the estimated parameters)
 | 
| 
 | 
   187 
 | 
| 
 | 
   188 write.table(dd, "bait2bait_matrix.txt", sep="\t", quote=F)
 | 
| 
 | 
   189 heatmap.2(as.matrix(dd), trace="n", breaks=seq(-1,1,by=0.1), col=(greenred(20)), cexRow=0.7, cexCol=0.7)
 | 
| 
 | 
   190 dev.off()
 | 
| 
 | 
   191 
 | 
| 
 | 
   192 tmp = mcmc[,2]
 | 
| 
 | 
   193 ymax = max(tmp)
 | 
| 
 | 
   194 ymin = min(tmp)
 | 
| 
 | 
   195 pdf("stats.pdf", height=12, width=12)
 | 
| 
 | 
   196 
 | 
| 
 | 
   197 plot(mcmc[mcmc[,4]=="G",3], type="s", xlab="Iterations", ylab="Number of Clusters", main="")
 | 
| 
 | 
   198 plot(mcmc[,2], type="l", xlab="Iterations", ylab="Log-Likelihood", main="", ylim=c(ymin,ymax))
 | 
| 
 | 
   199 
 | 
| 
 | 
   200 dev.off()
 | 
| 
 | 
   201 
 |