changeset 0:dfa3436beb67 draft

Uploaded
author bornea
date Fri, 29 Jan 2016 09:56:02 -0500
parents
children 7e98f893e9ae
files Dotplot_Release/BaitCheck.pl Dotplot_Release/Normalization.R Dotplot_Release/Normalization_sigpreys.R Dotplot_Release/R_dotPlot.R Dotplot_Release/R_dotPlot_hc.R Dotplot_Release/R_dotPlot_nc.R Dotplot_Release/SOFD.pl Dotplot_Release/SaintConvert.pl Dotplot_Release/Step1_data_reformating.R Dotplot_Release/Step2_data_filtering.R Dotplot_Release/Step3_nestedcluster Dotplot_Release/Step4_biclustering.R Dotplot_Release/biclust.tar.gz Dotplot_Release/biclust_param.txt Dotplot_Release/dotplot.bash Dotplot_Release/pheatmap_j.R
diffstat 16 files changed, 1900 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Dotplot_Release/BaitCheck.pl	Fri Jan 29 09:56:02 2016 -0500
@@ -0,0 +1,45 @@
+#!/usr/bin/perl
+
+# 27/04/2014
+
+if($#ARGV==0){
+	print "This program checks the number of baits in a Saint Output File.\n";
+	print "\nusage:\n $0\n-i [csv saint output file]\n\n";
+	die;
+}
+else{
+	$i=0;
+	$cutoff=0.01;
+	while($i<=$#ARGV){
+		if($ARGV[$i] eq '-i'){
+			$i++;
+			$ifile=$ARGV[$i];
+		}
+		else{
+			die "\nIncorrect program usage\n\n";
+		}
+		$i++;
+	}
+}
+
+$baitn=0; $file='';
+open(IFILE,"<$ifile") || die "$ifile can't be opened: $!";
+{ local $/=undef;  $file=<IFILE>; }
+@lines=split /[\r\n]+/, $file;
+foreach $line (@lines) {
+	if($line =~ /^Bait/){
+	}
+	elsif($line =~ /^([^\t]+)/){
+		if($1 ne $bait[$baitn]){
+			$baitn++;
+			$bait[$baitn]=$1;
+		}
+	}
+	else{
+	}
+}	
+close(IFILE);
+
+print $baitn;
+
+
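A minimal invocation sketch for BaitCheck.pl, mirroring how dotplot.bash calls it (the SAINT file name here is hypothetical); the bait count is printed to stdout:

    perl BaitCheck.pl -i saint_output.txt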
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Dotplot_Release/Normalization.R	Fri Jan 29 09:56:02 2016 -0500
@@ -0,0 +1,44 @@
+#!/usr/bin/env Rscript
+
+args <- commandArgs(trailingOnly = TRUE)
+
+#this program normalizes a saint input file based on the spectral counts of all preys
+
+d = read.delim(args[1], header=T, sep="\t", as.is=T)
+
+baitn = 1
+curr_bait <- d$Bait[1]
+s <- vector()
+s[1] = 0
+for(i in 1:length(d$Bait)){
+	if(curr_bait != d$Bait[i]){
+		baitn <- baitn + 1
+		curr_bait <- d$Bait[i]
+		s[baitn] <- d$AvgSpec[i]
+	}
+	else{
+		s[baitn] <- s[baitn] + d$AvgSpec[i]
+	}
+}
+
+med.s = median(s)
+s = s / med.s
+
+d_n <- d
+baitn = 1
+curr_bait <- d_n$Bait[1]
+for(i in 1:length(d_n$Bait)){
+	if(curr_bait != d_n$Bait[i]){
+		baitn <- baitn + 1
+		curr_bait <- d_n$Bait[i]
+		d_n$AvgSpec[i] <- d_n$AvgSpec[i]/s[baitn]
+	}
+	else{
+		d_n$AvgSpec[i] <- d_n$AvgSpec[i]/s[baitn]
+	}
+}
+
+#print normalized data to file
+
+write.table(d_n, file = "norm_saint.txt", sep="\t", quote=F, row.names=F)
+
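A usage sketch for Normalization.R, matching the dotplot.bash call when -N 1 is chosen (input file name hypothetical); the normalized table is written to norm_saint.txt in the working directory:

    ./Normalization.R saint_output.txt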
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Dotplot_Release/Normalization_sigpreys.R	Fri Jan 29 09:56:02 2016 -0500
@@ -0,0 +1,46 @@
+#!/usr/bin/env Rscript
+
+#this program normalizes a saint input file based on the spectral counts of "significant" preys
+# that is, preys with an FDR <= the secondary cutoff as supplied to the dotplot script
+
+args <- commandArgs(trailingOnly = TRUE)
+
+d = read.delim(args[1], header=T, as.is=T)
+d <- d[d$BFDR <= as.numeric(args[2]),]
+
+baitn = 1
+curr_bait <- d$Bait[1]
+s <- vector()
+s[1] = 0
+for(i in 1:length(d$Bait)){
+	if(curr_bait != d$Bait[i]){
+		baitn <- baitn + 1
+		curr_bait <- d$Bait[i]
+		s[baitn] <- d$AvgSpec[i]
+	}
+	else{
+		s[baitn] <- s[baitn] + d$AvgSpec[i]
+	}
+}
+
+med.s = median(s)
+s = s / med.s
+
+d_n <- d
+baitn = 1
+curr_bait <- d_n$Bait[1]
+for(i in 1:length(d_n$Bait)){
+	if(curr_bait != d_n$Bait[i]){
+		baitn <- baitn + 1
+		curr_bait <- d_n$Bait[i]
+		d_n$AvgSpec[i] <- d_n$AvgSpec[i]/s[baitn]
+	}
+	else{
+		d_n$AvgSpec[i] <- d_n$AvgSpec[i]/s[baitn]
+	}
+}
+
+#print normalized data to file
+
+write.table(d_n, file = "norm_saint.txt", sep="\t", quote=F, row.names=F)
+
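A usage sketch for Normalization_sigpreys.R, matching the dotplot.bash call for -N 2 (file name and cutoff illustrative); the second argument is the BFDR cutoff used to select the preys whose counts are summed per bait:

    ./Normalization_sigpreys.R saint_output.txt 0.025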
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Dotplot_Release/R_dotPlot.R	Fri Jan 29 09:56:02 2016 -0500
@@ -0,0 +1,83 @@
+#!/usr/bin/env Rscript
+
+args <- commandArgs(trailingOnly = TRUE)
+
+library('latticeExtra')
+library('colorRamps')
+
+data.file <- read.table("SC_data.txt", sep="\t", header=TRUE, row.names=1) ### import spectral count data
+data.file2 <- read.table("FDR_data.txt", sep="\t", header=TRUE, row.names=1) ### import FDR count data
+data.file3 <- read.table("clustered_matrix.txt", sep="\t", header=TRUE, row.names=1) ### import clustered matrix
+data.file4 <- scan("singletons.txt", what="", sep="\n", strip.white=T) ### import singleton data
+
+#setting parameters
+
+Sfirst=as.numeric(args[1]) #first FDR cutoff
+Ssecond=as.numeric(args[2]) #second FDR cutoff
+maxp=as.integer(args[3]) #maximum value for a spectral count
+
+#calculate column and row lengths
+
+#determine bait and prey ordering
+
+bait_levels=names(data.file3)
+prey_levels=c(rownames(data.file3),data.file4)
+
+x_ord=factor(row.names(data.file),levels=prey_levels)
+y_ord=factor(names(data.file),levels=bait_levels)
+
+df<-data.frame(y=rep(y_ord,nrow(data.file))
+	,x=rep(x_ord, each=ncol(data.file))
+	,z1=as.vector(t(data.file)) # Circle color
+	,z2=as.vector(t(data.file/apply(data.file,1,max))) # Circle size
+	,z3=as.vector(t(data.file2)) # FDR
+)
+	
+df$z1[df$z1>maxp] <- maxp #maximum value for spectral count
+df$z2[df$z2==0] <- NA
+df$z3[df$z3>Ssecond] <- 0.05*maxp
+df$z3[df$z3<=Ssecond & df$z3>Sfirst] <- 0.5*maxp
+df$z3[df$z3<=Sfirst] <- 1*maxp
+df$z4 <- df$z1
+df$z4[df$z4==0] <- 0
+df$z4[df$z4>0] <- 2.5 
+
+# The labeling for the colorkey
+
+labelat = c(0, maxp)
+labeltext = c(0, maxp)
+
+# color scheme to use
+
+nmb.colors<-maxp
+z.colors<-grey(rev(seq(0,0.9,0.9/nmb.colors))) #grayscale color scale
+
+#plot
+
+pl <- levelplot(z1~x*y, data=df
+	,col.regions =z.colors #terrain.colors(100)
+	,scales = list(x = list(rot = 90), y=list(cex=0.8), tck=0) # rotates X,Y labels and changes scale 
+	,colorkey = FALSE
+	,xlab="Prey", ylab="Bait"
+	,panel=function(x,y,z,...,col.regions){
+		print(x)
+		z.c<-df$z1[ (df$x %in% as.character(x)) & (df$y %in% y)]
+		z.2<-df$z2[ (df$x %in% as.character(x)) & (df$y %in% y)]
+		z.3<-df$z3
+		z.4<-df$z4
+		panel.xyplot(x,y
+			,as.table=TRUE
+			,pch=21 # point type to use (circles in this case)
+			,cex=((z.2-min(z.2,na.rm=TRUE))/(max(z.2,na.rm=TRUE)-min(z.2,na.rm=TRUE)))*3 #circle size
+			,fill=z.colors[floor((z.c-min(z.c,na.rm=TRUE))*nmb.colors/(max(z.c,na.rm=TRUE)-min(z.c,na.rm=TRUE)))+1] # circle colors
+			,col=z.colors[1+z.3] # border colors
+			,lex=z.4 #border thickness
+			)
+	}
+	#,main="Fold change" # graph main title
+	)
+if(ncol(data.file) > 4) ht=3.5+(0.36*((ncol(data.file)-1)-4)) else ht=3.5
+if(nrow(data.file) > 20) wd=8.25+(0.29*(nrow(data.file)-20)) else wd=5+(0.28*(nrow(data.file)-10))
+pdf("dotplot.pdf", onefile = FALSE, paper = "special", height = ht, width = wd, pointsize = 2)
+print(pl)
+dev.off()
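An invocation sketch for R_dotPlot.R as called from the biclustering branch of dotplot.bash; it assumes SC_data.txt, FDR_data.txt, clustered_matrix.txt and singletons.txt are already in the working directory, and the arguments are the primary FDR cutoff, the secondary FDR cutoff and the maximum spectral count (the cutoffs below are the recommended values, the maximum is illustrative):

    ./R_dotPlot.R 0.01 0.025 50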
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Dotplot_Release/R_dotPlot_hc.R	Fri Jan 29 09:56:02 2016 -0500
@@ -0,0 +1,125 @@
+#!/usr/bin/env Rscript
+
+args <- commandArgs(trailingOnly = TRUE)
+
+pheatmapj_loc <- paste(args[6],"pheatmap_j.R",sep="/")
+heatmap2j_loc <- paste(args[6],"heatmap_2j.R",sep="/")
+
+library('latticeExtra')
+library('RColorBrewer')
+library('grid')
+library(reshape2)
+library('gplots')
+library('gtools')
+source(pheatmapj_loc)
+source(heatmap2j_loc)
+
+data.file <- read.table("SC_data.txt", sep="\t", header=TRUE, row.names=1) ### import spectral count data
+data.file2 <- read.table("FDR_data.txt", sep="\t", header=TRUE, row.names=1) ### import FDR count data
+
+#setting parameters
+
+Sfirst=as.numeric(args[1]) #first FDR cutoff
+Ssecond=as.numeric(args[2]) #second FDR cutoff
+maxp=as.integer(args[3]) #maximum value for a spectral count
+methd <- args[4]
+dist_methd <- args[5]
+
+#determine bait and prey ordering
+
+dist_bait <- dist(as.matrix(t(data.file)), method= dist_methd) # "euclidean", "maximum", "manhattan", "canberra", "binary" or "minkowski"
+dist_prey <- dist(as.matrix(data.file), method= dist_methd)
+
+if(methd == "ward"){
+	dist_bait <- dist_bait^2 #comment out this line and the next if not using Ward's method of clustering
+	dist_prey <- dist_prey^2
+}
+
+hc_bait <- hclust(dist_bait, method = methd) # method = "average", "single", "complete", "ward", "mcquitty", "median" or "centroid"
+hc_prey <- hclust(dist_prey, method = methd)
+
+data.file = data.file[hc_prey$order, , drop = FALSE]
+data.file = data.file[, hc_bait$order, drop = FALSE]
+data.file2 = data.file2[hc_prey$order, , drop = FALSE]
+data.file2 = data.file2[, hc_bait$order, drop = FALSE]
+
+x_ord=factor(row.names(data.file), levels=row.names(data.file))
+y_ord=factor(names(data.file[1,]), levels=names(data.file[1,]))
+
+df<-data.frame(y=rep(y_ord, nrow(data.file))
+	,x=rep(x_ord, each=ncol(data.file))
+	,z1=as.vector(t(data.file)) # Circle color
+	,z2=as.vector(t(data.file/apply(data.file,1,max))) # Circle size
+	,z3=as.vector(t(data.file2)) # FDR
+)
+	
+df$z1[df$z1>maxp] <- maxp #maximum value for spectral count
+df$z2[df$z2==0] <- NA
+df$z3[df$z3>Ssecond] <- 0.05*maxp
+df$z3[df$z3<=Ssecond & df$z3>Sfirst] <- 0.5*maxp
+df$z3[df$z3<=Sfirst] <- 1*maxp
+df$z4 <- df$z1
+df$z4[df$z4==0] <- 0
+df$z4[df$z4>0] <- 2.5 
+
+# The labeling for the colorkey
+
+labelat = c(0, maxp)
+labeltext = c(0, maxp)
+
+# color scheme to use
+
+nmb.colors<-maxp
+z.colors<-grey(rev(seq(0,0.9,0.9/nmb.colors))) #grayscale color scale
+
+#plot dotplot
+
+pl <- levelplot(z1~x*y, data=df
+	,col.regions =z.colors #terrain.colors(100)
+	,scales = list(x = list(rot = 90), y=list(cex=0.8), tck=0) # rotates X,Y labels and changes scale 
+	,colorkey = FALSE
+	#,colorkey = list(space="bottom", width=1.5, height=0.3, labels=list(at = labelat, labels = labeltext)) #put colorkey at top with my labeling scheme
+	,xlab="Prey", ylab="Bait"
+	,panel=function(x,y,z,...,col.regions){
+		print(x)
+		z.c<-df$z1[ (df$x %in% as.character(x)) & (df$y %in% y)]
+		z.2<-df$z2[ (df$x %in% as.character(x)) & (df$y %in% y)]
+		z.3<-df$z3
+		z.4<-df$z4
+		panel.xyplot(x,y
+			,as.table=TRUE
+			,pch=21 # point type to use (circles in this case)
+			,cex=((z.2-min(z.2,na.rm=TRUE))/(max(z.2,na.rm=TRUE)-min(z.2,na.rm=TRUE)))*3 #circle size
+			,fill=z.colors[floor((z.c-min(z.c,na.rm=TRUE))*nmb.colors/(max(z.c,na.rm=TRUE)-min(z.c,na.rm=TRUE)))+1] # circle colors
+			,col=z.colors[1+z.3] # border colors
+			,lex=z.4 #border thickness
+			)
+	}
+	#,main="Fold change" # graph main title
+	)
+if(ncol(data.file) > 4) ht=3.5+(0.36*((ncol(data.file)-1)-4)) else ht=3.5
+if(nrow(data.file) > 20) wd=8.25+(0.29*(nrow(data.file) -20)) else wd=5.7+(0.28*(nrow(data.file) -10))
+pdf("dotplot.pdf", onefile = FALSE, paper = "special", height = ht, width = wd, pointsize = 2)
+print(pl)
+dev.off()
+
+#plot bait vs prey heatmap
+
+heat_df <- acast(df, y~x, value.var="z1")
+heat_df <- apply(heat_df, 2, rev)
+
+if(ncol(data.file) > 4) ht=3.5+(0.1*((ncol(data.file)-1)-4)) else ht=3.5
+if(nrow(data.file) > 20) wd=8.25+(0.1*(nrow(data.file)-20)) else wd=5+(0.1*(nrow(data.file)-10))
+pdf("heatmap_borders.pdf", onefile = FALSE, paper = "special", height = ht, width = wd, pointsize = 2)
+pheatmap_j(heat_df, scale="none", border_color="black", border_width = 0.1, cluster_rows=FALSE, cluster_cols=FALSE, col=colorRampPalette(c("#FFFFFF", brewer.pal(9,"Blues")))(100))
+dev.off()
+
+pdf("heatmap_no_borders.pdf", onefile = FALSE, paper = "special", height = ht, width = wd, pointsize = 2)
+pheatmap_j(heat_df, scale="none", border_color=NA, cluster_rows=FALSE, cluster_cols=FALSE, col=colorRampPalette(c("#FFFFFF", brewer.pal(9,"Blues")))(100))
+dev.off()
+
+#plot bait vs bait heatmap using dist matrix
+dist_bait <- dist_bait/max(dist_bait)
+pdf("bait2bait.pdf", onefile = FALSE, paper = "special")
+heatmap_2j(as.matrix(dist_bait), trace="none", scale="none", density.info="none", col=rev(colorRampPalette(c("#FFFFFF", brewer.pal(9,"Blues")))(100)), xMin=0, xMax=1, margins=c(1.5*max(nchar(rownames(as.matrix(dist_bait)))),1.5*max(nchar(colnames(as.matrix(dist_bait))))))
+dev.off()
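An invocation sketch for R_dotPlot_hc.R, mirroring the dotplot.bash call for hierarchical clustering; the arguments are the two FDR cutoffs, the maximum spectral count, the clustering method, the distance metric and the directory containing pheatmap_j.R and heatmap_2j.R (the path and the maximum are illustrative):

    ./R_dotPlot_hc.R 0.01 0.025 50 ward canberra /path/to/Dotplot_Release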
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Dotplot_Release/R_dotPlot_nc.R	Fri Jan 29 09:56:02 2016 -0500
@@ -0,0 +1,140 @@
+#!/usr/bin/env Rscript
+
+args <- commandArgs(trailingOnly = TRUE)
+
+pheatmapj_loc <- paste(args[9],"pheatmap_j.R",sep="/")
+
+library('latticeExtra')
+library('RColorBrewer')
+library('grid')
+library(reshape2)
+source(pheatmapj_loc)
+
+data.file <- read.table("SC_data.txt", sep="\t", header=TRUE, row.names=1) ### import spectral count data
+data.file2 <- read.table("FDR_data.txt", sep="\t", header=TRUE, row.names=1) ### import FDR count data
+bait_l <- scan(args[4], what="") ### import bait list
+if(args[5] == 0) prey_l <- scan(args[6], what="") ### import prey list
+methd <- args[7]
+dist_methd <- args[8]
+
+#setting parameters
+
+Sfirst=as.numeric(args[1]) #first FDR cutoff
+Ssecond=as.numeric(args[2]) #second FDR cutoff
+maxp=as.integer(args[3]) #maximum value for a spectral count
+
+#extract only needed data
+
+if(args[5] == 0){
+	remove <- vector()
+	remove <- prey_l[prey_l %in% row.names(data.file)]
+	prey_l <- prey_l[prey_l %in% remove]
+	remove <- bait_l[bait_l %in% names(data.file)]
+	bait_l <- bait_l[bait_l %in% remove]
+	data.file <- data.file[prey_l, bait_l]
+	data.file2 <- data.file2[prey_l, bait_l]
+} else{
+	remove <- vector()
+	remove <- bait_l[bait_l %in% names(data.file)]
+	bait_l <- bait_l[bait_l %in% remove]
+	data.file <- data.file[, bait_l]
+	data.file2 <- data.file2[, bait_l]
+	prey_keep = apply(data.file2, 1, function(x) sum(x<=Sfirst) >= 1)
+	data.file <- data.file[prey_keep,]
+	data.file2 <- data.file2[prey_keep,]
+}
+
+#determine bait and prey ordering
+
+y_ord=factor(names(data.file[1,]),levels=bait_l)
+
+if(args[5] == 0){
+	x_ord=factor(rownames(data.file),levels=prey_l)
+} else {
+	
+	data.file <- data.file[which(rowSums(data.file) > 0),]
+	dist_prey <- dist(as.matrix(data.file), method= dist_methd)
+
+	if(methd == "ward"){
+		dist_prey <- dist_prey^2
+	}
+
+	hc_prey <- hclust(dist_prey, method = methd)
+
+	data.file = data.file[hc_prey$order, , drop = FALSE]
+	data.file2 = data.file2[hc_prey$order, , drop = FALSE]
+
+	x_ord=factor(row.names(data.file), levels=row.names(data.file))
+}
+
+df<-data.frame(y=rep(y_ord, nrow(data.file))
+	,x=rep(x_ord, each=ncol(data.file))
+	,z1=as.vector(t(data.file)) # Circle color
+	,z2=as.vector(t(data.file/apply(data.file,1,max))) # Circle size
+	,z3=as.vector(t(data.file2)) # FDR
+)
+	
+df$z1[df$z1>maxp] <- maxp #maximum value for spectral count
+df$z2[df$z2==0] <- NA
+df$z3[df$z3>Ssecond] <- 0.05*maxp
+df$z3[df$z3<=Ssecond & df$z3>Sfirst] <- 0.5*maxp
+df$z3[df$z3<=Sfirst] <- 1*maxp
+df$z4 <- df$z1
+df$z4[df$z4==0] <- 0
+df$z4[df$z4>0] <- 2.5 
+
+# The labeling for the colorkey
+
+labelat = c(0, maxp)
+labeltext = c(0, maxp)
+
+# color scheme to use
+
+nmb.colors<-maxp
+z.colors<-grey(rev(seq(0,0.9,0.9/nmb.colors))) #grayscale color scale
+
+#plot dotplot
+
+pl <- levelplot(z1~x*y, data=df
+	,col.regions =z.colors #terrain.colors(100)
+	,scales = list(x = list(rot = 90), y=list(cex=0.8), tck=0) # rotates X,Y labels and changes scale 
+	,colorkey = FALSE
+	#,colorkey = list(space="bottom", width=1.5, height=0.3, labels=list(at = labelat, labels = labeltext)) #put colorkey at top with my labeling scheme
+	,xlab="Prey", ylab="Bait"
+	,panel=function(x,y,z,...,col.regions){
+		print(x)
+		z.c<-df$z1[ (df$x %in% as.character(x)) & (df$y %in% y)]
+		z.2<-df$z2[ (df$x %in% as.character(x)) & (df$y %in% y)]
+		z.3<-df$z3
+		z.4<-df$z4
+		panel.xyplot(x,y
+			,as.table=TRUE
+			,pch=21 # point type to use (circles in this case)
+			,cex=((z.2-min(z.2,na.rm=TRUE))/(max(z.2,na.rm=TRUE)-min(z.2,na.rm=TRUE)))*3 #circle size
+			,fill=z.colors[floor((z.c-min(z.c,na.rm=TRUE))*nmb.colors/(max(z.c,na.rm=TRUE)-min(z.c,na.rm=TRUE)))+1] # circle colors
+			,col=z.colors[1+z.3] # border colors
+			,lex=z.4 #border thickness
+			)
+	}
+	#,main="Fold change" # graph main title
+	)
+if(ncol(data.file) > 4) ht=3.5+(0.36*((ncol(data.file)-1)-4)) else ht=3.5
+if(nrow(data.file) > 20) wd=8.25+(0.29*(nrow(data.file)-20)) else wd=5.7+(0.28*(nrow(data.file)-10))
+pdf("dotplot.pdf", onefile = FALSE, paper = "special", height = ht, width = wd, pointsize = 2)
+print(pl)
+dev.off()
+
+#plot heatmap
+
+heat_df <- acast(df, y~x, value.var="z1")
+heat_df <- apply(heat_df, 2, rev)
+
+if(ncol(data.file) > 4) ht=3.5+(0.1*((ncol(data.file)-1)-4)) else ht=3.5
+if(nrow(data.file) > 20) wd=8.25+(0.1*(nrow(data.file)-20)) else wd=5+(0.1*(nrow(data.file)-10))
+pdf("heatmap_borders.pdf", onefile = FALSE, paper = "special", height = ht, width = wd, pointsize = 2)
+pheatmap_j(heat_df, scale="none", border_color="black", border_width = 0.1, cluster_rows=FALSE, cluster_cols=FALSE, col=colorRampPalette(c("#FFFFFF", brewer.pal(9,"Blues")))(100))
+dev.off()
+
+pdf("heatmap_no_borders.pdf", onefile = FALSE, paper = "special", height = ht, width = wd, pointsize = 2)
+pheatmap_j(heat_df, scale="none", border_color=NA, cluster_rows=FALSE, cluster_cols=FALSE, col=colorRampPalette(c("#FFFFFF", brewer.pal(9,"Blues")))(100))
+dev.off()
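An invocation sketch for R_dotPlot_nc.R, mirroring the dotplot.bash call for the no-clustering mode; argument 5 is 0 when a prey list is supplied and 1 when preys should be clustered instead (file names and path are hypothetical):

    ./R_dotPlot_nc.R 0.01 0.025 50 baits.txt 0 preys.txt ward canberra /path/to/Dotplot_Release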
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Dotplot_Release/SOFD.pl	Fri Jan 29 09:56:02 2016 -0500
@@ -0,0 +1,112 @@
+#!/usr/bin/perl
+
+# 17/12/2013
+
+if($#ARGV==0){
+	print "This program takes the Saint Output File and produces two matrices.\n";
+	print "One has average spectral counts and the other FDR scores.\n";
+	print "\nusage:\n $0\n-i [csv saint output file]\n-s [FDR cutoff, default=0.01]\n-x [AvgSpec cutoff, default=0]\n\n";
+	die;
+}
+else{
+	$i=0;
+	$cutoff=0.01;
+        $spec_cutoff=0;
+	while($i<=$#ARGV){
+		if($ARGV[$i] eq '-i'){
+			$i++;
+			$ifile=$ARGV[$i];
+		}
+		elsif($ARGV[$i] eq '-s'){
+			$i++;
+			if($ARGV[$i]>1 || $ARGV[$i]<0){  
+				die "\nFDR cutoff must be between 0 and 1 \n\n";
+			}
+			$cutoff=$ARGV[$i];
+		}
+	        elsif($ARGV[$i] eq '-x'){
+			$i++;
+			if($ARGV[$i]<0){  
+				die "\nAvgSpec cutoff must be > 0 \n\n";
+			}
+			$spec_cutoff=$ARGV[$i];
+		}
+		else{
+			die "\nIncorrect program usage\n\n";
+		}
+		$i++;
+	}
+}
+
+$baitn=0; $bait[0]="xxxx"; $sig_preysn=0;
+$file='';
+open(IFILE,"<$ifile") || die "$ifile can't be opened: $!";
+{ local $/=undef;  $file=<IFILE>; }
+@lines=split /[\r\n]+/, $file;
+foreach $line (@lines) {
+   	if($line =~ /^Bait/){
+	}
+	elsif($line =~ /^([^\t]+)\t[^\t]+\t([^\t]+)\t[^\t]+\t[\d]+\t([\d\.]+)\t[\d]+\t[^\t]+\t[^\t]+\t[^\t]+\t[^\t]+\t[^\t]+\t([^\t]+)\t[^\t]+\t([^\t]+)\t/){
+		if($1 ne $bait[$baitn]){
+			$baitn++;
+			$bait[$baitn]=$1;
+			$preyn[$baitn]=0;
+		}
+		$preyn[$baitn]++;
+		$preys[$baitn][$preyn[$baitn]]=$2;	
+		$avgspec[$baitn][$preyn[$baitn]]=$3;	
+		$saint[$baitn][$preyn[$baitn]]=$4;
+		$fdr[$baitn][$preyn[$baitn]]=$5;
+		if($5 <= $cutoff && $3 >= $spec_cutoff){
+			$check_prey=0;
+			for($i=1; $i<=$sig_preysn; $i++){
+				if($sig_preys[$i] eq $2){
+					$check_prey=1;
+				}
+			}
+			if($check_prey==0){
+				$sig_preysn++;
+				$sig_preys[$sig_preysn]=$2;
+			}
+		}
+	}
+	else{
+	}
+}	
+close(IFILE);
+
+open(SC_FILE, ">SC_data.txt");
+open(FDR_FILE, ">FDR_data.txt");
+
+for($i=1; $i<=$baitn; $i++){
+	print SC_FILE "\t$bait[$i]";
+	print FDR_FILE "\t$bait[$i]";
+}
+print SC_FILE "\n";
+print FDR_FILE "\n";
+for($i=1; $i<=$sig_preysn; $i++){
+	print SC_FILE "$sig_preys[$i]";
+	print FDR_FILE "$sig_preys[$i]";
+	for($j=1; $j<=$baitn; $j++){
+		$krem=0;
+		for($k=1; $k<=$preyn[$j]; $k++){
+			if($preys[$j][$k] eq $sig_preys[$i]){
+				$krem=$k;
+				last;
+			}
+		}
+		if($krem != 0){
+			print SC_FILE "\t$avgspec[$j][$krem]";
+			print FDR_FILE "\t$fdr[$j][$krem]";
+		}
+		else{
+			print SC_FILE "\t0";
+			print FDR_FILE "\t1";
+		}
+	}
+	print SC_FILE "\n";
+	print FDR_FILE "\n";
+}
+close(SC_FILE);
+close(FDR_FILE);
+
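A usage sketch for SOFD.pl (file name illustrative); it writes the SC_data.txt and FDR_data.txt matrices covering every prey that passes the -s FDR and -x AvgSpec cutoffs in at least one bait:

    ./SOFD.pl -i saint_output.txt -s 0.01 -x 0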
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Dotplot_Release/SaintConvert.pl	Fri Jan 29 09:56:02 2016 -0500
@@ -0,0 +1,52 @@
+#!/usr/bin/perl
+
+# 17/12/2013
+
+if($#ARGV==0){
+	print "This program takes non-SaintExpress formatted data and converts it to a mock SaintExpress format.\n";
+	print "\nusage:\n $0\n-i [csv saint output file]\n\n";
+	die;
+}
+else{
+	$i=0;
+	while($i<=$#ARGV){
+		if($ARGV[$i] eq '-i'){
+			$i++;
+			$ifile=$ARGV[$i];
+		}
+		else{
+			die "\nIncorrect program usage\n\n";
+		}
+		$i++;
+	}
+}
+
+$i=0;
+$file='';
+open(IFILE,"<$ifile") || die "$ifile can't be opened: $!";
+{ local $/=undef;  $file=<IFILE>; }
+@lines=split /[\r\n]+/, $file;
+foreach $line (@lines) {
+   	if($line =~ /^Bait/){
+	}
+	elsif($line =~ /^([^\t]+)\t([^\t]+)\t([^\t]+)\t([^\t]+)/){
+		$bait[$i]=$1;
+		$prey[$i]=$2;
+		$spec[$i]=$3;
+		$fdr[$i]=$4;
+		$i++;
+	}
+	else{
+	}
+}	
+close(IFILE);
+$line_count=$i;
+
+open(OFILE, ">mockSaintExpress.txt");
+print OFILE "Bait\tPrey\tPreyGene\tSpec\tSpecSum\tAvgSpec\tNumReplicates\tctrlCounts\tAvgP\tMaxP\tTopoAvgP\tTopoMaxP\tSaintScore\tFoldChange\tBFDR\tboosted_by\n";
+
+for($i=0; $i<$line_count; $i++){
+	print OFILE "$bait[$i]\t111\t$prey[$i]\t111\t111\t$spec[$i]\t111\t111\t111\t111\t111\t111\t111\t111\t$fdr[$i]\t111\n";
+}
+close(OFILE);
+
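A usage sketch for SaintConvert.pl (input name hypothetical); the input is expected to be tab-delimited with a header line starting with "Bait" and columns bait, prey, spectral count and FDR, and the output is written to mockSaintExpress.txt with placeholder values in the remaining SaintExpress columns:

    ./SaintConvert.pl -i my_interactions.txt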
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Dotplot_Release/Step1_data_reformating.R	Fri Jan 29 09:56:02 2016 -0500
@@ -0,0 +1,41 @@
+#!/usr/bin/env Rscript
+
+args <- commandArgs(trailingOnly = TRUE)
+
+d = read.delim(args[1], header=T, sep="\t", as.is=T)
+
+### Select prey interactions where the prey passes the BFDR threshold for at least one bait
+
+preylist=unique(c(d$PreyGene[d$BFDR <= as.numeric(args[2])]))
+pid = d$PreyGene %in% preylist
+d = d[pid,]
+
+bb = unique(d$Bait)
+pp = unique(d$PreyGene)
+
+nbait = length(bb)
+nprey = length(pp)
+
+### Reformat the SAINToutput data into a spreadsheet
+mat = matrix(0, nprey, nbait)
+
+n = nrow(d)
+mb = match(d$Bait, bb)
+mp = match(d$PreyGene, pp)
+
+### Using the AvgSpec for the spectral counts
+for(i in 1:n) {
+	mat[mp[i],mb[i]] = d$AvgSpec[i]	
+}
+
+rownames(mat) = pp
+colnames(mat) = bb
+
+outfile <- paste(c(args[3]), "matrix.txt", sep="_")
+### The following file is the outcome of running this step.
+write.table(mat, outfile, sep="\t", quote=F)
+
+
+
+
+
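An invocation sketch for Step1, matching the biclustering branch of dotplot.bash; the arguments are the SAINT file, the primary FDR cutoff and an output prefix (names illustrative), and the result is written to <prefix>_matrix.txt:

    ./Step1_data_reformating.R saint_output.txt 0.01 myexp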
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Dotplot_Release/Step2_data_filtering.R	Fri Jan 29 09:56:02 2016 -0500
@@ -0,0 +1,28 @@
+#!/usr/bin/env Rscript
+
+args <- commandArgs(trailingOnly = TRUE)
+
+d = read.delim(args[1], header=T, as.is=T)
+
+d2 = d
+d2s = d
+
+ss_cutoff <- as.numeric(args[2])
+### Here I'm only going to take the preys which appeared in at least 2 baits with >args[2] counts
+id = apply(d, 1, function(x) sum(x>ss_cutoff) >= 2)
+id2 = apply(d, 1, function(x) sum(x>ss_cutoff) < 2)
+d2 = d2[id, ]
+d2s = d2s[id2, 0]
+max.d2 = max(as.numeric(as.matrix(d2))) 
+d2 = d2 / max.d2 * 10
+
+d3 = data.frame(PROT = rownames(d2), d2)
+
+outfile <- paste(c(args[3]), "dat", sep=".")
+
+### The following file is the outcome of running this step.
+write.table(d3, outfile, sep="\t", quote=F, row.names=F)
+### This is the final input file for nested cluster algorithm
+
+write.table(d2s, "singletons.txt", quote=F)
+
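An invocation sketch for Step2, again matching dotplot.bash; the arguments are the matrix from Step1, the minimum spectral count and the output prefix (names illustrative), producing <prefix>.dat for the nested clustering step plus singletons.txt:

    ./Step2_data_filtering.R myexp_matrix.txt 0 myexp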
Binary file Dotplot_Release/Step3_nestedcluster has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Dotplot_Release/Step4_biclustering.R	Fri Jan 29 09:56:02 2016 -0500
@@ -0,0 +1,201 @@
+#!/usr/bin/env Rscript
+
+args <- commandArgs(trailingOnly = TRUE)
+
+d = read.delim(args[1], header=T, sep="\t", as.is=T, row.names=1)
+
+clusters = read.delim("Clusters", header=T, sep="\t", as.is=T)[,-1]
+clusters = data.frame(Bait=colnames(clusters), Cluster=as.numeric(clusters[1,]))
+nested.clusters = read.delim("NestedClusters", header=F, sep="\t", as.is=T)[1:dim(d)[1],]
+nested.phi = read.delim("NestedMu", header=F, sep="\t", as.is=T)[1:dim(d)[1],]
+nested.phi2 = read.delim("NestedSigma2", header=F, sep="\t", as.is=T)[1:dim(d)[1],]
+mcmc = read.delim("MCMCparameters", header=F, sep="\t", as.is=T)
+
+### distance between bait using phi (also reorder cluster names) 
+### report nested clusters with positive counts only
+### rearrange rows and columns of the raw data matrix according to the back-tracking algorithm
+
+recursivePaste = function(x) {
+  n = length(x)
+  x = x[order(x)]
+  y = x[1]
+  if(n > 1) {
+    for(i in 2:n) y = paste(y, x[i], sep="-")
+  }
+  y
+}
+
+calcDist = function(x, y) {
+  if(length(x) != length(y)) stop("different length\n")
+  else res = sum(abs(x-y))
+  res
+}
+
+
+#clusters, nested.clusters, nested.phi, d
+
+bcl = clusters
+pcl = nested.clusters
+phi = nested.phi
+phi2 = nested.phi2
+dat = d
+
+
+## bipartite graph
+make.graphlet = function(b,p,s) {
+  g = NULL
+  g$b = b
+  g$p = p
+  g$s = as.numeric(s)
+  g
+}
+
+make.hub = function(b,p) {
+  g = NULL
+  g$b = b
+  g$p = p
+  g
+}
+
+jaccard = function(x,y) {
+  j = length(intersect(x,y)) / length(union(x,y))
+  j
+}
+
+merge.graphlets = function(x, y) {
+  g = NULL
+  g$b = union(x$b, y$b)
+  g$p = union(x$p, y$p)
+  g$s1 = rep(0,length(g$p))
+  g$s2 = rep(0,length(g$p))
+  g$s1[match(x$p, g$p)] = x$s
+  g$s2[match(y$p, g$p)] = y$s
+  g$s = apply(cbind(g$s1, g$s2), 1, max)
+  g
+}
+
+summarizeDP = function(bcl, pcl, phi, phi2, dat, hub.size=0.5, ...) {
+  pcl = as.matrix(pcl)
+  phi = as.matrix(phi)
+  phi2 = as.matrix(phi2)
+  dat = as.matrix(dat)
+  rownames(phi) = rownames(dat)
+  rownames(phi2) = rownames(dat)
+
+  ubcl = unique(as.numeric(bcl$Cluster))
+  n = length(ubcl)
+  pcl = pcl[,ubcl]
+  phi = phi[,ubcl]
+  phi2 = phi2[,ubcl]
+  phi[phi < 0.05] = 0
+
+  bcl$Cluster = match(as.numeric(bcl$Cluster), ubcl)
+  colnames(pcl) = colnames(phi) = colnames(phi2) = paste("CL", 1:n, sep="")
+
+  ## remove non-reproducible mean values
+  nprey = dim(dat)[1]; nbait = dim(dat)[2]
+  preys = rownames(dat); baits = colnames(dat)
+  n = length(unique(bcl$Cluster))
+  for(j in 1:n) {
+    id = c(1:nbait)[bcl$Cluster == j]
+    for(k in 1:nprey) {
+      do.it = ifelse(mean(as.numeric(dat[k,id]) > 0) <= 0.5,TRUE,FALSE)
+      if(do.it) {
+        phi[k,j] = 0
+      }
+    }
+  }
+
+  ## create bipartite graphs (graphlets)
+  gr = NULL
+  for(j in 1:n) {
+    id = c(1:nbait)[bcl$Cluster == j]
+    id2 = c(1:nprey)[phi[,j] > 0]
+    gr[[j]] = make.graphlet(baits[id], preys[id2], phi[id2,j])
+  }
+
+  ## intersecting preys between graphlets
+  gr2 = NULL
+  cur = 1
+  for(i in 1:n) {
+    for(j in 1:n) {
+      if(i != j) {
+        combine = jaccard(gr[[i]]$p, gr[[j]]$p) >= 0.75
+        if(combine) {
+          gr2[[cur]] = merge.graphlets(gr[[i]], gr[[j]])
+          cur = cur + 1
+        }
+      }
+    }
+  }
+
+  old.phi = phi
+  phi = phi[, bcl$Cluster]
+  phi2 = phi2[, bcl$Cluster]
+  ## find hub preys
+  proceed = apply(old.phi, 1, function(x) sum(x>0) >= 2)
+  h = NULL
+  cur = 1
+  for(k in 1:nprey) {
+    if(proceed[k]) {
+      id = as.numeric(phi[k,]) > 0
+      if(mean(id) >= hub.size) {
+        h[[cur]] = make.hub(baits[id], preys[k])
+        cur = cur + 1
+      }
+    }
+  }
+  nhub = cur - 1
+
+  res = list(data=dat, baitCL=bcl, phi=phi, phi2=phi2, gr = gr, gr2 = gr2, hub = h)
+  res
+}
+
+res = summarizeDP(clusters, nested.clusters, nested.phi, nested.phi2, d)
+
+write.table(res$baitCL[order(res$baitCL$Cluster),], "baitClusters", sep="\t", quote=F, row.names=F)
+write.table(res$data, "clusteredData", sep="\t", quote=F)
+
+##### SOFT
+library(gplots)
+tmpd = res$data
+tmpm = res$phi
+colnames(tmpm) = paste(colnames(res$data), colnames(tmpm))
+
+pdf("estimated.pdf", height=25, width=8)
+my.hclust<-hclust(dist(tmpd))
+my.dend<-as.dendrogram(my.hclust)
+tmp.res = heatmap.2(tmpm, Rowv=my.dend, Colv=T, trace="n", col=rev(heat.colors(10)), breaks=seq(0,.5,by=0.05), margins=c(10,10), keysize=0.8, cexRow=0.4)
+#tmp.res = heatmap.2(tmpm, Rowv=T, Colv=T, trace="n", col=rev(heat.colors(10)), breaks=seq(0,.5,by=0.05), margins=c(10,10), keysize=0.8, cexRow=0.4)
+tmpd = tmpd[rev(tmp.res$rowInd),tmp.res$colInd]
+write.table(tmpd, "clustered_matrix.txt", sep="\t", quote=F)
+heatmap.2(tmpd, Rowv=F, Colv=F, trace="n", col=rev(heat.colors(10)), breaks=seq(0,.5,by=0.05), margins=c(10,10), keysize=0.8, cexRow=0.4)
+dev.off()
+
+
+### Statistical Plots 
+dd = dist(1-cor((res$phi), method="pearson"))
+dend = as.dendrogram(hclust(dd, "ave"))
+#plot(dend) 
+
+pdf("bait2bait.pdf")
+tmp = res$phi
+colnames(tmp) = paste(colnames(res$phi), res$baitCL$Bait, sep="_")
+
+###dd = cor(tmp[,-26])    ### This line is only for Chris' data (one bait has all zeros in the estimated parameters)
+dd = cor(tmp)    ### general case: correlation across all baits
+
+write.table(dd, "bait2bait_matrix.txt", sep="\t", quote=F)
+heatmap.2(as.matrix(dd), trace="n", breaks=seq(-1,1,by=0.1), col=(greenred(20)), cexRow=0.7, cexCol=0.7)
+dev.off()
+
+tmp = mcmc[,2]
+ymax = max(tmp)
+ymin = min(tmp)
+pdf("stats.pdf", height=12, width=12)
+
+plot(mcmc[mcmc[,4]=="G",3], type="s", xlab="Iterations", ylab="Number of Clusters", main="")
+plot(mcmc[,2], type="l", xlab="Iterations", ylab="Log-Likelihood", main="", ylim=c(ymin,ymax))
+
+dev.off()
+
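An invocation sketch for Steps 3 and 4 as run by dotplot.bash (prefix illustrative); Step4_biclustering.R assumes Step3_nestedcluster has already written Clusters, NestedClusters, NestedMu, NestedSigma2 and MCMCparameters to the working directory:

    GSL_RNG_SEED=123 ./Step3_nestedcluster myexp.dat biclust_param.txt
    ./Step4_biclustering.R myexp.dat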
Binary file Dotplot_Release/biclust.tar.gz has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Dotplot_Release/biclust_param.txt	Fri Jan 29 09:56:02 2016 -0500
@@ -0,0 +1,12 @@
+np 10
+nb 100
+a 1.0
+b 1.0
+lambda 0.0
+nu 25.0
+alpha 1.0
+rho 1.0
+gamma 1.0
+nburn 5000
+niter 10000
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Dotplot_Release/dotplot.bash	Fri Jan 29 09:56:02 2016 -0500
@@ -0,0 +1,252 @@
+#!/bin/bash
+#SCRIPT=$(readlink -e $0)
+#SCRIPTPATH=`dirname $SCRIPT`
+pushd `dirname $0` > /dev/null
+SCRIPTPATH=`pwd`
+popd > /dev/null
+
+usage() { printf "Usage: $0 
+[-f <saint_file_name.txt>]
+[-i <0 for SaintExpress format, 1 for other>]
+[-c <clustering to perform. Options: b (biclustering), h (hierarchical), n (none, requires input text files for bait and prey ordering; see options -b and -p)>]
+[-n <clustering type to be performed if option -c is set to \"h\">]
+[-d <distance metric to use if option -c is set to \"h\">]
+[-b <list of bait proteins in display order (see option -c n)>]
+[-p <list of prey proteins in display order (see option -c n). Set this to \"all\" if you want to include all preys and cluster them>]
+[-s <primary FDR cutoff [0-1, recommended=0.01]>]
+[-t <secondary FDR cutoff [must be less stringent than the primary, recommended=0.025]>]
+[-x <spectral count minimum. Only preys with AvgSpec >= this will be used>]
+[-m <maximum spectral count>]
+[-N <normalization, 0 for no (default), 1 for yes, 2 for normalization based on significant preys counts (prey FDR <= option -t)>]
+[-C <FDR cutoff for normalization if using option -N 2 (default is -t)>]\n"
+1>&2; exit 1; }
+
+N=0
+n="ward"
+d="canberra"
+x=0
+i=0
+while getopts ":f:i:s:t:x:m:c:n:d:b:p:N:C:" o; do
+    case "${o}" in
+        f)
+            f=${OPTARG}
+            ;;
+        i)
+	    i=${OPTARG}
+            ;;
+        s)
+            s=${OPTARG}
+            ;;
+	t)
+            t=${OPTARG}
+            ;;
+        x)
+	    x=${OPTARG}
+            ;;
+	m)
+            m=${OPTARG}
+            ;;
+	c)
+            c=${OPTARG}
+	    ;;
+	n)
+	    n=${OPTARG}
+	    ;;
+	d)
+	    d=${OPTARG}
+	    ;;
+	b)
+            b=${OPTARG}
+	    ;;
+	p)
+	    p=${OPTARG}
+	    ;;
+	N)
+	    N=${OPTARG}
+	    ;;
+	C)
+	    C=${OPTARG}
+	    ;;
+        *)
+            usage
+            ;;
+    esac
+done
+shift $((OPTIND-1))
+
+filename=${f%%.*}
+echo "Saint input file = ${f}"
+echo "Primary FDR cutoff = ${s}"
+echo "Secondary FDR cutoff for dotplot = ${t}"
+echo "Minimum spectral count for significant preys = ${x}"
+echo "Maximum spectral count for dot plot = ${m}"
+
+if [ -z "${f}" ] || [ -z "${s}" ] || [ -z "${t}" ] || [ -z "${m}" ] || [ -z "${c}" ]; then
+    usage
+fi
+
+if [ "${i}" == 1 ]; then
+	$SCRIPTPATH/SaintConvert.pl -i ${f}
+	f="mockSaintExpress.txt"
+fi
+
+if [ "${x}" -ge "${m}" ]; then
+	echo "spectral count minimum (${x}) cannot be greater than or equal to the maximum (${m})"
+	exit 1;
+elif [ "${x}" -lt 0 ]; then
+	echo "spectral count minimum (${x}) cannot be less than 0. Setting to 0 and continuing"
+	x=0
+fi
+
+###Check for normalization
+
+if [ "${N}" == 1 ]; then
+	printf "\nNormalization is being performed\n"
+	$SCRIPTPATH/Normalization.R ${f}
+	f="norm_saint.txt"
+elif [ "${N}" == 2 ]; then
+	printf "\nNormalization is being performed\n"
+	if [ -z "${C}" ]; then
+		C=${t}
+	fi
+	$SCRIPTPATH/Normalization_sigpreys.R ${f} ${C}
+	f="norm_saint.txt"
+fi
+
+
+###Check for clustering etc
+
+if [ "${c}" == "h" ] && [ -z "${n}" ]; then
+	printf "\nHierarchical clustering was selected (-c = h), but no clustering method (-n) was chosen.\n"
+	printf "The input parameter -n must be set to one of \"average\", \"centroid\", \"complete\", \"mcquitty\",\n"
+	printf "\"median\", \"single\" or \"ward\". \"ward\" will be selected as default.\n\n"
+	n="ward"
+elif [ "${c}" == "h" ] && [ -n "${n}" ]; then
+	if [ "${n}" == "average" ] || [ "${n}" == "centroid" ] || [ "${n}" == "complete" ] || [ "${n}" == "mcquitty" ] || [ "${n}" == "median" ] || [  "${n}" == "single" ] || [ "${n}" == "ward" ]; then
+		printf "\nHierarchical clustering (method = ${n}) will be performed\n\n"
+	else
+		printf "\n${n} is not a valid Hierarchical clustering method.\n"
+		printf "Choose one of \"average\", \"centroid\", \"complete\", \"mcquitty\", \"median\", \"single\" or \"ward\"\n\n"
+		exit 1
+	fi
+fi
+
+p_c=0
+if [ "${c}" == "h" ] && [ -z "${d}" ]; then
+	printf "\nHierarchical clustering was selected (-c = h), but no distance metric (-d) was chosen.\n"
+	printf "The input parameter -d must be set to one of  \"binary\", \"canberra\", \"euclidean\",\n"
+	printf "\"manhattan\", \"maximum\" or \"minkowski\". \"canberra\" will be selected as default.\n\n"
+	d="canberra"
+elif [ "${c}" == "h" ] && [ -n "${d}" ]; then
+	if [ "${d}" == "binary" ] || [ "${d}" == "canberra" ] || [ "${d}" == "euclidean" ] || [ "${d}" == "manhattan" ] || [ "${d}" == "maximum" ] || [  "${d}" == "minkowski" ]; then
+		printf "\nHierarchical clustering (distance metric = ${d}) will be performed\n\n"
+	else
+		printf "\n${d} is not a valid Hierarchical clustering distance metric.\n"
+		printf "Choose one of  \"binary\", \"canberra\", \"euclidean\", \"manhattan\", \"maximum\" or \"minkowski\"\n\n"
+		exit 1
+	fi
+fi
+
+if [ "${c}" == "n" ] && [ -z "${b}" ]; then
+	printf "\n\"No Clustering\" option was selected (-c = n), but no bait list was included (option -b).\n"
+	printf "Bait list must be in .txt format.\n\n"
+	exit 1
+elif [ "${c}" == "n" ] && [ -z "${p}" ]; then
+	printf "\n\"No Clustering\" option was selected (-c = n), but no prey list was included (option -p).\n"
+	printf "Prey list must be in .txt format.\n\n"
+	exit 1
+elif [ "${c}" == "n" ] && [ "${p}" == "all" ]; then
+	printf "\n\"No Clustering\" option was selected (-c = n) for baits, but preys will still be clustered.\n"
+	printf "using \"ward\" and \"canberra\" as defaults or options as supplied on command line.\n\n"
+	p="empty"
+	p_c=1
+	n="ward"
+	d="canberra"
+fi
+
+
+###Check number of baits
+
+bait_n=$(perl $SCRIPTPATH/BaitCheck.pl -i ${f})
+echo "Number of baits = "$bait_n
+printf "\n\n"
+
+if [ "${c}" == "b" ] && [ $bait_n == 2 ]; then
+	printf "\nWarning: only 2 baits are present. Biclustering will not be performed.\n"
+	printf "Hierarchical clustering (method = ward) will be performed instead.\n\n"
+	c="h"
+	n="ward"
+fi
+
+
+###Generate plots
+
+if [ "${c}" == "b" ]; then
+	printf "\nBiclustering will be performed\n\n"
+	$SCRIPTPATH/Step1_data_reformating.R ${f} ${s} ${filename}
+	$SCRIPTPATH/Step2_data_filtering.R ${filename}_matrix.txt ${x} ${filename}
+	GSL_RNG_SEED=123  $SCRIPTPATH/Step3_nestedcluster ${filename}.dat $SCRIPTPATH/biclust_param.txt
+	$SCRIPTPATH/Step4_biclustering.R ${filename}.dat
+
+	$SCRIPTPATH/SOFD.pl -i ${f} -s ${s} -x ${x}
+	$SCRIPTPATH/R_dotPlot.R ${s} ${t} ${m}
+	mkdir Output_${filename}
+	mkdir Output_${filename}/TempData_${filename}
+	mv bait_lists Output_${filename}/TempData_${filename}
+	mv Clusters Output_${filename}/TempData_${filename}
+	mv MCMCparameters Output_${filename}/TempData_${filename}
+	mv NestedClusters Output_${filename}/TempData_${filename}
+	mv NestedMu Output_${filename}/TempData_${filename}
+	mv NestedSigma2 Output_${filename}/TempData_${filename}
+	mv OPTclusters Output_${filename}/TempData_${filename}
+	mv ${filename}_matrix.txt Output_${filename}/TempData_${filename}
+	mv ${filename}.dat Output_${filename}/TempData_${filename}
+	mv SC_data.txt Output_${filename}/TempData_${filename}
+	mv FDR_data.txt Output_${filename}/TempData_${filename}
+	mv clustered_matrix.txt Output_${filename}/TempData_${filename}
+	mv singletons.txt Output_${filename}/TempData_${filename}
+	mv bait2bait_matrix.txt Output_${filename}/TempData_${filename}
+	mv baitClusters Output_${filename}/TempData_${filename}
+	mv clusteredData Output_${filename}/TempData_${filename}
+	mv dotplot.pdf Output_${filename}
+	mv bait2bait.pdf Output_${filename} 
+	mv estimated.pdf Output_${filename} 
+	mv stats.pdf Output_${filename}
+	cp $SCRIPTPATH/legend.pdf Output_${filename}
+elif [ "${c}" == "h" ]; then
+
+	$SCRIPTPATH/SOFD.pl -i ${f} -s ${s} -x ${x}
+	$SCRIPTPATH/R_dotPlot_hc.R ${s} ${t} ${m} ${n} ${d} $SCRIPTPATH
+
+	mkdir Output_${filename}
+	mkdir Output_${filename}/TempData_${filename}
+	mv dotplot.pdf Output_${filename}
+	mv heatmap_borders.pdf Output_${filename}
+	mv heatmap_no_borders.pdf Output_${filename}
+	mv bait2bait.pdf Output_${filename}
+	mv SC_data.txt Output_${filename}/TempData_${filename}
+	mv FDR_data.txt Output_${filename}/TempData_${filename}
+	cp $SCRIPTPATH/legend.pdf Output_${filename}
+elif [ "${c}" == "n" ]; then
+	
+	$SCRIPTPATH/SOFD.pl -i ${f} -s ${s} -x ${x}
+	echo "$SCRIPTPATH/R_dotPlot_nc.R ${s} ${t} ${m} ${b} $p_c ${p} ${n} ${d} $SCRIPTPATH"
+	$SCRIPTPATH/R_dotPlot_nc.R ${s} ${t} ${m} ${b} $p_c ${p} ${n} ${d} $SCRIPTPATH
+
+	mkdir Output_${filename}
+	mkdir Output_${filename}/TempData_${filename}
+	mv dotplot.pdf Output_${filename}
+	mv heatmap_borders.pdf Output_${filename}
+	mv heatmap_no_borders.pdf Output_${filename}
+	mv SC_data.txt Output_${filename}/TempData_${filename}
+	mv FDR_data.txt Output_${filename}/TempData_${filename}
+	cp $SCRIPTPATH/legend.pdf Output_${filename}
+else
+	printf -- "-c must be one of [b, h, n]: b (biclustering), h (hierarchical), n (none, requires input text files for bait and prey ordering)\n"
+	exit 1;
+fi
+
+if [ "${N}" == "1" ] || [ "${N}" == "2" ]; then
+	mv norm_saint.txt Output_${filename}/TempData_${filename}
+fi
+
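An end-to-end usage sketch for the driver script, using hierarchical clustering and the recommended FDR cutoffs (the input file name and the maximum spectral count are illustrative); results are collected under Output_<filename>:

    ./dotplot.bash -f saint_output.txt -c h -n ward -d canberra -s 0.01 -t 0.025 -x 0 -m 50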
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Dotplot_Release/pheatmap_j.R	Fri Jan 29 09:56:02 2016 -0500
@@ -0,0 +1,719 @@
+lo = function(rown, coln, nrow, ncol, cellheight = NA, cellwidth = NA, treeheight_col, treeheight_row, legend, annotation, annotation_colors, annotation_legend, main, fontsize, fontsize_row, fontsize_col, ...){
+	# Get height of colnames and length of rownames
+	if(!is.null(coln[1])){
+		longest_coln = which.max(strwidth(coln, units = 'in'))
+		gp = list(fontsize = fontsize_col, ...)
+		coln_height = unit(1, "grobheight", textGrob(coln[longest_coln], rot = 90, gp = do.call(gpar, gp))) + unit(5, "bigpts")
+	}
+	else{
+		coln_height = unit(5, "bigpts")
+	}
+	
+	if(!is.null(rown[1])){
+		longest_rown = which.max(strwidth(rown, units = 'in'))
+		gp = list(fontsize = fontsize_row, ...)
+		rown_width = unit(1, "grobwidth", textGrob(rown[longest_rown], gp = do.call(gpar, gp))) + unit(10, "bigpts")
+	}
+	else{
+		rown_width = unit(5, "bigpts")
+	}
+	
+	gp = list(fontsize = fontsize, ...)
+	# Legend position
+	if(!is.na(legend[1])){
+		longest_break = which.max(nchar(names(legend)))
+		longest_break = unit(1.1, "grobwidth", textGrob(as.character(names(legend))[longest_break], gp = do.call(gpar, gp)))
+		title_length = unit(1.1, "grobwidth", textGrob("Scale", gp = gpar(fontface = "bold", ...)))
+		legend_width = unit(12, "bigpts") + longest_break * 1.2
+		legend_width = max(title_length, legend_width)
+	}
+	else{
+		legend_width = unit(0, "bigpts")
+	}
+	
+	# Set main title height
+	if(is.na(main)){
+		main_height = unit(0, "npc")
+	}
+	else{
+		main_height = unit(1.5, "grobheight", textGrob(main, gp = gpar(fontsize = 1.3 * fontsize, ...)))
+	}
+	
+	# Column annotations
+	if(!is.na(annotation[[1]][1])){
+		# Column annotation height 
+		annot_height = unit(ncol(annotation) * (8 + 2) + 2, "bigpts")
+		# Width of the corresponding legend
+		longest_ann = which.max(nchar(as.matrix(annotation)))
+		annot_legend_width = unit(1.2, "grobwidth", textGrob(as.matrix(annotation)[longest_ann], gp = gpar(...))) + unit(12, "bigpts")
+		if(!annotation_legend){
+			annot_legend_width = unit(0, "npc")
+		}
+	}
+	else{
+		annot_height = unit(0, "bigpts")
+		annot_legend_width = unit(0, "bigpts")
+	}
+	
+	# Tree height
+	treeheight_col = unit(treeheight_col, "bigpts") + unit(5, "bigpts")
+	treeheight_row = unit(treeheight_row, "bigpts") + unit(5, "bigpts") 
+	
+	# Set cell sizes
+	if(is.na(cellwidth)){
+		matwidth = unit(1, "npc") - rown_width - legend_width - treeheight_row - annot_legend_width
+	}
+	else{
+		matwidth = unit(cellwidth * ncol, "bigpts")
+	}
+	
+	if(is.na(cellheight)){
+		matheight = unit(1, "npc") - main_height - coln_height - treeheight_col - annot_height
+	}
+	else{
+		matheight = unit(cellheight * nrow, "bigpts")
+	}	
+	
+	
+	# Produce layout()
+	pushViewport(viewport(layout = grid.layout(nrow = 5, ncol = 5, widths = unit.c(treeheight_row, matwidth, rown_width, legend_width, annot_legend_width), heights = unit.c(main_height, treeheight_col, annot_height, matheight, coln_height)), gp = do.call(gpar, gp)))
+	
+	# Get cell dimensions
+	pushViewport(vplayout(4, 2))
+	cellwidth = convertWidth(unit(0:1, "npc"), "bigpts", valueOnly = T)[2] / ncol
+	cellheight = convertHeight(unit(0:1, "npc"), "bigpts", valueOnly = T)[2] / nrow
+	upViewport()
+	
+	# Return minimal cell dimension in bigpts to decide if borders are drawn
+	mindim = min(cellwidth, cellheight) 
+	return(mindim)
+}
+
+draw_dendrogram = function(hc, horizontal = T){
+	h = hc$height / max(hc$height) / 1.05
+	m = hc$merge
+	o = hc$order
+	n = length(o)
+
+	m[m > 0] = n + m[m > 0] 
+	m[m < 0] = abs(m[m < 0])
+
+	dist = matrix(0, nrow = 2 * n - 1, ncol = 2, dimnames = list(NULL, c("x", "y"))) 
+	dist[1:n, 1] = 1 / n / 2 + (1 / n) * (match(1:n, o) - 1)
+
+	for(i in 1:nrow(m)){
+		dist[n + i, 1] = (dist[m[i, 1], 1] + dist[m[i, 2], 1]) / 2
+		dist[n + i, 2] = h[i]
+	}
+	
+	draw_connection = function(x1, x2, y1, y2, y){
+		grid.lines(x = c(x1, x1), y = c(y1, y))
+		grid.lines(x = c(x2, x2), y = c(y2, y))
+		grid.lines(x = c(x1, x2), y = c(y, y))
+	}
+	
+	if(horizontal){
+		for(i in 1:nrow(m)){
+			draw_connection(dist[m[i, 1], 1], dist[m[i, 2], 1], dist[m[i, 1], 2], dist[m[i, 2], 2], h[i])
+		}
+	}
+	
+	else{
+		gr = rectGrob()
+		pushViewport(viewport(height = unit(1, "grobwidth", gr), width = unit(1, "grobheight", gr), angle = 90))
+		dist[, 1] = 1 - dist[, 1] 
+		for(i in 1:nrow(m)){
+			draw_connection(dist[m[i, 1], 1], dist[m[i, 2], 1], dist[m[i, 1], 2], dist[m[i, 2], 2], h[i])
+		}
+		upViewport()
+	}
+}
+
+draw_matrix = function(matrix, border_color, border_width, fmat, fontsize_number){
+	n = nrow(matrix)
+	m = ncol(matrix)
+	x = (1:m)/m - 1/2/m
+	y = 1 - ((1:n)/n - 1/2/n)
+	for(i in 1:m){
+		grid.rect(x = x[i], y = y[1:n], width = 1/m, height = 1/n, gp = gpar(fill = matrix[,i], col = border_color, lwd = border_width))
+		if(attr(fmat, "draw")){
+			grid.text(x = x[i], y = y[1:n], label = fmat[, i], gp = gpar(col = "grey30", fontsize = fontsize_number))
+		}
+	}
+}
+
+draw_colnames = function(coln, ...){
+	m = length(coln)
+	x = (1:m)/m - 1/2/m
+	grid.text(coln, x = x, y = unit(0.96, "npc"), just="right", rot = 90, gp = gpar(...))
+}
+
+draw_rownames = function(rown, ...){
+	n = length(rown)
+	y = 1 - ((1:n)/n - 1/2/n)
+	grid.text(rown, x = unit(0.04, "npc"), y = y, vjust = 0.5, hjust = 0, gp = gpar(...))	
+}
+
+draw_legend = function(color, breaks, legend, ...){
+	height = min(unit(1, "npc"), unit(150, "bigpts"))
+	pushViewport(viewport(x = 0, y = unit(1, "npc"), just = c(0, 1), height = height))
+	legend_pos = (legend - min(breaks)) / (max(breaks) - min(breaks))
+	breaks = (breaks - min(breaks)) / (max(breaks) - min(breaks))
+	h = breaks[-1] - breaks[-length(breaks)]
+	grid.rect(x = 0, y = breaks[-length(breaks)], width = unit(10, "bigpts"), height = h, hjust = 0, vjust = 0, gp = gpar(fill = color, col = "#FFFFFF00"))
+	grid.text(names(legend), x = unit(12, "bigpts"), y = legend_pos, hjust = 0, gp = gpar(...))
+	upViewport()
+}
+
+convert_annotations = function(annotation, annotation_colors){
+	new = annotation
+	for(i in 1:ncol(annotation)){
+		a = annotation[, i]
+		b = annotation_colors[[colnames(annotation)[i]]]
+		if(is.character(a) | is.factor(a)){
+			a = as.character(a)
+			if(length(setdiff(a, names(b))) > 0){
+				stop(sprintf("Factor levels on variable %s do not match with annotation_colors", colnames(annotation)[i]))
+			}
+			new[, i] = b[a]
+		}
+		else{
+			a = cut(a, breaks = 100)
+			new[, i] = colorRampPalette(b)(100)[a]
+		}
+	}
+	return(as.matrix(new))
+}
+
+draw_annotations = function(converted_annotations, border_color, border_width){
+	n = ncol(converted_annotations)
+	m = nrow(converted_annotations)
+	x = (1:m)/m - 1/2/m
+	y = cumsum(rep(8, n)) - 4 + cumsum(rep(2, n))
+	for(i in 1:m){
+		grid.rect(x = x[i], unit(y[1:n], "bigpts"), width = 1/m, height = unit(8, "bigpts"), gp = gpar(fill = converted_annotations[i, ], col = border_color, lwd = border_width))
+	}
+}
+
+draw_annotation_legend = function(annotation, annotation_colors, border_color, border_width, ...){
+	y = unit(1, "npc")
+	text_height = unit(1, "grobheight", textGrob("FGH", gp = gpar(...)))
+	for(i in names(annotation_colors)){
+		grid.text(i, x = 0, y = y, vjust = 1, hjust = 0, gp = gpar(fontface = "bold", ...))
+		y = y - 1.5 * text_height
+		if(is.character(annotation[, i]) | is.factor(annotation[, i])){
+			for(j in 1:length(annotation_colors[[i]])){
+				grid.rect(x = unit(0, "npc"), y = y, hjust = 0, vjust = 1, height = text_height, width = text_height, gp = gpar(col = border_color, lwd = border_width, fill = annotation_colors[[i]][j]))
+				grid.text(names(annotation_colors[[i]])[j], x = text_height * 1.3, y = y, hjust = 0, vjust = 1, gp = gpar(...))
+				y = y - 1.5 * text_height
+			}
+		}
+		else{
+			yy = y - 4 * text_height + seq(0, 1, 0.02) * 4 * text_height
+			h = 4 * text_height * 0.02
+			grid.rect(x = unit(0, "npc"), y = yy, hjust = 0, vjust = 1, height = h, width = text_height, gp = gpar(col = "#FFFFFF00", fill = colorRampPalette(annotation_colors[[i]])(50)))
+			txt = rev(range(grid.pretty(range(annotation[, i], na.rm = TRUE))))
+			yy = y - c(0, 3) * text_height
+			grid.text(txt, x = text_height * 1.3, y = yy, hjust = 0, vjust = 1, gp = gpar(...))
+			y = y - 4.5 * text_height
+		}
+		y = y - 1.5 * text_height
+	}
+}
+
+draw_main = function(text, ...){
+	grid.text(text, gp = gpar(fontface = "bold", ...))
+}
+
+vplayout = function(x, y){
+	return(viewport(layout.pos.row = x, layout.pos.col = y))
+}
+
+heatmap_motor = function(matrix, border_color, border_width, cellwidth, cellheight, tree_col, tree_row, treeheight_col, treeheight_row, filename, width, height, breaks, color, legend, annotation, annotation_colors, annotation_legend, main, fontsize, fontsize_row, fontsize_col, fmat, fontsize_number, ...){
+	grid.newpage()
+	
+	# Set layout
+	mindim = lo(coln = colnames(matrix), rown = rownames(matrix), nrow = nrow(matrix), ncol = ncol(matrix), cellwidth = cellwidth, cellheight = cellheight, treeheight_col = treeheight_col, treeheight_row = treeheight_row, legend = legend, annotation = annotation, annotation_colors = annotation_colors, annotation_legend = annotation_legend, main = main, fontsize = fontsize, fontsize_row = fontsize_row, fontsize_col = fontsize_col,  ...)
+	
+	if(!is.na(filename)){
+		pushViewport(vplayout(1:5, 1:5))
+		
+		if(is.na(height)){
+			height = convertHeight(unit(0:1, "npc"), "inches", valueOnly = T)[2]
+		}
+		if(is.na(width)){
+			width = convertWidth(unit(0:1, "npc"), "inches", valueOnly = T)[2]
+		}
+		
+		# Get file type
+		r = regexpr("\\.[a-zA-Z]*$", filename)
+		if(r == -1) stop("Improper filename")
+		ending = substr(filename, r + 1, r + attr(r, "match.length"))
+
+		f = switch(ending,
+			pdf = function(x, ...) pdf(x, ...),
+			png = function(x, ...) png(x, units = "in", res = 300, ...),
+			jpeg = function(x, ...) jpeg(x, units = "in", res = 300, ...),
+			jpg = function(x, ...) jpeg(x, units = "in", res = 300, ...),
+			tiff = function(x, ...) tiff(x, units = "in", res = 300, compression = "lzw", ...),
+			bmp = function(x, ...) bmp(x, units = "in", res = 300, ...),
+			stop("File type should be: pdf, png, bmp, jpg, jpeg, tiff")
+		)
+		
+		# print(sprintf("height:%f width:%f", height, width))
+		f(filename, height = height, width = width)
+		heatmap_motor(matrix, cellwidth = cellwidth, cellheight = cellheight, border_color = border_color, border_width = border_width, tree_col = tree_col, tree_row = tree_row, treeheight_col = treeheight_col, treeheight_row = treeheight_row, breaks = breaks, color = color, legend = legend, annotation = annotation, annotation_colors = annotation_colors, annotation_legend = annotation_legend, filename = NA, main = main, fontsize = fontsize, fontsize_row = fontsize_row, fontsize_col = fontsize_col, fmat = fmat, fontsize_number =  fontsize_number, ...)
+		dev.off()
+		upViewport()
+		return()
+	}
+	
+	# Omit border color if cell size is too small 
+	if(mindim < 3) border_color = NA
+	
+	# Draw title
+	if(!is.na(main)){
+		pushViewport(vplayout(1, 2))
+		draw_main(main, fontsize = 1.3 * fontsize, ...)
+		upViewport()
+	}
+	
+	# Draw tree for the columns
+	if(!is.na(tree_col[[1]][1]) & treeheight_col != 0){
+		pushViewport(vplayout(2, 2))
+		draw_dendrogram(tree_col, horizontal = T)
+		upViewport()
+	}
+	
+	# Draw tree for the rows
+	if(!is.na(tree_row[[1]][1]) & treeheight_row != 0){
+		pushViewport(vplayout(4, 1))
+		draw_dendrogram(tree_row, horizontal = F)
+		upViewport()
+	}
+	
+	# Draw matrix
+	pushViewport(vplayout(4, 2))
+	draw_matrix(matrix, border_color, border_width, fmat, fontsize_number)
+	upViewport()
+	
+	# Draw colnames
+	if(length(colnames(matrix)) != 0){
+		pushViewport(vplayout(5, 2))
+		pars = list(colnames(matrix), fontsize = fontsize_col, ...)
+		do.call(draw_colnames, pars)
+		upViewport()
+	}
+	
+	# Draw rownames
+	if(length(rownames(matrix)) != 0){
+		pushViewport(vplayout(4, 3))
+		pars = list(rownames(matrix), fontsize = fontsize_row, ...)
+		do.call(draw_rownames, pars)
+		upViewport()
+	}
+	
+	# Draw annotation tracks
+	if(!is.na(annotation[[1]][1])){
+		pushViewport(vplayout(3, 2))
+		converted_annotation = convert_annotations(annotation, annotation_colors)
+		draw_annotations(converted_annotation, border_color, border_width)
+		upViewport()
+	}
+	
+	# Draw annotation legend
+	if(!is.na(annotation[[1]][1]) & annotation_legend){
+		if(length(rownames(matrix)) != 0){
+			pushViewport(vplayout(4:5, 5))
+		}
+		else{
+			pushViewport(vplayout(3:5, 5))
+		}
+		draw_annotation_legend(annotation, annotation_colors, border_color, border_width, fontsize = fontsize, ...)
+		upViewport()
+	}
+	
+	# Draw legend
+	if(!is.na(legend[1])){
+		length(colnames(matrix))
+		if(length(rownames(matrix)) != 0){
+			pushViewport(vplayout(4:5, 4))
+		}
+		else{
+			pushViewport(vplayout(3:5, 4))
+		}
+		draw_legend(color, breaks, legend, fontsize = fontsize, ...)
+		upViewport()
+	}
+	
+	
+}
+
+generate_breaks = function(x, n, center = F){
+	if(center){
+		m = max(abs(c(min(x, na.rm = T), max(x, na.rm = T))))
+		res = seq(-m, m, length.out = n + 1)
+	}
+	else{
+		res = seq(min(x, na.rm = T), max(x, na.rm = T), length.out = n + 1)
+	}
+	
+	return(res)
+}
+
+scale_vec_colours = function(x, col = rainbow(10), breaks = NA){
+	return(col[as.numeric(cut(x, breaks = breaks, include.lowest = T))])
+}
+
+scale_colours = function(mat, col = rainbow(10), breaks = NA){
+	mat = as.matrix(mat)
+	return(matrix(scale_vec_colours(as.vector(mat), col = col, breaks = breaks), nrow(mat), ncol(mat), dimnames = list(rownames(mat), colnames(mat))))
+}
+
+cluster_mat = function(mat, distance, method){
+	if(!(method %in% c("ward", "single", "complete", "average", "mcquitty", "median", "centroid"))){
+		stop("clustering method has to be one from the list: 'ward', 'single', 'complete', 'average', 'mcquitty', 'median' or 'centroid'.")
+	}
+	if(!(distance[1] %in% c("correlation", "euclidean", "maximum", "manhattan", "canberra", "binary", "minkowski")) & class(distance) != "dist"){
+		print(!(distance[1] %in% c("correlation", "euclidean", "maximum", "manhattan", "canberra", "binary", "minkowski")) | class(distance) != "dist")
+		stop("distance has to be a dissimilarity structure as produced by dist or one measure from the list: 'correlation', 'euclidean', 'maximum', 'manhattan', 'canberra', 'binary', 'minkowski'")
+	}
+	if(distance[1] == "correlation"){
+		d = as.dist(1 - cor(t(mat)))
+	}
+	else{
+		if(class(distance) == "dist"){
+			d = distance
+		}
+		else{
+			d = dist(mat, method = distance)
+		}
+	}
+	
+	return(hclust(d, method = method))
+}
+
+scale_rows = function(x){
+	m = apply(x, 1, mean, na.rm = T)
+	s = apply(x, 1, sd, na.rm = T)
+	return((x - m) / s)
+}
+
+scale_mat = function(mat, scale){
+	if(!(scale %in% c("none", "row", "column"))){
+		stop("scale argument should take values: 'none', 'row' or 'column'")
+	}
+	mat = switch(scale, none = mat, row = scale_rows(mat), column = t(scale_rows(t(mat))))
+	return(mat)
+}
+
+generate_annotation_colours = function(annotation, annotation_colors, drop){
+	if(is.na(annotation_colors)[[1]][1]){
+		annotation_colors = list()
+	}
+	count = 0
+	for(i in 1:ncol(annotation)){
+		if(is.character(annotation[, i]) | is.factor(annotation[, i])){
+			if (is.factor(annotation[, i]) & !drop){
+				count = count + length(levels(annotation[, i]))
+			}
+			else{
+				count = count + length(unique(annotation[, i]))
+			}
+		}
+	}
+	
+	factor_colors = hsv((seq(0, 1, length.out = count + 1)[-1] + 
+      0.2)%%1, 0.7, 0.95)
+	
+	set.seed(3453)
+	
+	for(i in 1:ncol(annotation)){
+		if(!(colnames(annotation)[i] %in% names(annotation_colors))){
+			if(is.character(annotation[, i]) | is.factor(annotation[, i])){
+				n = length(unique(annotation[, i]))
+				if (is.factor(annotation[, i]) & !drop){
+					n = length(levels(annotation[, i]))
+				}
+				ind = sample(1:length(factor_colors), n)
+				annotation_colors[[colnames(annotation)[i]]] = factor_colors[ind]
+				l = levels(as.factor(annotation[, i]))
+				l = l[l %in% unique(annotation[, i])]
+				if (is.factor(annotation[, i]) & !drop){
+					l = levels(annotation[, i])
+				}
+				names(annotation_colors[[colnames(annotation)[i]]]) = l
+				factor_colors = factor_colors[-ind]
+			}
+			else{
+				r = runif(1)
+				annotation_colors[[colnames(annotation)[i]]] = hsv(r, c(0.1, 1), 1)
+			}
+		}
+	}
+	return(annotation_colors)
+}
+
+kmeans_pheatmap = function(mat, k = min(nrow(mat), 150), sd_limit = NA, ...){
+	# Filter data
+	if(!is.na(sd_limit)){
+		s = apply(mat, 1, sd)
+		mat = mat[s > sd_limit, ]	
+	}
+	
+	# Cluster data
+	set.seed(1245678)
+	km = kmeans(mat, k, iter.max = 100)
+	mat2 = km$centers
+	
+	# Compose rownames
+	t = table(km$cluster)
+	rownames(mat2) = sprintf("cl%s_size_%d", names(t), t)
+	
+	# Draw heatmap
+	pheatmap(mat2, ...)
+}
+ 
+#' A function to draw clustered heatmaps.
+#' 
+#' A function to draw clustered heatmaps where one has better control over some graphical 
+#' parameters such as cell size, etc. 
+#' 
+#' The function also allows aggregating the rows using kmeans clustering. This is 
+#' advisable if the number of rows is so big that R can no longer handle their hierarchical 
+#' clustering, roughly more than 1000. Instead of showing all the rows 
+#' separately, one can cluster the rows in advance and show only the cluster centers. 
+#' The number of clusters can be tuned with parameter kmeans_k.
+#'
+#' @param mat numeric matrix of the values to be plotted.
+#' @param color vector of colors used in heatmap.
+#' @param kmeans_k the number of kmeans clusters to make, if we want to aggregate the 
+#' rows before drawing heatmap. If NA then the rows are not aggregated.
+#' @param breaks a sequence of numbers that covers the range of values in mat and is one 
+#' element longer than the color vector. Used for mapping values to colors. Useful if certain 
+#' values need to be mapped to certain colors. If value is NA then the 
+#' breaks are calculated automatically.
+#' @param border_color color of cell borders on heatmap, use NA if no border should be 
+#' drawn.
+#' @param cellwidth individual cell width in points. If left as NA, then the values 
+#' depend on the size of plotting window.
+#' @param cellheight individual cell height in points. If left as NA, 
+#' then the values depend on the size of plotting window.
+#' @param scale character indicating if the values should be centered and scaled in 
+#' either the row direction or the column direction, or none. Corresponding values are 
+#' \code{"row"}, \code{"column"} and \code{"none"}
+#' @param cluster_rows boolean value determining if rows should be clustered.
+#' @param cluster_cols boolean value determining if columns should be clustered.
+#' @param clustering_distance_rows distance measure used in clustering rows. Possible 
+#' values are \code{"correlation"} for Pearson correlation and all the distances 
+#' supported by \code{\link{dist}}, such as \code{"euclidean"}, etc. If the value is none 
+#' of the above it is assumed that a distance matrix is provided.
+#' @param clustering_distance_cols distance measure used in clustering columns. Possible 
+#' values the same as for clustering_distance_rows.
+#' @param clustering_method clustering method used. Accepts the same values as 
+#' \code{\link{hclust}}.
+#' @param treeheight_row the height of a tree for rows, if these are clustered. 
+#' Default value 50 points.
+#' @param treeheight_col the height of a tree for columns, if these are clustered. 
+#' Default value 50 points.
+#' @param legend logical to determine if legend should be drawn or not.
+#' @param legend_breaks vector of breakpoints for the legend.
+#' @param legend_labels vector of labels for the \code{legend_breaks}.
+#' @param annotation data frame that specifies the annotations shown on top of the 
+#' columns. Each row defines the features for a specific column. The columns in the data 
+#' and rows in the annotation are matched using corresponding row and column names. Note 
+#' that the color scheme takes into account whether the variable is continuous or discrete.
+#' @param annotation_colors list for specifying annotation track colors manually. It is 
+#' possible to define the colors for only some of the features. Check examples for 
+#' details.
+#' @param annotation_legend boolean value showing if the legend for annotation tracks 
+#' should be drawn. 
+#' @param drop_levels logical to determine if unused levels are also shown in the legend.
+#' @param show_rownames boolean specifying if row names should be shown.
+#' @param show_colnames boolean specifying if column names should be shown.
+#' @param main the title of the plot
+#' @param fontsize base fontsize for the plot 
+#' @param fontsize_row fontsize for rownames (Default: fontsize) 
+#' @param fontsize_col fontsize for colnames (Default: fontsize) 
+#' @param display_numbers logical determining if the numeric values are also printed to 
+#' the cells. 
+#' @param number_format format strings (C printf style) of the numbers shown in cells. 
+#' For example "\code{\%.2f}" shows 2 decimal places and "\code{\%.1e}" shows exponential 
+#' notation (see more in \code{\link{sprintf}}).    
+#' @param fontsize_number fontsize of the numbers displayed in cells
+#' @param filename file path where to save the picture. The file type is decided by 
+#' the extension in the path. Currently the following formats are supported: png, pdf, 
+#' tiff, bmp, jpeg. Even if the plot does not fit into the plotting window, the file 
+#' size is calculated so that the whole plot fits, unless width and height are given 
+#' explicitly.
+#' @param width manual option for determining the output file width in inches.
+#' @param height manual option for determining the output file height in inches.
+#' @param \dots graphical parameters for the text used in plot. Parameters passed to 
+#' \code{\link{grid.text}}, see \code{\link{gpar}}. 
+#' 
+#' @return 
+#' Invisibly a list of components 
+#' \itemize{
+#' 	\item \code{tree_row} the clustering of rows as \code{\link{hclust}} object 
+#' 	\item \code{tree_col} the clustering of columns as \code{\link{hclust}} object
+#' 	\item \code{kmeans} the kmeans clustering of rows if parameter \code{kmeans_k} was 
+#' specified 
+#' }
+#' 
+#' @author  Raivo Kolde <rkolde@@gmail.com>
+#' @examples
+#'  # Generate some data
+#' test = matrix(rnorm(200), 20, 10)
+#' test[1:10, seq(1, 10, 2)] = test[1:10, seq(1, 10, 2)] + 3
+#' test[11:20, seq(2, 10, 2)] = test[11:20, seq(2, 10, 2)] + 2
+#' test[15:20, seq(2, 10, 2)] = test[15:20, seq(2, 10, 2)] + 4
+#' colnames(test) = paste("Test", 1:10, sep = "")
+#' rownames(test) = paste("Gene", 1:20, sep = "")
+#' 
+#' # Draw heatmaps
+#' pheatmap(test)
+#' pheatmap(test, kmeans_k = 2)
+#' pheatmap(test, scale = "row", clustering_distance_rows = "correlation")
+#' pheatmap(test, color = colorRampPalette(c("navy", "white", "firebrick3"))(50))
+#' pheatmap(test, cluster_row = FALSE)
+#' pheatmap(test, legend = FALSE)
+#' pheatmap(test, display_numbers = TRUE)
+#' pheatmap(test, display_numbers = TRUE, number_format = "%.1e")
+#' pheatmap(test, cluster_row = FALSE, legend_breaks = -1:4, legend_labels = c("0", 
+#' "1e-4", "1e-3", "1e-2", "1e-1", "1"))
+#' pheatmap(test, cellwidth = 15, cellheight = 12, main = "Example heatmap")
+#' pheatmap(test, cellwidth = 15, cellheight = 12, fontsize = 8, filename = "test.pdf")
+#' 
+#' 
+#' # Generate column annotations
+#' annotation = data.frame(Var1 = factor(1:10 %% 2 == 0, 
+#' 				labels = c("Class1", "Class2")), Var2 = 1:10)
+#' annotation$Var1 = factor(annotation$Var1, levels = c("Class1", "Class2", "Class3"))
+#' rownames(annotation) = paste("Test", 1:10, sep = "")
+#' 
+#' pheatmap(test, annotation = annotation)
+#' pheatmap(test, annotation = annotation, annotation_legend = FALSE)
+#' pheatmap(test, annotation = annotation, annotation_legend = FALSE, drop_levels = FALSE)
+#' 
+#' # Specify colors
+#' Var1 = c("navy", "darkgreen")
+#' names(Var1) = c("Class1", "Class2")
+#' Var2 = c("lightgreen", "navy")
+#' 
+#' ann_colors = list(Var1 = Var1, Var2 = Var2)
+#' 
+#' pheatmap(test, annotation = annotation, annotation_colors = ann_colors, main = "Example")
+#' 
+#' # Specifying clustering from distance matrix
+#' drows = dist(test, method = "minkowski")
+#' dcols = dist(t(test), method = "minkowski")
+#' pheatmap(test, clustering_distance_rows = drows, clustering_distance_cols = dcols)
+#'
+#' @export
+pheatmap_j = function(mat, color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdYlBu")))(100), kmeans_k = NA, breaks = NA, border_color = "grey60", border_width = 1, cellwidth = NA, cellheight = NA, scale = "none", cluster_rows = TRUE, cluster_cols = TRUE, clustering_distance_rows = "euclidean", clustering_distance_cols = "euclidean", clustering_method = "complete",  treeheight_row = ifelse(cluster_rows, 50, 0), treeheight_col = ifelse(cluster_cols, 50, 0), legend = TRUE, legend_breaks = NA, legend_labels = NA, annotation = NA, annotation_colors = NA, annotation_legend = TRUE, drop_levels = TRUE, show_rownames = T, show_colnames = T, main = NA, fontsize = 10, fontsize_row = fontsize, fontsize_col = fontsize, display_numbers = F, number_format = "%.2f", fontsize_number = 0.8 * fontsize, filename = NA, width = NA, height = NA, ...){
+	
+	# Preprocess matrix
+	mat = as.matrix(mat)
+	if(scale != "none"){
+		mat = scale_mat(mat, scale)
+		if(is.na(breaks[1])){
+			breaks = generate_breaks(mat, length(color), center = T)
+		}
+	}
+	
+	
+	# Kmeans
+	if(!is.na(kmeans_k)){
+		# Cluster data
+		km = kmeans(mat, kmeans_k, iter.max = 100)
+		mat = km$centers
+
+		# Compose rownames
+		t = table(km$cluster)
+		rownames(mat) = sprintf("cl%s_size_%d", names(t), t)
+	}
+	else{
+		km = NA
+	}
+	
+	# Do clustering
+	if(cluster_rows){
+		tree_row = cluster_mat(mat, distance = clustering_distance_rows, method = clustering_method)
+		mat = mat[tree_row$order, , drop = FALSE]
+	}
+	else{
+		tree_row = NA
+		treeheight_row = 0
+	}
+	
+	if(cluster_cols){
+		tree_col = cluster_mat(t(mat), distance = clustering_distance_cols, method = clustering_method)
+		mat = mat[, tree_col$order, drop = FALSE]
+	}
+	else{
+		tree_col = NA
+		treeheight_col = 0
+	}
+	
+	# Format numbers to be displayed in cells 
+	if(display_numbers){
+		fmat = matrix(sprintf(number_format, mat), nrow = nrow(mat), ncol = ncol(mat))
+		attr(fmat, "draw") = TRUE
+	}
+	else{
+		fmat = matrix(NA, nrow = nrow(mat), ncol = ncol(mat))
+		attr(fmat, "draw") = FALSE
+	}
+	
+	
+	# Colors and scales
+	if(!is.na(legend_breaks[1]) & !is.na(legend_labels[1])){
+		if(length(legend_breaks) != length(legend_labels)){
+			stop("Lengths of legend_breaks and legend_labels must be the same")
+		}
+	}
+	
+	
+	if(is.na(breaks[1])){
+		breaks = generate_breaks(as.vector(mat), length(color))
+	}
+	if(legend & is.na(legend_breaks[1])){
+		legend = grid.pretty(range(as.vector(breaks)))
+		names(legend) = legend
+	}
+	else if(legend & !is.na(legend_breaks[1])){
+		legend = legend_breaks[legend_breaks >= min(breaks) & legend_breaks <= max(breaks)]
+		
+		if(!is.na(legend_labels[1])){
+			legend_labels = legend_labels[legend_breaks >= min(breaks) & legend_breaks <= max(breaks)]
+			names(legend) = legend_labels
+		}
+		else{
+			names(legend) = legend
+		}
+	}
+	else{
+		legend = NA
+	}
+	mat = scale_colours(mat, col = color, breaks = breaks)
+	
+	# Preparing annotation colors
+	if(!is.na(annotation[[1]][1])){
+		annotation = annotation[colnames(mat), , drop = F]
+		annotation_colors = generate_annotation_colours(annotation, annotation_colors, drop = drop_levels)
+	}
+	
+	if(!show_rownames){
+		rownames(mat) = NULL
+	}
+	
+	if(!show_colnames){
+		colnames(mat) = NULL
+	}
+	
+	# Draw heatmap
+	heatmap_motor(mat, border_color = border_color, border_width = border_width, cellwidth = cellwidth, cellheight = cellheight, treeheight_col = treeheight_col, treeheight_row = treeheight_row, tree_col = tree_col, tree_row = tree_row, filename = filename, width = width, height = height, breaks = breaks, color = color, legend = legend, annotation = annotation, annotation_colors = annotation_colors, annotation_legend = annotation_legend, main = main, fontsize = fontsize, fontsize_row = fontsize_row, fontsize_col = fontsize_col, fmat = fmat, fontsize_number = fontsize_number, ...)
+	
+	invisible(list(tree_row = tree_row, tree_col = tree_col, kmeans = km))
+}
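+
+# Usage sketch (illustrative only; not part of the upstream pheatmap code): pheatmap_j()
+# is the heatmap entry point used by the dotplot scripts. Compared with the stock
+# pheatmap() interface it also accepts border_width, which is passed on to
+# heatmap_motor(). The call below assumes the whole of pheatmap_j.R has been sourced and
+# that its dependencies (the grid and RColorBrewer packages) are loaded; the if(FALSE)
+# wrapper keeps it from running when this file is sourced.
+if(FALSE){
+	test = matrix(rnorm(200), 20, 10)
+	colnames(test) = paste("Test", 1:10, sep = "")
+	rownames(test) = paste("Gene", 1:20, sep = "")
+	# Aggregate the 20 rows into 5 kmeans cluster centers and draw wider cell borders
+	pheatmap_j(test, kmeans_k = 5, border_color = "grey60", border_width = 2,
+		cellwidth = 15, cellheight = 12, main = "pheatmap_j sketch")
+}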
+
+