diff PLIDflow/scripts/clusterfinder_Auto.R @ 6:795e11fac81b draft default tip

Included new tools for standardization
author bitlab
date Wed, 22 Apr 2020 06:12:00 -0400
parents 6fcfa4756040
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/PLIDflow/scripts/clusterfinder_Auto.R	Wed Apr 22 06:12:00 2020 -0400
@@ -0,0 +1,170 @@
+# #clusterfinder_Auto.R makes a file containig the coordenates of the identified cluster/-s by Silhouette criterion  
+# 
+#!/usr/bin/env Rscript      
+args = commandArgs(trailingOnly=TRUE) 
+
+if(length(args) < 1){
+  stop("USE: Rscript clusterfinder_Auto.R <session_id>")
+}
+
+#Arguments definition 
+
+session_id <- args[1]
+
+print(session_id)
+
+setwd(session_id)
+
+options(error=traceback)
+options(warn=-1)
+
+#Load required library
+library(cluster)
+
+#Load required libraries
+library(clValid)
+
+#Loading required package: cluster
+library(fpc)
+
+#Install NbClust package
+#install.packages("NbClust",dependencies = TRUE)
+
+#Loading required package: NbClust
+library(NbClust)
+
+
+#Read the dataset
+data_centers <- read.table("fillouts1file.txt", header=TRUE, sep=";", na.strings="") 
+#data_centers <- read.table("fillouts1file.txt", header=TRUE, sep=";", na.strings="") 
+
+
+#Run the XXX for selection of the number of clusters
+
+clustersdata <- NbClust(data_centers, diss=NULL, distance = "euclidean", min.nc=2, max.nc=15, method = "kmeans", index = "all", alphaBeale = 0.1)
+
+#capture.output(clustersdata$Best.nc[1,], file = "bestnumberclusters.txt")
+
+#capture.output(NbClust(data_centers, diss=NULL, distance = "euclidean", min.nc=2, max.nc=15, method = "kmeans", index = "all", alphaBeale = 0.1), file = "proofclusters.txt")
+
+#capture.output(NbClust(data_centers, diss=NULL, distance = "euclidean", min.nc=2, max.nc=15, method = "kmeans", index = "all", alphaBeale = 0.1), file = "numclusters.txt")
+
+#best_num_cluster <- scan("proofclusters.txt", what = character(), quiet = TRUE)
+
+k.max <- as.integer(rownames(table(clustersdata$Best.nc[1,]))[which.max(apply(table(clustersdata$Best.nc[1,]),MARGIN=1,max))])
+
+km.res <- kmeans(as.matrix(data_centers), centers = k.max, nstart = 25)
+
+best_position <- as.integer(rownames(table(clustersdata$Best.nc[1,]))[which.max(apply(table(clustersdata$Best.nc[1,]),MARGIN=1,max))]) 
+
+# Make clustercoordenates table
+#X Coordenate
+start_x <- best_position + 1
+end_x <- start_x + (best_position - 1)
+
+cluster_pos_x <- 1
+x_pos <- c()
+for (c in start_x:end_x){
+	  x_pos[cluster_pos_x] <- km.res$centers[c]
+  cluster_pos_x <- cluster_pos_x + 1
+}
+#print(x_pos)
+
+#Y Coordenate
+start_y <- end_x + 1
+end_y <- start_y + (best_position - 1)
+
+cluster_pos_y <- 1
+y_pos <- c()
+for(cc in start_y:end_y){
+	  y_pos[cluster_pos_y] <- km.res$centers[cc]
+  cluster_pos_y <- cluster_pos_y + 1
+}
+#print(y_pos)
+
+#Z Coordenate
+start_z <- end_y + 1
+end_z <- start_z + (best_position - 1)
+
+cluster_pos_z <- 1
+z_pos <- c()
+for(ccc in start_z:end_z){
+	  z_pos[cluster_pos_z] <- km.res$centers[ccc]
+  cluster_pos_z <- cluster_pos_z + 1
+}
+#print(z_pos)
+
+#Create a file with clusters coordenates. Cluster coordenates are vectors x_pos, y_pos, z_pos
+num_filas <- length(x_pos)
+clusters_tabla <- matrix(1, nrow = num_filas, ncol = 4) #columns are column 1 number of ccluster, column 2 x-coordenates, colum 3 y-coordenates, column 4 z-coordenates
+
+##Add number of cluster located in column 1
+for(i in 1:num_filas){
+  clusters_tabla[i,1] <- i
+}
+
+##Add cluster x,y,z-coordenates in clusters_tabla 
+#x-coordenates
+v_pos_x <- 1
+for(f in 1:num_filas){
+  clusters_tabla[f,2] <- x_pos[v_pos_x]
+  v_pos_x <- v_pos_x + 1
+}
+
+#y-coordenates
+v_pos_y <- 1
+for(ff in 1:num_filas){
+  clusters_tabla[ff,3] <- y_pos[v_pos_y]
+  v_pos_y <- v_pos_y + 1
+}
+
+#z-coordenates
+v_pos_z <- 1
+for(fff in 1:num_filas){
+  clusters_tabla[fff,4] <- z_pos[v_pos_z]
+  v_pos_z <- v_pos_z + 1
+}
+
+#Write the head for the file which contains the number of clusters annd their coordenates  
+cabecera_clusterscoordenates <- paste("cluster", sep = " ")
+cabecera <- c("x","y","z")
+
+for(i in 1:3){
+  cabecera_clusterscoordenates <- paste(cabecera_clusterscoordenates, cabecera[i], sep=" ")
+}
+#write(cabecera_clusterscoordenates, file="/home/galaxy/galaxy/tools/proteindocking/scripts/clusterscoordenates.txt", append= TRUE) 
+write(cabecera_clusterscoordenates, file="clusterscoordenates.txt", append= TRUE) 
+
+#Write rows containing number of cluster and coordenates
+for(i in 1:nrow(clusters_tabla)){
+  fila_completa <- paste(clusters_tabla[i,1], sep = " ")
+  for(j in 2:ncol(clusters_tabla)){
+    fila_completa <- paste(fila_completa, clusters_tabla[i,j], sep=" ")
+  }
+  write(fila_completa, file="clusterscoordenates.txt", append= TRUE)
+  #write(fila_completa, file="/home/galaxy/galaxy/tools/proteindocking/scripts/clusterscoordenates.txt", append= TRUE)
+}
+
+
+#Table to see Binding Sited finded in Galaxy screen
+#BS_screen <- scan("/home/galaxy/galaxy/tools/proteindocking/scripts/clusterscoordenates.txt", what = character(), quiet = TRUE)
+BS_screen <- scan("clusterscoordenates.txt", what = character(), quiet = TRUE)
+
+BS_table <- matrix(1, nrow = nrow(clusters_tabla), ncol = ncol(clusters_tabla))
+
+bs_count <- 5
+for(i in 1:nrow(clusters_tabla)){
+  for(j in 1:ncol(clusters_tabla)){
+    BS_table[i,j] <- BS_screen[bs_count]
+    bs_count <- bs_count +1
+  }
+}
+
+colnames(BS_table) <- c("Binding Site", "X", "Y", "Z")
+print(BS_table)
+
+
+
+
+
+