Mercurial > repos > bitlab > plidflow
diff PLIDflow/scripts/clusterfinder_Auto.R @ 6:795e11fac81b draft default tip
Included new tools for standardization
author | bitlab |
---|---|
date | Wed, 22 Apr 2020 06:12:00 -0400 |
parents | 6fcfa4756040 |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/PLIDflow/scripts/clusterfinder_Auto.R Wed Apr 22 06:12:00 2020 -0400 @@ -0,0 +1,170 @@ +# #clusterfinder_Auto.R makes a file containig the coordenates of the identified cluster/-s by Silhouette criterion +# +#!/usr/bin/env Rscript +args = commandArgs(trailingOnly=TRUE) + +if(length(args) < 1){ + stop("USE: Rscript clusterfinder_Auto.R <session_id>") +} + +#Arguments definition + +session_id <- args[1] + +print(session_id) + +setwd(session_id) + +options(error=traceback) +options(warn=-1) + +#Load required library +library(cluster) + +#Load required libraries +library(clValid) + +#Loading required package: cluster +library(fpc) + +#Install NbClust package +#install.packages("NbClust",dependencies = TRUE) + +#Loading required package: NbClust +library(NbClust) + + +#Read the dataset +data_centers <- read.table("fillouts1file.txt", header=TRUE, sep=";", na.strings="") +#data_centers <- read.table("fillouts1file.txt", header=TRUE, sep=";", na.strings="") + + +#Run the XXX for selection of the number of clusters + +clustersdata <- NbClust(data_centers, diss=NULL, distance = "euclidean", min.nc=2, max.nc=15, method = "kmeans", index = "all", alphaBeale = 0.1) + +#capture.output(clustersdata$Best.nc[1,], file = "bestnumberclusters.txt") + +#capture.output(NbClust(data_centers, diss=NULL, distance = "euclidean", min.nc=2, max.nc=15, method = "kmeans", index = "all", alphaBeale = 0.1), file = "proofclusters.txt") + +#capture.output(NbClust(data_centers, diss=NULL, distance = "euclidean", min.nc=2, max.nc=15, method = "kmeans", index = "all", alphaBeale = 0.1), file = "numclusters.txt") + +#best_num_cluster <- scan("proofclusters.txt", what = character(), quiet = TRUE) + +k.max <- as.integer(rownames(table(clustersdata$Best.nc[1,]))[which.max(apply(table(clustersdata$Best.nc[1,]),MARGIN=1,max))]) + +km.res <- kmeans(as.matrix(data_centers), centers = k.max, nstart = 25) + +best_position <- as.integer(rownames(table(clustersdata$Best.nc[1,]))[which.max(apply(table(clustersdata$Best.nc[1,]),MARGIN=1,max))]) + +# Make clustercoordenates table +#X Coordenate +start_x <- best_position + 1 +end_x <- start_x + (best_position - 1) + +cluster_pos_x <- 1 +x_pos <- c() +for (c in start_x:end_x){ + x_pos[cluster_pos_x] <- km.res$centers[c] + cluster_pos_x <- cluster_pos_x + 1 +} +#print(x_pos) + +#Y Coordenate +start_y <- end_x + 1 +end_y <- start_y + (best_position - 1) + +cluster_pos_y <- 1 +y_pos <- c() +for(cc in start_y:end_y){ + y_pos[cluster_pos_y] <- km.res$centers[cc] + cluster_pos_y <- cluster_pos_y + 1 +} +#print(y_pos) + +#Z Coordenate +start_z <- end_y + 1 +end_z <- start_z + (best_position - 1) + +cluster_pos_z <- 1 +z_pos <- c() +for(ccc in start_z:end_z){ + z_pos[cluster_pos_z] <- km.res$centers[ccc] + cluster_pos_z <- cluster_pos_z + 1 +} +#print(z_pos) + +#Create a file with clusters coordenates. Cluster coordenates are vectors x_pos, y_pos, z_pos +num_filas <- length(x_pos) +clusters_tabla <- matrix(1, nrow = num_filas, ncol = 4) #columns are column 1 number of ccluster, column 2 x-coordenates, colum 3 y-coordenates, column 4 z-coordenates + +##Add number of cluster located in column 1 +for(i in 1:num_filas){ + clusters_tabla[i,1] <- i +} + +##Add cluster x,y,z-coordenates in clusters_tabla +#x-coordenates +v_pos_x <- 1 +for(f in 1:num_filas){ + clusters_tabla[f,2] <- x_pos[v_pos_x] + v_pos_x <- v_pos_x + 1 +} + +#y-coordenates +v_pos_y <- 1 +for(ff in 1:num_filas){ + clusters_tabla[ff,3] <- y_pos[v_pos_y] + v_pos_y <- v_pos_y + 1 +} + +#z-coordenates +v_pos_z <- 1 +for(fff in 1:num_filas){ + clusters_tabla[fff,4] <- z_pos[v_pos_z] + v_pos_z <- v_pos_z + 1 +} + +#Write the head for the file which contains the number of clusters annd their coordenates +cabecera_clusterscoordenates <- paste("cluster", sep = " ") +cabecera <- c("x","y","z") + +for(i in 1:3){ + cabecera_clusterscoordenates <- paste(cabecera_clusterscoordenates, cabecera[i], sep=" ") +} +#write(cabecera_clusterscoordenates, file="/home/galaxy/galaxy/tools/proteindocking/scripts/clusterscoordenates.txt", append= TRUE) +write(cabecera_clusterscoordenates, file="clusterscoordenates.txt", append= TRUE) + +#Write rows containing number of cluster and coordenates +for(i in 1:nrow(clusters_tabla)){ + fila_completa <- paste(clusters_tabla[i,1], sep = " ") + for(j in 2:ncol(clusters_tabla)){ + fila_completa <- paste(fila_completa, clusters_tabla[i,j], sep=" ") + } + write(fila_completa, file="clusterscoordenates.txt", append= TRUE) + #write(fila_completa, file="/home/galaxy/galaxy/tools/proteindocking/scripts/clusterscoordenates.txt", append= TRUE) +} + + +#Table to see Binding Sited finded in Galaxy screen +#BS_screen <- scan("/home/galaxy/galaxy/tools/proteindocking/scripts/clusterscoordenates.txt", what = character(), quiet = TRUE) +BS_screen <- scan("clusterscoordenates.txt", what = character(), quiet = TRUE) + +BS_table <- matrix(1, nrow = nrow(clusters_tabla), ncol = ncol(clusters_tabla)) + +bs_count <- 5 +for(i in 1:nrow(clusters_tabla)){ + for(j in 1:ncol(clusters_tabla)){ + BS_table[i,j] <- BS_screen[bs_count] + bs_count <- bs_count +1 + } +} + +colnames(BS_table) <- c("Binding Site", "X", "Y", "Z") +print(BS_table) + + + + + +