0
|
1 # #clusterfinder_Auto.R makes a file containig the coordenates of the identified cluster/-s by Silhouette criterion
|
|
2 #
|
|
3 #!/usr/bin/env Rscript
|
|
4 args = commandArgs(trailingOnly=TRUE)
|
|
5
|
|
6 if(length(args) < 1){
|
|
7 stop("USE: Rscript clusterfinder_Auto.R <session_id>")
|
|
8 }
|
|
9
|
|
10 #Arguments definition
|
|
11
|
|
12 session_id <- args[1]
|
|
13
|
|
14 print(session_id)
|
|
15
|
|
16 setwd(session_id)
|
|
17
|
|
18 options(error=traceback)
|
|
19 options(warn=-1)
|
|
20
|
|
21 #Load required library
|
|
22 library(cluster)
|
|
23
|
|
24 #Load required libraries
|
|
25 library(clValid)
|
|
26
|
|
27 #Loading required package: cluster
|
|
28 library(fpc)
|
|
29
|
|
30 #Install NbClust package
|
|
31 #install.packages("NbClust",dependencies = TRUE)
|
|
32
|
|
33 #Loading required package: NbClust
|
|
34 library(NbClust)
|
|
35
|
|
36
|
|
37 #Read the dataset
|
|
38 data_centers <- read.table("fillouts1file.txt", header=TRUE, sep=";", na.strings="")
|
|
39 #data_centers <- read.table("fillouts1file.txt", header=TRUE, sep=";", na.strings="")
|
|
40
|
|
41
|
|
42 #Run the XXX for selection of the number of clusters
|
|
43
|
|
44 clustersdata <- NbClust(data_centers, diss=NULL, distance = "euclidean", min.nc=2, max.nc=15, method = "kmeans", index = "all", alphaBeale = 0.1)
|
|
45
|
|
46 #capture.output(clustersdata$Best.nc[1,], file = "bestnumberclusters.txt")
|
|
47
|
|
48 #capture.output(NbClust(data_centers, diss=NULL, distance = "euclidean", min.nc=2, max.nc=15, method = "kmeans", index = "all", alphaBeale = 0.1), file = "proofclusters.txt")
|
|
49
|
|
50 #capture.output(NbClust(data_centers, diss=NULL, distance = "euclidean", min.nc=2, max.nc=15, method = "kmeans", index = "all", alphaBeale = 0.1), file = "numclusters.txt")
|
|
51
|
|
52 #best_num_cluster <- scan("proofclusters.txt", what = character(), quiet = TRUE)
|
|
53
|
|
54 k.max <- as.integer(rownames(table(clustersdata$Best.nc[1,]))[which.max(apply(table(clustersdata$Best.nc[1,]),MARGIN=1,max))])
|
|
55
|
|
56 km.res <- kmeans(as.matrix(data_centers), centers = k.max, nstart = 25)
|
|
57
|
|
58 best_position <- as.integer(rownames(table(clustersdata$Best.nc[1,]))[which.max(apply(table(clustersdata$Best.nc[1,]),MARGIN=1,max))])
|
|
59
|
|
60 # Make clustercoordenates table
|
|
61 #X Coordenate
|
|
62 start_x <- best_position + 1
|
|
63 end_x <- start_x + (best_position - 1)
|
|
64
|
|
65 cluster_pos_x <- 1
|
|
66 x_pos <- c()
|
|
67 for (c in start_x:end_x){
|
|
68 x_pos[cluster_pos_x] <- km.res$centers[c]
|
|
69 cluster_pos_x <- cluster_pos_x + 1
|
|
70 }
|
|
71 #print(x_pos)
|
|
72
|
|
73 #Y Coordenate
|
|
74 start_y <- end_x + 1
|
|
75 end_y <- start_y + (best_position - 1)
|
|
76
|
|
77 cluster_pos_y <- 1
|
|
78 y_pos <- c()
|
|
79 for(cc in start_y:end_y){
|
|
80 y_pos[cluster_pos_y] <- km.res$centers[cc]
|
|
81 cluster_pos_y <- cluster_pos_y + 1
|
|
82 }
|
|
83 #print(y_pos)
|
|
84
|
|
85 #Z Coordenate
|
|
86 start_z <- end_y + 1
|
|
87 end_z <- start_z + (best_position - 1)
|
|
88
|
|
89 cluster_pos_z <- 1
|
|
90 z_pos <- c()
|
|
91 for(ccc in start_z:end_z){
|
|
92 z_pos[cluster_pos_z] <- km.res$centers[ccc]
|
|
93 cluster_pos_z <- cluster_pos_z + 1
|
|
94 }
|
|
95 #print(z_pos)
|
|
96
|
|
97 #Create a file with clusters coordenates. Cluster coordenates are vectors x_pos, y_pos, z_pos
|
|
98 num_filas <- length(x_pos)
|
|
99 clusters_tabla <- matrix(1, nrow = num_filas, ncol = 4) #columns are column 1 number of ccluster, column 2 x-coordenates, colum 3 y-coordenates, column 4 z-coordenates
|
|
100
|
|
101 ##Add number of cluster located in column 1
|
|
102 for(i in 1:num_filas){
|
|
103 clusters_tabla[i,1] <- i
|
|
104 }
|
|
105
|
|
106 ##Add cluster x,y,z-coordenates in clusters_tabla
|
|
107 #x-coordenates
|
|
108 v_pos_x <- 1
|
|
109 for(f in 1:num_filas){
|
|
110 clusters_tabla[f,2] <- x_pos[v_pos_x]
|
|
111 v_pos_x <- v_pos_x + 1
|
|
112 }
|
|
113
|
|
114 #y-coordenates
|
|
115 v_pos_y <- 1
|
|
116 for(ff in 1:num_filas){
|
|
117 clusters_tabla[ff,3] <- y_pos[v_pos_y]
|
|
118 v_pos_y <- v_pos_y + 1
|
|
119 }
|
|
120
|
|
121 #z-coordenates
|
|
122 v_pos_z <- 1
|
|
123 for(fff in 1:num_filas){
|
|
124 clusters_tabla[fff,4] <- z_pos[v_pos_z]
|
|
125 v_pos_z <- v_pos_z + 1
|
|
126 }
|
|
127
|
|
128 #Write the head for the file which contains the number of clusters annd their coordenates
|
|
129 cabecera_clusterscoordenates <- paste("cluster", sep = " ")
|
|
130 cabecera <- c("x","y","z")
|
|
131
|
|
132 for(i in 1:3){
|
|
133 cabecera_clusterscoordenates <- paste(cabecera_clusterscoordenates, cabecera[i], sep=" ")
|
|
134 }
|
|
135 #write(cabecera_clusterscoordenates, file="/home/galaxy/galaxy/tools/proteindocking/scripts/clusterscoordenates.txt", append= TRUE)
|
|
136 write(cabecera_clusterscoordenates, file="clusterscoordenates.txt", append= TRUE)
|
|
137
|
|
138 #Write rows containing number of cluster and coordenates
|
|
139 for(i in 1:nrow(clusters_tabla)){
|
|
140 fila_completa <- paste(clusters_tabla[i,1], sep = " ")
|
|
141 for(j in 2:ncol(clusters_tabla)){
|
|
142 fila_completa <- paste(fila_completa, clusters_tabla[i,j], sep=" ")
|
|
143 }
|
|
144 write(fila_completa, file="clusterscoordenates.txt", append= TRUE)
|
|
145 #write(fila_completa, file="/home/galaxy/galaxy/tools/proteindocking/scripts/clusterscoordenates.txt", append= TRUE)
|
|
146 }
|
|
147
|
|
148
|
|
149 #Table to see Binding Sited finded in Galaxy screen
|
|
150 #BS_screen <- scan("/home/galaxy/galaxy/tools/proteindocking/scripts/clusterscoordenates.txt", what = character(), quiet = TRUE)
|
|
151 BS_screen <- scan("clusterscoordenates.txt", what = character(), quiet = TRUE)
|
|
152
|
|
153 BS_table <- matrix(1, nrow = nrow(clusters_tabla), ncol = ncol(clusters_tabla))
|
|
154
|
|
155 bs_count <- 5
|
|
156 for(i in 1:nrow(clusters_tabla)){
|
|
157 for(j in 1:ncol(clusters_tabla)){
|
|
158 BS_table[i,j] <- BS_screen[bs_count]
|
|
159 bs_count <- bs_count +1
|
|
160 }
|
|
161 }
|
|
162
|
|
163 colnames(BS_table) <- c("Binding Site", "X", "Y", "Z")
|
|
164 print(BS_table)
|
|
165
|
|
166
|
|
167
|
|
168
|
|
169
|
|
170
|