comparison PLIDflow/scripts/clusterfinder_Auto.R @ 6:795e11fac81b draft default tip

Included new tools for standardization
author bitlab
date Wed, 22 Apr 2020 06:12:00 -0400
parents 6fcfa4756040
children
comparison
equal deleted inserted replaced
5:97f12f7cc852 6:795e11fac81b
1 # #clusterfinder_Auto.R makes a file containig the coordenates of the identified cluster/-s by Silhouette criterion
2 #
3 #!/usr/bin/env Rscript
4 args = commandArgs(trailingOnly=TRUE)
5
6 if(length(args) < 1){
7 stop("USE: Rscript clusterfinder_Auto.R <session_id>")
8 }
9
10 #Arguments definition
11
12 session_id <- args[1]
13
14 print(session_id)
15
16 setwd(session_id)
17
18 options(error=traceback)
19 options(warn=-1)
20
21 #Load required library
22 library(cluster)
23
24 #Load required libraries
25 library(clValid)
26
27 #Loading required package: cluster
28 library(fpc)
29
30 #Install NbClust package
31 #install.packages("NbClust",dependencies = TRUE)
32
33 #Loading required package: NbClust
34 library(NbClust)
35
36
37 #Read the dataset
38 data_centers <- read.table("fillouts1file.txt", header=TRUE, sep=";", na.strings="")
39 #data_centers <- read.table("fillouts1file.txt", header=TRUE, sep=";", na.strings="")
40
41
42 #Run the XXX for selection of the number of clusters
43
44 clustersdata <- NbClust(data_centers, diss=NULL, distance = "euclidean", min.nc=2, max.nc=15, method = "kmeans", index = "all", alphaBeale = 0.1)
45
46 #capture.output(clustersdata$Best.nc[1,], file = "bestnumberclusters.txt")
47
48 #capture.output(NbClust(data_centers, diss=NULL, distance = "euclidean", min.nc=2, max.nc=15, method = "kmeans", index = "all", alphaBeale = 0.1), file = "proofclusters.txt")
49
50 #capture.output(NbClust(data_centers, diss=NULL, distance = "euclidean", min.nc=2, max.nc=15, method = "kmeans", index = "all", alphaBeale = 0.1), file = "numclusters.txt")
51
52 #best_num_cluster <- scan("proofclusters.txt", what = character(), quiet = TRUE)
53
54 k.max <- as.integer(rownames(table(clustersdata$Best.nc[1,]))[which.max(apply(table(clustersdata$Best.nc[1,]),MARGIN=1,max))])
55
56 km.res <- kmeans(as.matrix(data_centers), centers = k.max, nstart = 25)
57
58 best_position <- as.integer(rownames(table(clustersdata$Best.nc[1,]))[which.max(apply(table(clustersdata$Best.nc[1,]),MARGIN=1,max))])
59
60 # Make clustercoordenates table
61 #X Coordenate
62 start_x <- best_position + 1
63 end_x <- start_x + (best_position - 1)
64
65 cluster_pos_x <- 1
66 x_pos <- c()
67 for (c in start_x:end_x){
68 x_pos[cluster_pos_x] <- km.res$centers[c]
69 cluster_pos_x <- cluster_pos_x + 1
70 }
71 #print(x_pos)
72
73 #Y Coordenate
74 start_y <- end_x + 1
75 end_y <- start_y + (best_position - 1)
76
77 cluster_pos_y <- 1
78 y_pos <- c()
79 for(cc in start_y:end_y){
80 y_pos[cluster_pos_y] <- km.res$centers[cc]
81 cluster_pos_y <- cluster_pos_y + 1
82 }
83 #print(y_pos)
84
85 #Z Coordenate
86 start_z <- end_y + 1
87 end_z <- start_z + (best_position - 1)
88
89 cluster_pos_z <- 1
90 z_pos <- c()
91 for(ccc in start_z:end_z){
92 z_pos[cluster_pos_z] <- km.res$centers[ccc]
93 cluster_pos_z <- cluster_pos_z + 1
94 }
95 #print(z_pos)
96
97 #Create a file with clusters coordenates. Cluster coordenates are vectors x_pos, y_pos, z_pos
98 num_filas <- length(x_pos)
99 clusters_tabla <- matrix(1, nrow = num_filas, ncol = 4) #columns are column 1 number of ccluster, column 2 x-coordenates, colum 3 y-coordenates, column 4 z-coordenates
100
101 ##Add number of cluster located in column 1
102 for(i in 1:num_filas){
103 clusters_tabla[i,1] <- i
104 }
105
106 ##Add cluster x,y,z-coordenates in clusters_tabla
107 #x-coordenates
108 v_pos_x <- 1
109 for(f in 1:num_filas){
110 clusters_tabla[f,2] <- x_pos[v_pos_x]
111 v_pos_x <- v_pos_x + 1
112 }
113
114 #y-coordenates
115 v_pos_y <- 1
116 for(ff in 1:num_filas){
117 clusters_tabla[ff,3] <- y_pos[v_pos_y]
118 v_pos_y <- v_pos_y + 1
119 }
120
121 #z-coordenates
122 v_pos_z <- 1
123 for(fff in 1:num_filas){
124 clusters_tabla[fff,4] <- z_pos[v_pos_z]
125 v_pos_z <- v_pos_z + 1
126 }
127
128 #Write the head for the file which contains the number of clusters annd their coordenates
129 cabecera_clusterscoordenates <- paste("cluster", sep = " ")
130 cabecera <- c("x","y","z")
131
132 for(i in 1:3){
133 cabecera_clusterscoordenates <- paste(cabecera_clusterscoordenates, cabecera[i], sep=" ")
134 }
135 #write(cabecera_clusterscoordenates, file="/home/galaxy/galaxy/tools/proteindocking/scripts/clusterscoordenates.txt", append= TRUE)
136 write(cabecera_clusterscoordenates, file="clusterscoordenates.txt", append= TRUE)
137
138 #Write rows containing number of cluster and coordenates
139 for(i in 1:nrow(clusters_tabla)){
140 fila_completa <- paste(clusters_tabla[i,1], sep = " ")
141 for(j in 2:ncol(clusters_tabla)){
142 fila_completa <- paste(fila_completa, clusters_tabla[i,j], sep=" ")
143 }
144 write(fila_completa, file="clusterscoordenates.txt", append= TRUE)
145 #write(fila_completa, file="/home/galaxy/galaxy/tools/proteindocking/scripts/clusterscoordenates.txt", append= TRUE)
146 }
147
148
149 #Table to see Binding Sited finded in Galaxy screen
150 #BS_screen <- scan("/home/galaxy/galaxy/tools/proteindocking/scripts/clusterscoordenates.txt", what = character(), quiet = TRUE)
151 BS_screen <- scan("clusterscoordenates.txt", what = character(), quiet = TRUE)
152
153 BS_table <- matrix(1, nrow = nrow(clusters_tabla), ncol = ncol(clusters_tabla))
154
155 bs_count <- 5
156 for(i in 1:nrow(clusters_tabla)){
157 for(j in 1:ncol(clusters_tabla)){
158 BS_table[i,j] <- BS_screen[bs_count]
159 bs_count <- bs_count +1
160 }
161 }
162
163 colnames(BS_table) <- c("Binding Site", "X", "Y", "Z")
164 print(BS_table)
165
166
167
168
169
170