Mercurial > repos > bitlab > plidflow
comparison PLIDflow/scripts/clusterfinder_Auto.R @ 6:795e11fac81b draft default tip
Included new tools for standardization
author | bitlab |
---|---|
date | Wed, 22 Apr 2020 06:12:00 -0400 |
parents | 6fcfa4756040 |
children |
comparison
equal
deleted
inserted
replaced
5:97f12f7cc852 | 6:795e11fac81b |
---|---|
1 # #clusterfinder_Auto.R makes a file containig the coordenates of the identified cluster/-s by Silhouette criterion | |
2 # | |
3 #!/usr/bin/env Rscript | |
4 args = commandArgs(trailingOnly=TRUE) | |
5 | |
6 if(length(args) < 1){ | |
7 stop("USE: Rscript clusterfinder_Auto.R <session_id>") | |
8 } | |
9 | |
10 #Arguments definition | |
11 | |
12 session_id <- args[1] | |
13 | |
14 print(session_id) | |
15 | |
16 setwd(session_id) | |
17 | |
18 options(error=traceback) | |
19 options(warn=-1) | |
20 | |
21 #Load required library | |
22 library(cluster) | |
23 | |
24 #Load required libraries | |
25 library(clValid) | |
26 | |
27 #Loading required package: cluster | |
28 library(fpc) | |
29 | |
30 #Install NbClust package | |
31 #install.packages("NbClust",dependencies = TRUE) | |
32 | |
33 #Loading required package: NbClust | |
34 library(NbClust) | |
35 | |
36 | |
37 #Read the dataset | |
38 data_centers <- read.table("fillouts1file.txt", header=TRUE, sep=";", na.strings="") | |
39 #data_centers <- read.table("fillouts1file.txt", header=TRUE, sep=";", na.strings="") | |
40 | |
41 | |
42 #Run the XXX for selection of the number of clusters | |
43 | |
44 clustersdata <- NbClust(data_centers, diss=NULL, distance = "euclidean", min.nc=2, max.nc=15, method = "kmeans", index = "all", alphaBeale = 0.1) | |
45 | |
46 #capture.output(clustersdata$Best.nc[1,], file = "bestnumberclusters.txt") | |
47 | |
48 #capture.output(NbClust(data_centers, diss=NULL, distance = "euclidean", min.nc=2, max.nc=15, method = "kmeans", index = "all", alphaBeale = 0.1), file = "proofclusters.txt") | |
49 | |
50 #capture.output(NbClust(data_centers, diss=NULL, distance = "euclidean", min.nc=2, max.nc=15, method = "kmeans", index = "all", alphaBeale = 0.1), file = "numclusters.txt") | |
51 | |
52 #best_num_cluster <- scan("proofclusters.txt", what = character(), quiet = TRUE) | |
53 | |
54 k.max <- as.integer(rownames(table(clustersdata$Best.nc[1,]))[which.max(apply(table(clustersdata$Best.nc[1,]),MARGIN=1,max))]) | |
55 | |
56 km.res <- kmeans(as.matrix(data_centers), centers = k.max, nstart = 25) | |
57 | |
58 best_position <- as.integer(rownames(table(clustersdata$Best.nc[1,]))[which.max(apply(table(clustersdata$Best.nc[1,]),MARGIN=1,max))]) | |
59 | |
60 # Make clustercoordenates table | |
61 #X Coordenate | |
62 start_x <- best_position + 1 | |
63 end_x <- start_x + (best_position - 1) | |
64 | |
65 cluster_pos_x <- 1 | |
66 x_pos <- c() | |
67 for (c in start_x:end_x){ | |
68 x_pos[cluster_pos_x] <- km.res$centers[c] | |
69 cluster_pos_x <- cluster_pos_x + 1 | |
70 } | |
71 #print(x_pos) | |
72 | |
73 #Y Coordenate | |
74 start_y <- end_x + 1 | |
75 end_y <- start_y + (best_position - 1) | |
76 | |
77 cluster_pos_y <- 1 | |
78 y_pos <- c() | |
79 for(cc in start_y:end_y){ | |
80 y_pos[cluster_pos_y] <- km.res$centers[cc] | |
81 cluster_pos_y <- cluster_pos_y + 1 | |
82 } | |
83 #print(y_pos) | |
84 | |
85 #Z Coordenate | |
86 start_z <- end_y + 1 | |
87 end_z <- start_z + (best_position - 1) | |
88 | |
89 cluster_pos_z <- 1 | |
90 z_pos <- c() | |
91 for(ccc in start_z:end_z){ | |
92 z_pos[cluster_pos_z] <- km.res$centers[ccc] | |
93 cluster_pos_z <- cluster_pos_z + 1 | |
94 } | |
95 #print(z_pos) | |
96 | |
97 #Create a file with clusters coordenates. Cluster coordenates are vectors x_pos, y_pos, z_pos | |
98 num_filas <- length(x_pos) | |
99 clusters_tabla <- matrix(1, nrow = num_filas, ncol = 4) #columns are column 1 number of ccluster, column 2 x-coordenates, colum 3 y-coordenates, column 4 z-coordenates | |
100 | |
101 ##Add number of cluster located in column 1 | |
102 for(i in 1:num_filas){ | |
103 clusters_tabla[i,1] <- i | |
104 } | |
105 | |
106 ##Add cluster x,y,z-coordenates in clusters_tabla | |
107 #x-coordenates | |
108 v_pos_x <- 1 | |
109 for(f in 1:num_filas){ | |
110 clusters_tabla[f,2] <- x_pos[v_pos_x] | |
111 v_pos_x <- v_pos_x + 1 | |
112 } | |
113 | |
114 #y-coordenates | |
115 v_pos_y <- 1 | |
116 for(ff in 1:num_filas){ | |
117 clusters_tabla[ff,3] <- y_pos[v_pos_y] | |
118 v_pos_y <- v_pos_y + 1 | |
119 } | |
120 | |
121 #z-coordenates | |
122 v_pos_z <- 1 | |
123 for(fff in 1:num_filas){ | |
124 clusters_tabla[fff,4] <- z_pos[v_pos_z] | |
125 v_pos_z <- v_pos_z + 1 | |
126 } | |
127 | |
128 #Write the head for the file which contains the number of clusters annd their coordenates | |
129 cabecera_clusterscoordenates <- paste("cluster", sep = " ") | |
130 cabecera <- c("x","y","z") | |
131 | |
132 for(i in 1:3){ | |
133 cabecera_clusterscoordenates <- paste(cabecera_clusterscoordenates, cabecera[i], sep=" ") | |
134 } | |
135 #write(cabecera_clusterscoordenates, file="/home/galaxy/galaxy/tools/proteindocking/scripts/clusterscoordenates.txt", append= TRUE) | |
136 write(cabecera_clusterscoordenates, file="clusterscoordenates.txt", append= TRUE) | |
137 | |
138 #Write rows containing number of cluster and coordenates | |
139 for(i in 1:nrow(clusters_tabla)){ | |
140 fila_completa <- paste(clusters_tabla[i,1], sep = " ") | |
141 for(j in 2:ncol(clusters_tabla)){ | |
142 fila_completa <- paste(fila_completa, clusters_tabla[i,j], sep=" ") | |
143 } | |
144 write(fila_completa, file="clusterscoordenates.txt", append= TRUE) | |
145 #write(fila_completa, file="/home/galaxy/galaxy/tools/proteindocking/scripts/clusterscoordenates.txt", append= TRUE) | |
146 } | |
147 | |
148 | |
149 #Table to see Binding Sited finded in Galaxy screen | |
150 #BS_screen <- scan("/home/galaxy/galaxy/tools/proteindocking/scripts/clusterscoordenates.txt", what = character(), quiet = TRUE) | |
151 BS_screen <- scan("clusterscoordenates.txt", what = character(), quiet = TRUE) | |
152 | |
153 BS_table <- matrix(1, nrow = nrow(clusters_tabla), ncol = ncol(clusters_tabla)) | |
154 | |
155 bs_count <- 5 | |
156 for(i in 1:nrow(clusters_tabla)){ | |
157 for(j in 1:ncol(clusters_tabla)){ | |
158 BS_table[i,j] <- BS_screen[bs_count] | |
159 bs_count <- bs_count +1 | |
160 } | |
161 } | |
162 | |
163 colnames(BS_table) <- c("Binding Site", "X", "Y", "Z") | |
164 print(BS_table) | |
165 | |
166 | |
167 | |
168 | |
169 | |
170 |