comparison Marea/marea_cluster.py @ 37:2495c7772ca8 draft

Uploaded
author bimib
date Mon, 25 Nov 2019 11:57:57 -0500
parents 94c51690d40c
children 2a082b4aed02
comparison
equal deleted inserted replaced
36:94c51690d40c 37:2495c7772ca8
32 type = str, 32 type = str,
33 help = 'input dataset') 33 help = 'input dataset')
34 34
35 parser.add_argument('-cy', '--cluster_type', 35 parser.add_argument('-cy', '--cluster_type',
36 type = str, 36 type = str,
37 choices = ['kmeans', 'meanshift', 'dbscan', 'hierarchy'], 37 choices = ['kmeans', 'dbscan', 'hierarchy'],
38 default = 'kmeans', 38 default = 'kmeans',
39 help = 'choose clustering algorythm') 39 help = 'choose clustering algorythm')
40 40
41 parser.add_argument('-k1', '--k_min', 41 parser.add_argument('-k1', '--k_min',
42 type = int, 42 type = int,
57 parser.add_argument('-si', '--silhouette', 57 parser.add_argument('-si', '--silhouette',
58 type = str, 58 type = str,
59 default = 'false', 59 default = 'false',
60 choices = ['true', 'false'], 60 choices = ['true', 'false'],
61 help = 'choose if you want silhouette plots') 61 help = 'choose if you want silhouette plots')
62
63 parser.add_argument('-db', '--davies',
64 type = str,
65 default = 'false',
66 choices = ['true', 'false'],
67 help = 'choose if you want davies bouldin scores')
68 62
69 parser.add_argument('-td', '--tool_dir', 63 parser.add_argument('-td', '--tool_dir',
70 type = str, 64 type = str,
71 required = True, 65 required = True,
72 help = 'your tool directory') 66 help = 'your tool directory')
150 144
151 return best_index 145 return best_index
152 146
153 ################################ kmeans ##################################### 147 ################################ kmeans #####################################
154 148
155 def kmeans (k_min, k_max, dataset, elbow, silhouette, davies, best_cluster): 149 def kmeans (k_min, k_max, dataset, elbow, silhouette, best_cluster):
156 if not os.path.exists('clustering'): 150 if not os.path.exists('clustering'):
157 os.makedirs('clustering') 151 os.makedirs('clustering')
158 152
159 153
160 if elbow == 'true': 154 if elbow == 'true':
165 if silhouette == 'true': 159 if silhouette == 'true':
166 silhouette = True 160 silhouette = True
167 else: 161 else:
168 silhouette = False 162 silhouette = False
169 163
170 if davies == 'true':
171 davies = True
172 else:
173 davies = False
174
175
176 range_n_clusters = [i for i in range(k_min, k_max+1)] 164 range_n_clusters = [i for i in range(k_min, k_max+1)]
177 distortions = [] 165 distortions = []
178 scores = [] 166 scores = []
179 all_labels = [] 167 all_labels = []
180 168
339 if not os.path.exists('clustering'): 327 if not os.path.exists('clustering'):
340 os.makedirs('clustering') 328 os.makedirs('clustering')
341 329
342 plt.figure(figsize=(10, 7)) 330 plt.figure(figsize=(10, 7))
343 plt.title("Customer Dendograms") 331 plt.title("Customer Dendograms")
344 shc.dendrogram(shc.linkage(dataset, method='ward')) 332 shc.dendrogram(shc.linkage(dataset, method='ward'), labels=dataset.index.values.tolist())
345 fig = plt.gcf() 333 fig = plt.gcf()
346 fig.savefig('clustering/dendogram.png', dpi=200) 334 fig.savefig('clustering/dendogram.png', dpi=200)
347 335
348 range_n_clusters = [i for i in range(k_min, k_max+1)] 336 range_n_clusters = [i for i in range(k_min, k_max+1)]
349 337
395 if tmp == None: 383 if tmp == None:
396 X = X.drop(columns=[i]) 384 X = X.drop(columns=[i])
397 385
398 386
399 if args.cluster_type == 'kmeans': 387 if args.cluster_type == 'kmeans':
400 kmeans(args.k_min, args.k_max, X, args.elbow, args.silhouette, args.davies, args.best_cluster) 388 kmeans(args.k_min, args.k_max, X, args.elbow, args.silhouette, args.best_cluster)
401 389
402 if args.cluster_type == 'dbscan': 390 if args.cluster_type == 'dbscan':
403 dbscan(X, args.eps, args.min_samples, args.best_cluster) 391 dbscan(X, args.eps, args.min_samples, args.best_cluster)
404 392
405 if args.cluster_type == 'hierarchy': 393 if args.cluster_type == 'hierarchy':