Mercurial > repos > bimib > marea
comparison Marea/marea_cluster.py @ 37:2495c7772ca8 draft
Uploaded
author | bimib |
---|---|
date | Mon, 25 Nov 2019 11:57:57 -0500 |
parents | 94c51690d40c |
children | 2a082b4aed02 |
comparison
equal
deleted
inserted
replaced
36:94c51690d40c | 37:2495c7772ca8 |
---|---|
32 type = str, | 32 type = str, |
33 help = 'input dataset') | 33 help = 'input dataset') |
34 | 34 |
35 parser.add_argument('-cy', '--cluster_type', | 35 parser.add_argument('-cy', '--cluster_type', |
36 type = str, | 36 type = str, |
37 choices = ['kmeans', 'meanshift', 'dbscan', 'hierarchy'], | 37 choices = ['kmeans', 'dbscan', 'hierarchy'], |
38 default = 'kmeans', | 38 default = 'kmeans', |
39 help = 'choose clustering algorythm') | 39 help = 'choose clustering algorythm') |
40 | 40 |
41 parser.add_argument('-k1', '--k_min', | 41 parser.add_argument('-k1', '--k_min', |
42 type = int, | 42 type = int, |
57 parser.add_argument('-si', '--silhouette', | 57 parser.add_argument('-si', '--silhouette', |
58 type = str, | 58 type = str, |
59 default = 'false', | 59 default = 'false', |
60 choices = ['true', 'false'], | 60 choices = ['true', 'false'], |
61 help = 'choose if you want silhouette plots') | 61 help = 'choose if you want silhouette plots') |
62 | |
63 parser.add_argument('-db', '--davies', | |
64 type = str, | |
65 default = 'false', | |
66 choices = ['true', 'false'], | |
67 help = 'choose if you want davies bouldin scores') | |
68 | 62 |
69 parser.add_argument('-td', '--tool_dir', | 63 parser.add_argument('-td', '--tool_dir', |
70 type = str, | 64 type = str, |
71 required = True, | 65 required = True, |
72 help = 'your tool directory') | 66 help = 'your tool directory') |
150 | 144 |
151 return best_index | 145 return best_index |
152 | 146 |
153 ################################ kmeans ##################################### | 147 ################################ kmeans ##################################### |
154 | 148 |
155 def kmeans (k_min, k_max, dataset, elbow, silhouette, davies, best_cluster): | 149 def kmeans (k_min, k_max, dataset, elbow, silhouette, best_cluster): |
156 if not os.path.exists('clustering'): | 150 if not os.path.exists('clustering'): |
157 os.makedirs('clustering') | 151 os.makedirs('clustering') |
158 | 152 |
159 | 153 |
160 if elbow == 'true': | 154 if elbow == 'true': |
165 if silhouette == 'true': | 159 if silhouette == 'true': |
166 silhouette = True | 160 silhouette = True |
167 else: | 161 else: |
168 silhouette = False | 162 silhouette = False |
169 | 163 |
170 if davies == 'true': | |
171 davies = True | |
172 else: | |
173 davies = False | |
174 | |
175 | |
176 range_n_clusters = [i for i in range(k_min, k_max+1)] | 164 range_n_clusters = [i for i in range(k_min, k_max+1)] |
177 distortions = [] | 165 distortions = [] |
178 scores = [] | 166 scores = [] |
179 all_labels = [] | 167 all_labels = [] |
180 | 168 |
339 if not os.path.exists('clustering'): | 327 if not os.path.exists('clustering'): |
340 os.makedirs('clustering') | 328 os.makedirs('clustering') |
341 | 329 |
342 plt.figure(figsize=(10, 7)) | 330 plt.figure(figsize=(10, 7)) |
343 plt.title("Customer Dendograms") | 331 plt.title("Customer Dendograms") |
344 shc.dendrogram(shc.linkage(dataset, method='ward')) | 332 shc.dendrogram(shc.linkage(dataset, method='ward'), labels=dataset.index.values.tolist()) |
345 fig = plt.gcf() | 333 fig = plt.gcf() |
346 fig.savefig('clustering/dendogram.png', dpi=200) | 334 fig.savefig('clustering/dendogram.png', dpi=200) |
347 | 335 |
348 range_n_clusters = [i for i in range(k_min, k_max+1)] | 336 range_n_clusters = [i for i in range(k_min, k_max+1)] |
349 | 337 |
395 if tmp == None: | 383 if tmp == None: |
396 X = X.drop(columns=[i]) | 384 X = X.drop(columns=[i]) |
397 | 385 |
398 | 386 |
399 if args.cluster_type == 'kmeans': | 387 if args.cluster_type == 'kmeans': |
400 kmeans(args.k_min, args.k_max, X, args.elbow, args.silhouette, args.davies, args.best_cluster) | 388 kmeans(args.k_min, args.k_max, X, args.elbow, args.silhouette, args.best_cluster) |
401 | 389 |
402 if args.cluster_type == 'dbscan': | 390 if args.cluster_type == 'dbscan': |
403 dbscan(X, args.eps, args.min_samples, args.best_cluster) | 391 dbscan(X, args.eps, args.min_samples, args.best_cluster) |
404 | 392 |
405 if args.cluster_type == 'hierarchy': | 393 if args.cluster_type == 'hierarchy': |