Mercurial > repos > bimib > marea
comparison Marea/marea_cluster.py @ 42:b3f9e13bf15f draft
Uploaded
| author | bimib |
|---|---|
| date | Tue, 03 Dec 2019 12:34:30 -0500 |
| parents | 2a082b4aed02 |
| children |
Legend: comparison — equal | deleted | inserted | replaced
| 41:9e02d127887a | 42:b3f9e13bf15f |
|---|---|
| 7 import sys | 7 import sys |
| 8 import argparse | 8 import argparse |
| 9 import os | 9 import os |
| 10 from sklearn.datasets import make_blobs | 10 from sklearn.datasets import make_blobs |
| 11 from sklearn.cluster import KMeans, DBSCAN, AgglomerativeClustering | 11 from sklearn.cluster import KMeans, DBSCAN, AgglomerativeClustering |
| 12 from sklearn.metrics import silhouette_samples, silhouette_score, davies_bouldin_score, cluster | 12 from sklearn.metrics import silhouette_samples, silhouette_score, cluster |
| 13 import matplotlib | 13 import matplotlib |
| 14 matplotlib.use('agg') | 14 matplotlib.use('agg') |
| 15 import matplotlib.pyplot as plt | 15 import matplotlib.pyplot as plt |
| 16 import scipy.cluster.hierarchy as shc | 16 import scipy.cluster.hierarchy as shc |
| 17 import matplotlib.cm as cm | 17 import matplotlib.cm as cm |
| 326 | 326 |
| 327 if not os.path.exists('clustering'): | 327 if not os.path.exists('clustering'): |
| 328 os.makedirs('clustering') | 328 os.makedirs('clustering') |
| 329 | 329 |
| 330 plt.figure(figsize=(10, 7)) | 330 plt.figure(figsize=(10, 7)) |
| 331 plt.title("Classes Dendogram") | 331 plt.title("Customer Dendograms") |
| 332 shc.dendrogram(shc.linkage(dataset, method='ward'), labels=dataset.index.values.tolist()) | 332 shc.dendrogram(shc.linkage(dataset, method='ward'), labels=dataset.index.values.tolist()) |
| 333 fig = plt.gcf() | 333 fig = plt.gcf() |
| 334 fig.savefig('clustering/dendogram.png', dpi=200) | 334 fig.savefig('clustering/dendogram.png', dpi=200) |
| 335 | 335 |
| 336 range_n_clusters = [i for i in range(k_min, k_max+1)] | 336 range_n_clusters = [i for i in range(k_min, k_max+1)] |
| 337 | 337 |
| 338 scores = [] | 338 scores = [] |
| 339 labels = [] | 339 labels = [] |
| 340 | 340 |
| 341 for n_clusters in range_n_clusters: | 341 n_classi = dataset.shape[0] |
| 342 | |
| 343 for n_clusters in range_n_clusters: | |
| 342 cluster = AgglomerativeClustering(n_clusters=n_clusters, affinity='euclidean', linkage='ward') | 344 cluster = AgglomerativeClustering(n_clusters=n_clusters, affinity='euclidean', linkage='ward') |
| 343 cluster.fit_predict(dataset) | 345 cluster.fit_predict(dataset) |
| 344 cluster_labels = cluster.labels_ | 346 cluster_labels = cluster.labels_ |
| 345 labels.append(cluster_labels) | 347 labels.append(cluster_labels) |
| 346 write_to_csv(dataset, cluster_labels, 'clustering/hierarchical_with_' + str(n_clusters) + '_clusters.tsv') | 348 write_to_csv(dataset, cluster_labels, 'clustering/hierarchical_with_' + str(n_clusters) + '_clusters.tsv') |
| 347 | 349 |
| 348 best = max_index(scores) + k_min | 350 best = max_index(scores) + k_min |
| 349 | 351 |
| 350 for i in range(len(labels)): | 352 for i in range(len(labels)): |
| 351 prefix = '' | 353 prefix = '' |
| 352 if (i + k_min == best): | 354 if (i + k_min == best): |
| 380 | 382 |
| 381 for i in X.columns: | 383 for i in X.columns: |
| 382 tmp = X[i][0] | 384 tmp = X[i][0] |
| 383 if tmp == None: | 385 if tmp == None: |
| 384 X = X.drop(columns=[i]) | 386 X = X.drop(columns=[i]) |
| 387 | |
| 388 if args.k_max != None: | |
| 389 numero_classi = X.shape[0] | |
| 390 while args.k_max >= numero_classi: | |
| 391 err = 'Skipping k = ' + str(args.k_max) + ' since it is >= number of classes of dataset' | |
| 392 warning(err) | |
| 393 args.k_max = args.k_max - 1 | |
| 385 | 394 |
| 386 | 395 |
| 387 if args.cluster_type == 'kmeans': | 396 if args.cluster_type == 'kmeans': |
| 388 kmeans(args.k_min, args.k_max, X, args.elbow, args.silhouette, args.best_cluster) | 397 kmeans(args.k_min, args.k_max, X, args.elbow, args.silhouette, args.best_cluster) |
| 389 | 398 |
