# HG changeset patch # User bimib # Date 1575394470 18000 # Node ID b3f9e13bf15feb49ddf2826d4db7badcd2ab4059 # Parent 9e02d127887a7d11fe4aa06418b35807ef860acc Uploaded diff -r 9e02d127887a -r b3f9e13bf15f Marea/marea_cluster.py --- a/Marea/marea_cluster.py Mon Nov 25 12:05:33 2019 -0500 +++ b/Marea/marea_cluster.py Tue Dec 03 12:34:30 2019 -0500 @@ -9,7 +9,7 @@ import os from sklearn.datasets import make_blobs from sklearn.cluster import KMeans, DBSCAN, AgglomerativeClustering -from sklearn.metrics import silhouette_samples, silhouette_score, davies_bouldin_score, cluster +from sklearn.metrics import silhouette_samples, silhouette_score, cluster import matplotlib matplotlib.use('agg') import matplotlib.pyplot as plt @@ -328,7 +328,7 @@ os.makedirs('clustering') plt.figure(figsize=(10, 7)) - plt.title("Classes Dendogram") + plt.title("Customer Dendograms") shc.dendrogram(shc.linkage(dataset, method='ward'), labels=dataset.index.values.tolist()) fig = plt.gcf() fig.savefig('clustering/dendogram.png', dpi=200) @@ -338,13 +338,15 @@ scores = [] labels = [] - for n_clusters in range_n_clusters: + n_classi = dataset.shape[0] + + for n_clusters in range_n_clusters: cluster = AgglomerativeClustering(n_clusters=n_clusters, affinity='euclidean', linkage='ward') cluster.fit_predict(dataset) cluster_labels = cluster.labels_ labels.append(cluster_labels) write_to_csv(dataset, cluster_labels, 'clustering/hierarchical_with_' + str(n_clusters) + '_clusters.tsv') - + best = max_index(scores) + k_min for i in range(len(labels)): @@ -382,6 +384,13 @@ tmp = X[i][0] if tmp == None: X = X.drop(columns=[i]) + + if args.k_max != None: + numero_classi = X.shape[0] + while args.k_max >= numero_classi: + err = 'Skipping k = ' + str(args.k_max) + ' since it is >= number of classes of dataset' + warning(err) + args.k_max = args.k_max - 1 if args.cluster_type == 'kmeans': diff -r 9e02d127887a -r b3f9e13bf15f Marea/marea_cluster.xml --- a/Marea/marea_cluster.xml Mon Nov 25 12:05:33 2019 -0500 +++ b/Marea/marea_cluster.xml Tue Dec 03 12:34:30 2019 -0500 @@ -1,4 +1,4 @@ - + marea_macros.xml