Mercurial > repos > bimib > marea
diff Marea/marea_cluster.py @ 33:abf0bfe01c78 draft
Uploaded
author | bimib |
---|---|
date | Wed, 16 Oct 2019 16:25:56 -0400 |
parents | 944e15aa970a |
children | 1a97d1537623 |
line wrap: on
line diff
```diff
--- a/Marea/marea_cluster.py Wed Oct 16 07:12:37 2019 -0400
+++ b/Marea/marea_cluster.py Wed Oct 16 16:25:56 2019 -0400
@@ -72,11 +72,11 @@
 help = 'your tool directory')
 parser.add_argument('-ms', '--min_samples',
- type = int,
+ type = float,
 help = 'min samples for dbscan (optional)')
 parser.add_argument('-ep', '--eps',
- type = int,
+ type = float,
 help = 'eps for dbscan (optional)')
 parser.add_argument('-bc', '--best_cluster',
@@ -310,7 +310,7 @@
 ######################## dbscan ##############################################
-def dbscan(dataset, eps, min_samples):
+def dbscan(dataset, eps, min_samples, best_cluster):
 if not os.path.exists('clustering'):
 os.makedirs('clustering')
@@ -331,12 +331,15 @@
 ##TODO: PLOT SU DBSCAN (no centers) e HIERARCHICAL
-
- write_to_csv(dataset, labels, 'clustering/dbscan_results.tsv')
+ labels = labels
+ predict = [x+1 for x in labels]
+ classe = (pd.DataFrame(list(zip(dataset.index, predict)))).astype(str)
+ classe.to_csv(best_cluster, sep = '\t', index = False, header = ['Patient_ID', 'Class'])
+
 ########################## hierachical #######################################
-def hierachical_agglomerative(dataset, k_min, k_max):
+def hierachical_agglomerative(dataset, k_min, k_max, best_cluster):
 if not os.path.exists('clustering'):
 os.makedirs('clustering')
@@ -349,16 +352,28 @@
 range_n_clusters = [i for i in range(k_min, k_max+1)]
- for n_clusters in range_n_clusters:
-
+ scores = []
+ labels = []
+ for n_clusters in range_n_clusters:
 cluster = AgglomerativeClustering(n_clusters=n_clusters, affinity='euclidean', linkage='ward')
 cluster.fit_predict(dataset)
 cluster_labels = cluster.labels_
-
+ labels.append(cluster_labels)
 silhouette_avg = silhouette_score(dataset, cluster_labels)
 write_to_csv(dataset, cluster_labels, 'clustering/hierarchical_with_' + str(n_clusters) + '_clusters.tsv')
+ scores.append(silhouette_avg)
 #warning("For n_clusters =", n_clusters,
 #"The average silhouette_score is :", silhouette_avg)
+
+ best = max_index(scores) + k_min
+
+ for i in range(len(labels)):
+ if (i + k_min == best):
+ labels = labels[i]
+ predict = [x+1 for x in labels]
+ classe = (pd.DataFrame(list(zip(dataset.index, predict)))).astype(str)
+ classe.to_csv(best_cluster, sep = '\t', index = False, header = ['Patient_ID', 'Class'])
+
@@ -390,10 +405,10 @@
 kmeans(args.k_min, args.k_max, X, args.elbow, args.silhouette, args.davies, args.best_cluster)
 if args.cluster_type == 'dbscan':
- dbscan(X, args.eps, args.min_samples)
+ dbscan(X, args.eps, args.min_samples, args.best_cluster)
 if args.cluster_type == 'hierarchy':
- hierachical_agglomerative(X, args.k_min, args.k_max)
+ hierachical_agglomerative(X, args.k_min, args.k_max, args.best_cluster)
 ##############################################################################
```