marea: Marea/marea_cluster.py comparison

comparison Marea/marea_cluster.py @ 33:abf0bfe01c78 draft

Uploaded

author	bimib
date	Wed, 16 Oct 2019 16:25:56 -0400
parents	944e15aa970a
children	1a97d1537623

comparison

equal deleted inserted replaced

-:b795e3e163e0
+:abf0bfe01c78
 type = str,
 required = True,
 help = 'your tool directory')
 parser.add_argument('-ms', '--min_samples',
-type = int,
+type = float,
 help = 'min samples for dbscan (optional)')
 parser.add_argument('-ep', '--eps',
-type = int,
+type = float,
 help = 'eps for dbscan (optional)')
 parser.add_argument('-bc', '--best_cluster',
 type = str,
 help = 'output of best cluster tsv')
 plt.savefig(path, bbox_inches='tight')
 ######################## dbscan ##############################################
-def dbscan(dataset, eps, min_samples):
+def dbscan(dataset, eps, min_samples, best_cluster):
 if not os.path.exists('clustering'):
 os.makedirs('clustering')
 if eps is not None:
 	clusterer = DBSCAN(eps = eps, min_samples = min_samples)
 n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)
 ##TODO: PLOT FOR DBSCAN (no centers) AND HIERARCHICAL
+labels = labels
-write_to_csv(dataset, labels, 'clustering/dbscan_results.tsv')
+predict = [x+1 for x in labels]
+classe = (pd.DataFrame(list(zip(dataset.index, predict)))).astype(str)
+classe.to_csv(best_cluster, sep = '\t', index = False, header = ['Patient_ID', 'Class'])
 ########################## hierarchical ######################################
-def hierachical_agglomerative(dataset, k_min, k_max):
+def hierachical_agglomerative(dataset, k_min, k_max, best_cluster):
 if not os.path.exists('clustering'):
 os.makedirs('clustering')
 plt.figure(figsize=(10, 7))
 fig = plt.gcf()
 fig.savefig('clustering/dendogram.png', dpi=200)
 range_n_clusters = [i for i in range(k_min, k_max+1)]
-for n_clusters in range_n_clusters:
+scores = []
+labels = []
+for n_clusters in range_n_clusters:
 cluster = AgglomerativeClustering(n_clusters=n_clusters, affinity='euclidean', linkage='ward')
 cluster.fit_predict(dataset)
 cluster_labels = cluster.labels_
+labels.append(cluster_labels)
 silhouette_avg = silhouette_score(dataset, cluster_labels)
 write_to_csv(dataset, cluster_labels, 'clustering/hierarchical_with_' + str(n_clusters) + '_clusters.tsv')
+scores.append(silhouette_avg)
 #warning("For n_clusters =", n_clusters,
 #"The average silhouette_score is :", silhouette_avg)
+best = max_index(scores) + k_min
+for i in range(len(labels)):
+if (i + k_min == best):
+labels = labels[i]
+predict = [x+1 for x in labels]
+classe = (pd.DataFrame(list(zip(dataset.index, predict)))).astype(str)
+classe.to_csv(best_cluster, sep = '\t', index = False, header = ['Patient_ID', 'Class'])
 if args.cluster_type == 'kmeans':
 kmeans(args.k_min, args.k_max, X, args.elbow, args.silhouette, args.davies, args.best_cluster)
 if args.cluster_type == 'dbscan':
-dbscan(X, args.eps, args.min_samples)
+dbscan(X, args.eps, args.min_samples, args.best_cluster)
 if args.cluster_type == 'hierarchy':
-hierachical_agglomerative(X, args.k_min, args.k_max)
+hierachical_agglomerative(X, args.k_min, args.k_max, args.best_cluster)
 ##############################################################################
 if __name__ == "__main__":
 main()

Mercurial > repos > bimib > marea

comparison Marea/marea_cluster.py @ 33:abf0bfe01c78 draft