cobraxy: COBRAxy/marea_cluster.py comparison

comparison COBRAxy/marea_cluster.py @ 147:3fca9b568faf draft

Uploaded

author	bimib
date	Wed, 06 Nov 2024 13:57:24 +0000
parents	41f35c2f0c7b
children	7f3552eaf774

comparison

equal deleted inserted replaced

-:88cf4543e210
+:3fca9b568faf
 import scipy.cluster.hierarchy as shc
 import matplotlib.cm as cm
 from typing import Optional, Dict, List
 ################################# process args ###############################
-def process_args(args :List[str]) -> argparse.Namespace:
+def process_args(args :List[str] = None) -> argparse.Namespace:
 """
 Processes command-line arguments.
 Args:
 args (list): List of command-line arguments.
 parser.add_argument('-bc', '--best_cluster',
 type = str,
 help = 'output of best cluster tsv')
+parser.add_argument(
+'-idop', '--output_path',
-args = parser.parse_args()
+type = str,
+default='result',
+help = 'output path for maps')
+args = parser.parse_args(args)
 return args
 ########################### warning ###########################################
 def warning(s :str) -> None:
 """
 best_cluster (str): The file path to save the output of the best cluster.
 Returns:
 None
 """
-if not os.path.exists('clustering'):
+if not os.path.exists(args.output_path):
-os.makedirs('clustering')
+os.makedirs(args.output_path)
 if elbow == 'true':
 elbow = True
 else:
 for i in range(len(all_labels)):
 prefix = ''
 if (i + k_min == best):
 prefix = '_BEST'
-write_to_csv(dataset, all_labels[i], 'clustering/kmeans_with_' + str(i + k_min) + prefix + '_clusters.tsv')
+write_to_csv(dataset, all_labels[i], f'{args.output_path}/kmeans_with_' + str(i + k_min) + prefix + '_clusters.tsv')
 if (prefix == '_BEST'):
 labels = all_labels[i]
 predict = [x+1 for x in labels]
 if silhouette:
-silhouette_draw(dataset, all_labels[i], i + k_min, 'clustering/silhouette_with_' + str(i + k_min) + prefix + '_clusters.png')
+silhouette_draw(dataset, all_labels[i], i + k_min, f'{args.output_path}/silhouette_with_' + str(i + k_min) + prefix + '_clusters.png')
 if elbow:
 elbow_plot(distortions, k_min,k_max)
 x = list(range(k_min, k_max + 1))
 x.insert(0, 1)
 plt.plot(x, distortions, marker = 'o')
 plt.xlabel('Number of clusters (k)')
 plt.ylabel('Distortion')
-s = 'clustering/elbow_plot.png'
+s = f'{args.output_path}/elbow_plot.png'
 fig = plt.gcf()
 fig.set_size_inches(18.5, 10.5, forward = True)
 fig.savefig(s, dpi=100)
 best_cluster (str): The file path to save the output of the best cluster.
 Returns:
 None
 """
-if not os.path.exists('clustering'):
+if not os.path.exists(args.output_path):
-os.makedirs('clustering')
+os.makedirs(args.output_path)
 if eps is not None:
 clusterer = DBSCAN(eps = eps, min_samples = min_samples)
 else:
 clusterer = DBSCAN()
 silhouette (str): Whether to generate silhouette plots ('true' or 'false').
 Returns:
 None
 """
-if not os.path.exists('clustering'):
+if not os.path.exists(args.output_path):
-os.makedirs('clustering')
+os.makedirs(args.output_path)
 plt.figure(figsize=(10, 7))
 plt.title("Customer Dendograms")
 shc.dendrogram(shc.linkage(dataset, method='ward'), labels=dataset.index.values.tolist())
 fig = plt.gcf()
-fig.savefig('clustering/dendogram.png', dpi=200)
+fig.savefig(f'{args.output_path}/dendogram.png', dpi=200)
 range_n_clusters = [i for i in range(k_min, k_max+1)]
 scores = []
 labels = []
 for n_clusters in range_n_clusters:
 cluster = AgglomerativeClustering(n_clusters=n_clusters, affinity='euclidean', linkage='ward')
 cluster.fit_predict(dataset)
 cluster_labels = cluster.labels_
 labels.append(cluster_labels)
-write_to_csv(dataset, cluster_labels, 'clustering/hierarchical_with_' + str(n_clusters) + '_clusters.tsv')
+write_to_csv(dataset, cluster_labels, f'{args.output_path}/hierarchical_with_' + str(n_clusters) + '_clusters.tsv')
 best = max_index(scores) + k_min
 for i in range(len(labels)):
 prefix = ''
 if (i + k_min == best):
 prefix = '_BEST'
 if silhouette == 'true':
-silhouette_draw(dataset, labels[i], i + k_min, 'clustering/silhouette_with_' + str(i + k_min) + prefix + '_clusters.png')
+silhouette_draw(dataset, labels[i], i + k_min, f'{args.output_path}/silhouette_with_' + str(i + k_min) + prefix + '_clusters.png')
 for i in range(len(labels)):
 if (i + k_min == best):
 labels = labels[i]
 predict = [x+1 for x in labels]
 classe = (pd.DataFrame(list(zip(dataset.index, predict)))).astype(str)
 classe.to_csv(best_cluster, sep = '\t', index = False, header = ['Patient_ID', 'Class'])
 ############################# main ###########################################
-def main() -> None:
+def main(args_in:List[str] = None) -> None:
 """
 Initializes everything and sets the program in motion based on the frontend input arguments.
 Returns:
 None
 """
-if not os.path.exists('clustering'):
+global args
-os.makedirs('clustering')
+args = process_args(args_in)
-args = process_args(sys.argv)
+if not os.path.exists(args.output_path):
+os.makedirs(args.output_path)
 #Data read
 X = read_dataset(args.input)
 X = pd.DataFrame.to_dict(X, orient='list')

Mercurial > repos > bimib > cobraxy

comparison COBRAxy/marea_cluster.py @ 147:3fca9b568faf draft