Mercurial > repos > bimib > cobraxy
comparison COBRAxy/marea_cluster.py @ 309:38c9a958ea78 draft
Uploaded
author | francesco_lapi |
---|---|
date | Thu, 22 May 2025 16:03:37 +0000 |
parents | 4a677fc67aeb |
children | 4599fb23f25b |
comparison
equal
deleted
inserted
replaced
308:797d0e002934 | 309:38c9a958ea78 |
---|---|
57 default = 7, | 57 default = 7, |
58 help = 'choose maximum cluster number to be generated') | 58 help = 'choose maximum cluster number to be generated') |
59 | 59 |
60 parser.add_argument('-el', '--elbow', | 60 parser.add_argument('-el', '--elbow', |
61 type = str, | 61 type = str, |
62 default = 'false', | 62 default = 'False', |
63 choices = ['true', 'false'], | 63 choices = ['True', 'False'], |
64 help = 'choose if you want to generate an elbow plot for kmeans') | 64 help = 'choose if you want to generate an elbow plot for kmeans') |
65 | 65 |
66 parser.add_argument('-si', '--silhouette', | 66 parser.add_argument('-si', '--silhouette', |
67 type = str, | 67 type = str, |
68 default = 'false', | 68 default = 'False', |
69 choices = ['true', 'false'], | 69 choices = ['True', 'False'], |
70 help = 'choose if you want silhouette plots') | 70 help = 'choose if you want silhouette plots') |
71 | 71 |
72 parser.add_argument('-td', '--tool_dir', | 72 parser.add_argument('-td', '--tool_dir', |
73 type = str, | 73 type = str, |
74 required = True, | 74 required = True, |
75 help = 'your tool directory') | 75 help = 'your tool directory') |
76 | 76 |
77 parser.add_argument('-ms', '--min_samples', | 77 parser.add_argument('-ms', '--min_samples', |
78 type = float, | 78 type = int, |
79 help = 'min samples for dbscan (optional)') | 79 help = 'min samples for dbscan (optional)') |
80 | 80 |
81 parser.add_argument('-ep', '--eps', | 81 parser.add_argument('-ep', '--eps', |
82 type = float, | 82 type = float, |
83 help = 'eps for dbscan (optional)') | 83 help = 'eps for dbscan (optional)') |
104 s (str): The warning message to be logged and printed. | 104 s (str): The warning message to be logged and printed. |
105 | 105 |
106 Returns: | 106 Returns: |
107 None | 107 None |
108 """ | 108 """ |
109 args = process_args(sys.argv) | 109 |
110 with open(args.out_log, 'a') as log: | 110 with open(args.out_log, 'a') as log: |
111 log.write(s + "\n\n") | 111 log.write(s + "\n\n") |
112 print(s) | 112 print(s) |
113 | 113 |
114 ########################## read dataset ###################################### | 114 ########################## read dataset ###################################### |
211 | 211 |
212 Args: | 212 Args: |
213 k_min (int): The minimum number of clusters to consider. | 213 k_min (int): The minimum number of clusters to consider. |
214 k_max (int): The maximum number of clusters to consider. | 214 k_max (int): The maximum number of clusters to consider. |
215 dataset (pandas.DataFrame): The dataset to perform clustering on. | 215 dataset (pandas.DataFrame): The dataset to perform clustering on. |
216 elbow (str): Whether to generate an elbow plot for kmeans ('true' or 'false'). | 216 elbow (str): Whether to generate an elbow plot for kmeans ('True' or 'False'). |
217 silhouette (str): Whether to generate silhouette plots ('true' or 'false'). | 217 silhouette (str): Whether to generate silhouette plots ('True' or 'False'). |
218 best_cluster (str): The file path to save the output of the best cluster. | 218 best_cluster (str): The file path to save the output of the best cluster. |
219 | 219 |
220 Returns: | 220 Returns: |
221 None | 221 None |
222 """ | 222 """ |
223 if not os.path.exists(args.output_path): | 223 if not os.path.exists(args.output_path): |
224 os.makedirs(args.output_path) | 224 os.makedirs(args.output_path) |
225 | 225 |
226 | 226 |
227 if elbow == 'true': | 227 if elbow == 'True': |
228 elbow = True | 228 elbow = True |
229 else: | 229 else: |
230 elbow = False | 230 elbow = False |
231 | 231 |
232 if silhouette == 'true': | 232 if silhouette == 'True': |
233 silhouette = True | 233 silhouette = True |
234 else: | 234 else: |
235 silhouette = False | 235 silhouette = False |
236 | 236 |
237 range_n_clusters = [i for i in range(k_min, k_max+1)] | 237 range_n_clusters = [i for i in range(k_min, k_max+1)] |
441 Args: | 441 Args: |
442 dataset (pandas.DataFrame): The dataset to be clustered. | 442 dataset (pandas.DataFrame): The dataset to be clustered. |
443 k_min (int): The minimum number of clusters to consider. | 443 k_min (int): The minimum number of clusters to consider. |
444 k_max (int): The maximum number of clusters to consider. | 444 k_max (int): The maximum number of clusters to consider. |
445 best_cluster (str): The file path to save the output of the best cluster. | 445 best_cluster (str): The file path to save the output of the best cluster. |
446 silhouette (str): Whether to generate silhouette plots ('true' or 'false'). | 446 silhouette (str): Whether to generate silhouette plots ('True' or 'False'). |
447 | 447 |
448 Returns: | 448 Returns: |
449 None | 449 None |
450 """ | 450 """ |
451 if not os.path.exists(args.output_path): | 451 if not os.path.exists(args.output_path): |
475 | 475 |
476 for i in range(len(labels)): | 476 for i in range(len(labels)): |
477 prefix = '' | 477 prefix = '' |
478 if (i + k_min == best): | 478 if (i + k_min == best): |
479 prefix = '_BEST' | 479 prefix = '_BEST' |
480 if silhouette == 'true': | 480 if silhouette == 'True': |
481 silhouette_draw(dataset, labels[i], i + k_min, f'{args.output_path}/silhouette_with_' + str(i + k_min) + prefix + '_clusters.png') | 481 silhouette_draw(dataset, labels[i], i + k_min, f'{args.output_path}/silhouette_with_' + str(i + k_min) + prefix + '_clusters.png') |
482 | 482 |
483 for i in range(len(labels)): | 483 for i in range(len(labels)): |
484 if (i + k_min == best): | 484 if (i + k_min == best): |
485 labels = labels[i] | 485 labels = labels[i] |