Mercurial > repos > bimib > cobraxy
diff COBRAxy/marea_cluster.py @ 428:8cd0c70b0084 draft
Uploaded
author | francesco_lapi |
---|---|
date | Wed, 10 Sep 2025 13:21:41 +0000 |
parents | 1032cb1028f1 |
children | 06564187fba3 |
line wrap: on
line diff
--- a/COBRAxy/marea_cluster.py Wed Sep 10 11:38:08 2025 +0000
+++ b/COBRAxy/marea_cluster.py Wed Sep 10 13:21:41 2025 +0000
@@ -47,6 +47,12 @@
                         default = 'kmeans',
                         help = 'choose clustering algorythm')
 
+    parser.add_argument('-sc', '--scaling',
+                        type = str,
+                        choices = ['true', 'false'],
+                        default = 'true',
+                        help = 'choose if you want to scaling the data')
+
     parser.add_argument('-k1', '--k_min',
                         type = int,
                         default = 2,
@@ -514,6 +520,21 @@
         if any(val is None or np.isnan(val) for val in X[i]):
             X = X.drop(columns=[i])
 
+    if args.scaling == True:
+        list_to_remove = []
+        toll_std=1e-8
+        for i in X.columns:
+            mean_i = X[i].mean()
+            std_i = X[i].std()
+            if std_i >toll_std:
+                #scaling with mean 0 and std 1
+                X[i] = (X[i]-mean_i)/std_i
+            else:
+                #remove feature because std = 0 during clustering
+                list_to_remove.append(i)
+        if len(list_to_remove)>0:
+            X = X.drop(columns=list_to_remove)
+
     if args.k_max != None:
         numero_classi = X.shape[0]
         while args.k_max >= numero_classi: