Mercurial > repos > bimib > cobraxy
comparison COBRAxy/marea_cluster.py @ 428:8cd0c70b0084 draft
Uploaded
| author | francesco_lapi |
|---|---|
| date | Wed, 10 Sep 2025 13:21:41 +0000 |
| parents | 1032cb1028f1 |
| children | 06564187fba3 |
comparison
equal
deleted
inserted
replaced
| 427:4a385fdb9e58 | 428:8cd0c70b0084 |
|---|---|
| 45 type = str, | 45 type = str, |
| 46 choices = ['kmeans', 'dbscan', 'hierarchy'], | 46 choices = ['kmeans', 'dbscan', 'hierarchy'], |
| 47 default = 'kmeans', | 47 default = 'kmeans', |
| 48 help = 'choose clustering algorythm') | 48 help = 'choose clustering algorythm') |
| 49 | 49 |
| 50 parser.add_argument('-sc', '--scaling', | |
| 51 type = str, | |
| 52 choices = ['true', 'false'], | |
| 53 default = 'true', | |
| 54 help = 'choose if you want to scaling the data') | |
| 55 | |
| 50 parser.add_argument('-k1', '--k_min', | 56 parser.add_argument('-k1', '--k_min', |
| 51 type = int, | 57 type = int, |
| 52 default = 2, | 58 default = 2, |
| 53 help = 'choose minimun cluster number to be generated') | 59 help = 'choose minimun cluster number to be generated') |
| 54 | 60 |
| 512 | 518 |
| 513 for i in X.columns: | 519 for i in X.columns: |
| 514 if any(val is None or np.isnan(val) for val in X[i]): | 520 if any(val is None or np.isnan(val) for val in X[i]): |
| 515 X = X.drop(columns=[i]) | 521 X = X.drop(columns=[i]) |
| 516 | 522 |
| 523 if args.scaling == True: | |
| 524 list_to_remove = [] | |
| 525 toll_std=1e-8 | |
| 526 for i in X.columns: | |
| 527 mean_i = X[i].mean() | |
| 528 std_i = X[i].std() | |
| 529 if std_i >toll_std: | |
| 530 #scaling with mean 0 and std 1 | |
| 531 X[i] = (X[i]-mean_i)/std_i | |
| 532 else: | |
| 533 #remove feature because std = 0 during clustering | |
| 534 list_to_remove.append(i) | |
| 535 if len(list_to_remove)>0: | |
| 536 X = X.drop(columns=list_to_remove) | |
| 537 | |
| 517 if args.k_max != None: | 538 if args.k_max != None: |
| 518 numero_classi = X.shape[0] | 539 numero_classi = X.shape[0] |
| 519 while args.k_max >= numero_classi: | 540 while args.k_max >= numero_classi: |
| 520 err = 'Skipping k = ' + str(args.k_max) + ' since it is >= number of classes of dataset' | 541 err = 'Skipping k = ' + str(args.k_max) + ' since it is >= number of classes of dataset' |
| 521 warning(err) | 542 warning(err) |
