Mercurial > repos > bimib > marea
comparison Marea/marea_cluster.py @ 12:3d77287caf22 draft
Uploaded
author | bimib |
---|---|
date | Wed, 13 Feb 2019 05:08:25 -0500 |
parents | 2405255d6a09 |
children | e96f3b85e5a0 |
comparison
equal
deleted
inserted
replaced
11:d185506631e1 | 12:3d77287caf22 |
---|---|
538 ################################# clustering ################################## | 538 ################################# clustering ################################## |
539 | 539 |
540 def f_cluster(resolve_rules): | 540 def f_cluster(resolve_rules): |
541 os.makedirs('cluster_out') | 541 os.makedirs('cluster_out') |
542 args = process_args(sys.argv) | 542 args = process_args(sys.argv) |
543 k_min = args.k_min | |
544 k_max = args.k_max | |
545 if k_min > k_max: | |
546 warning('k range boundaries inverted.\n') | |
547 tmp = k_min | |
548 k_min = k_max | |
549 k_max = tmp | |
550 else: | |
551 warning('k range correct.\n') | |
552 cluster_data = pd.DataFrame.from_dict(resolve_rules, orient = 'index') | 543 cluster_data = pd.DataFrame.from_dict(resolve_rules, orient = 'index') |
553 for i in cluster_data.columns: | 544 for i in cluster_data.columns: |
554 tmp = cluster_data[i][0] | 545 tmp = cluster_data[i][0] |
555 if tmp == None: | 546 if tmp == None: |
556 cluster_data = cluster_data.drop(columns=[i]) | 547 cluster_data = cluster_data.drop(columns=[i]) |
557 distorsion = [] | 548 distorsion = [] |
558 for i in range(k_min, k_max+1): | 549 for i in range(args.k_min, args.k_max+1): |
559 tmp_kmeans = KMeans(n_clusters = i, | 550 tmp_kmeans = KMeans(n_clusters = i, |
560 n_init = 100, | 551 n_init = 100, |
561 max_iter = 300, | 552 max_iter = 300, |
562 random_state = 0).fit(cluster_data) | 553 random_state = 0).fit(cluster_data) |
563 distorsion.append(tmp_kmeans.inertia_) | 554 distorsion.append(tmp_kmeans.inertia_) |
566 classe = (pd.DataFrame(list(zip(cluster_data.index, predict)))).astype(str) | 557 classe = (pd.DataFrame(list(zip(cluster_data.index, predict)))).astype(str) |
567 dest = 'cluster_out/K=' + str(i) + '_' + args.name+'.tsv' | 558 dest = 'cluster_out/K=' + str(i) + '_' + args.name+'.tsv' |
568 classe.to_csv(dest, sep = '\t', index = False, | 559 classe.to_csv(dest, sep = '\t', index = False, |
569 header = ['Patient_ID', 'Class']) | 560 header = ['Patient_ID', 'Class']) |
570 plt.figure(0) | 561 plt.figure(0) |
571 plt.plot(range(k_min, k_max+1), distorsion, marker = 'o') | 562 plt.plot(range(args.k_min, args.k_max+1), distorsion, marker = 'o') |
572 plt.xlabel('Number of cluster') | 563 plt.xlabel('Number of cluster') |
573 plt.ylabel('Distorsion') | 564 plt.ylabel('Distorsion') |
574 plt.savefig(args.elbow, dpi = 240, format = 'pdf') | 565 plt.savefig(args.elbow, dpi = 240, format = 'pdf') |
575 if args.cond_hier == 'yes': | 566 if args.cond_hier == 'yes': |
576 import scipy.cluster.hierarchy as hier | 567 import scipy.cluster.hierarchy as hier |
583 | 574 |
584 ################################# main ######################################## | 575 ################################# main ######################################## |
585 | 576 |
586 def main(): | 577 def main(): |
587 args = process_args(sys.argv) | 578 args = process_args(sys.argv) |
579 if args.k_min > args.k_max: | |
580 sys.exit('Execution aborted: max cluster > min cluster') | |
588 if args.rules_selector == 'HMRcore': | 581 if args.rules_selector == 'HMRcore': |
589 recon = pk.load(open(args.tool_dir + '/local/HMRcore_rules.p', 'rb')) | 582 recon = pk.load(open(args.tool_dir + '/local/HMRcore_rules.p', 'rb')) |
590 elif args.rules_selector == 'Recon': | 583 elif args.rules_selector == 'Recon': |
591 recon = pk.load(open(args.tool_dir + '/local/Recon_rules.p', 'rb')) | 584 recon = pk.load(open(args.tool_dir + '/local/Recon_rules.p', 'rb')) |
592 elif args.rules_selector == 'Custom': | 585 elif args.rules_selector == 'Custom': |