Mercurial > repos > bimib > marea
comparison Marea/marea_cluster.py @ 10:2405255d6a09 draft
Uploaded
author | bimib |
---|---|
date | Wed, 13 Feb 2019 04:46:14 -0500 |
parents | 7c76e8e319c2 |
children | 3d77287caf22 |
comparison legend: equal | deleted | inserted | replaced
9:7c76e8e319c2 | 10:2405255d6a09 |
---|---|
538 ################################# clustering ################################## | 538 ################################# clustering ################################## |
539 | 539 |
540 def f_cluster(resolve_rules): | 540 def f_cluster(resolve_rules): |
541 os.makedirs('cluster_out') | 541 os.makedirs('cluster_out') |
542 args = process_args(sys.argv) | 542 args = process_args(sys.argv) |
543 k_min = args.k_min | |
544 k_max = args.k_max | |
545 if k_min > k_max: | |
546 warning('k range boundaries inverted.\n') | |
547 tmp = k_min | |
548 k_min = k_max | |
549 k_max = tmp | |
550 else: | |
551 warning('k range correct.\n') | |
543 cluster_data = pd.DataFrame.from_dict(resolve_rules, orient = 'index') | 552 cluster_data = pd.DataFrame.from_dict(resolve_rules, orient = 'index') |
544 for i in cluster_data.columns: | 553 for i in cluster_data.columns: |
545 tmp = cluster_data[i][0] | 554 tmp = cluster_data[i][0] |
546 if tmp == None: | 555 if tmp == None: |
547 cluster_data = cluster_data.drop(columns=[i]) | 556 cluster_data = cluster_data.drop(columns=[i]) |
548 distorsion = [] | 557 distorsion = [] |
549 for i in range(args.k_min, args.k_max+1): | 558 for i in range(k_min, k_max+1): |
550 tmp_kmeans = KMeans(n_clusters = i, | 559 tmp_kmeans = KMeans(n_clusters = i, |
551 n_init = 100, | 560 n_init = 100, |
552 max_iter = 300, | 561 max_iter = 300, |
553 random_state = 0).fit(cluster_data) | 562 random_state = 0).fit(cluster_data) |
554 distorsion.append(tmp_kmeans.inertia_) | 563 distorsion.append(tmp_kmeans.inertia_) |
557 classe = (pd.DataFrame(list(zip(cluster_data.index, predict)))).astype(str) | 566 classe = (pd.DataFrame(list(zip(cluster_data.index, predict)))).astype(str) |
558 dest = 'cluster_out/K=' + str(i) + '_' + args.name+'.tsv' | 567 dest = 'cluster_out/K=' + str(i) + '_' + args.name+'.tsv' |
559 classe.to_csv(dest, sep = '\t', index = False, | 568 classe.to_csv(dest, sep = '\t', index = False, |
560 header = ['Patient_ID', 'Class']) | 569 header = ['Patient_ID', 'Class']) |
561 plt.figure(0) | 570 plt.figure(0) |
562 plt.plot(range(args.k_min, args.k_max+1), distorsion, marker = 'o') | 571 plt.plot(range(k_min, k_max+1), distorsion, marker = 'o') |
563 plt.xlabel('Number of cluster') | 572 plt.xlabel('Number of cluster') |
564 plt.ylabel('Distorsion') | 573 plt.ylabel('Distorsion') |
565 plt.savefig(args.elbow, dpi = 240, format = 'pdf') | 574 plt.savefig(args.elbow, dpi = 240, format = 'pdf') |
566 if args.cond_hier == 'yes': | 575 if args.cond_hier == 'yes': |
567 import scipy.cluster.hierarchy as hier | 576 import scipy.cluster.hierarchy as hier |
574 | 583 |
575 ################################# main ######################################## | 584 ################################# main ######################################## |
576 | 585 |
577 def main(): | 586 def main(): |
578 args = process_args(sys.argv) | 587 args = process_args(sys.argv) |
579 if args.k_min > args.k_max: | |
580 warning('k range boundaries inverted.') | |
581 tmp = args.k_min | |
582 args.k_min = args.k_max | |
583 args.k_max = tmp | |
584 else: | |
585 warning('k range correct.') | |
586 if args.rules_selector == 'HMRcore': | 588 if args.rules_selector == 'HMRcore': |
587 recon = pk.load(open(args.tool_dir + '/local/HMRcore_rules.p', 'rb')) | 589 recon = pk.load(open(args.tool_dir + '/local/HMRcore_rules.p', 'rb')) |
588 elif args.rules_selector == 'Recon': | 590 elif args.rules_selector == 'Recon': |
589 recon = pk.load(open(args.tool_dir + '/local/Recon_rules.p', 'rb')) | 591 recon = pk.load(open(args.tool_dir + '/local/Recon_rules.p', 'rb')) |
590 elif args.rules_selector == 'Custom': | 592 elif args.rules_selector == 'Custom': |