comparison Marea/marea_cluster.py @ 12:3d77287caf22 draft

Uploaded
author bimib
date Wed, 13 Feb 2019 05:08:25 -0500
parents 2405255d6a09
children e96f3b85e5a0
comparison
equal deleted inserted replaced
11:d185506631e1 12:3d77287caf22
538 ################################# clustering ################################## 538 ################################# clustering ##################################
539 539
540 def f_cluster(resolve_rules): 540 def f_cluster(resolve_rules):
541 os.makedirs('cluster_out') 541 os.makedirs('cluster_out')
542 args = process_args(sys.argv) 542 args = process_args(sys.argv)
543 k_min = args.k_min
544 k_max = args.k_max
545 if k_min > k_max:
546 warning('k range boundaries inverted.\n')
547 tmp = k_min
548 k_min = k_max
549 k_max = tmp
550 else:
551 warning('k range correct.\n')
552 cluster_data = pd.DataFrame.from_dict(resolve_rules, orient = 'index') 543 cluster_data = pd.DataFrame.from_dict(resolve_rules, orient = 'index')
553 for i in cluster_data.columns: 544 for i in cluster_data.columns:
554 tmp = cluster_data[i][0] 545 tmp = cluster_data[i][0]
555 if tmp == None: 546 if tmp == None:
556 cluster_data = cluster_data.drop(columns=[i]) 547 cluster_data = cluster_data.drop(columns=[i])
557 distorsion = [] 548 distorsion = []
558 for i in range(k_min, k_max+1): 549 for i in range(args.k_min, args.k_max+1):
559 tmp_kmeans = KMeans(n_clusters = i, 550 tmp_kmeans = KMeans(n_clusters = i,
560 n_init = 100, 551 n_init = 100,
561 max_iter = 300, 552 max_iter = 300,
562 random_state = 0).fit(cluster_data) 553 random_state = 0).fit(cluster_data)
563 distorsion.append(tmp_kmeans.inertia_) 554 distorsion.append(tmp_kmeans.inertia_)
566 classe = (pd.DataFrame(list(zip(cluster_data.index, predict)))).astype(str) 557 classe = (pd.DataFrame(list(zip(cluster_data.index, predict)))).astype(str)
567 dest = 'cluster_out/K=' + str(i) + '_' + args.name+'.tsv' 558 dest = 'cluster_out/K=' + str(i) + '_' + args.name+'.tsv'
568 classe.to_csv(dest, sep = '\t', index = False, 559 classe.to_csv(dest, sep = '\t', index = False,
569 header = ['Patient_ID', 'Class']) 560 header = ['Patient_ID', 'Class'])
570 plt.figure(0) 561 plt.figure(0)
571 plt.plot(range(k_min, k_max+1), distorsion, marker = 'o') 562 plt.plot(range(args.k_min, args.k_max+1), distorsion, marker = 'o')
572 plt.xlabel('Number of cluster') 563 plt.xlabel('Number of cluster')
573 plt.ylabel('Distorsion') 564 plt.ylabel('Distorsion')
574 plt.savefig(args.elbow, dpi = 240, format = 'pdf') 565 plt.savefig(args.elbow, dpi = 240, format = 'pdf')
575 if args.cond_hier == 'yes': 566 if args.cond_hier == 'yes':
576 import scipy.cluster.hierarchy as hier 567 import scipy.cluster.hierarchy as hier
583 574
584 ################################# main ######################################## 575 ################################# main ########################################
585 576
586 def main(): 577 def main():
587 args = process_args(sys.argv) 578 args = process_args(sys.argv)
579 if args.k_min > args.k_max:
580 sys.exit('Execution aborted: max cluster > min cluster')
588 if args.rules_selector == 'HMRcore': 581 if args.rules_selector == 'HMRcore':
589 recon = pk.load(open(args.tool_dir + '/local/HMRcore_rules.p', 'rb')) 582 recon = pk.load(open(args.tool_dir + '/local/HMRcore_rules.p', 'rb'))
590 elif args.rules_selector == 'Recon': 583 elif args.rules_selector == 'Recon':
591 recon = pk.load(open(args.tool_dir + '/local/Recon_rules.p', 'rb')) 584 recon = pk.load(open(args.tool_dir + '/local/Recon_rules.p', 'rb'))
592 elif args.rules_selector == 'Custom': 585 elif args.rules_selector == 'Custom':