comparison evaluation.py @ 17:f93c868203cc draft default tip

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/GraphClust/CollectResults commit 4406735e44aba20859c252be39f4e99df28c7a92
author rnateam
date Sat, 27 Oct 2018 13:23:06 -0400
parents 79df97a1bc0f
children
comparison
equal deleted inserted replaced
16:79df97a1bc0f 17:f93c868203cc
1 #!/usr/bin/env python2 1 #!/usr/bin/env python
2 import glob 2 import glob
3 from os import system 3 from os import system
4 import re 4 import re
5 from sklearn import metrics 5 from sklearn import metrics
6 from shutil import make_archive 6 from shutil import make_archive
10 def sh(script): 10 def sh(script):
11 system("bash -c '%s'" % script) 11 system("bash -c '%s'" % script)
12 12
13 fasta_dir = sys.argv[1] 13 fasta_dir = sys.argv[1]
14 results_dir = sys.argv[2] 14 results_dir = sys.argv[2]
15 dataNames = fasta_dir+"/data.names" 15 dataNames = os.path.join(fasta_dir,"data.names")
16 16
17 listOfClusters = [] 17 listOfClusters = []
18 listOfHeaders = [] 18 listOfHeaders = []
19 headersNames = set() 19 headersNames = set()
20 idsNames = set() 20 idsNames = set()
52 blackList.append(idd) 52 blackList.append(idd)
53 53
54 numberOfClusters += 1 # 1 cluster for all unassigned seqs 54 numberOfClusters += 1 # 1 cluster for all unassigned seqs
55 ignoreBlackList = False 55 ignoreBlackList = False
56 with open(dataNames, "r") as names: 56 with open(dataNames, "r") as names:
57 for line in names.readlines(): 57 for line in names:
58 splits = line.split() 58 splits = line.split()
59 fullUniqeId = splits[3] 59 fullUniqeId = splits[3]
60 fullHeader = '' 60 fullHeader = ''
61 fullID = '' 61 fullID = ''
62 if len(splits) >= 6: 62 if len(splits) >= 6:
70 # else: 70 # else:
71 # print ("Skip header", fullHeader) 71 # print ("Skip header", fullHeader)
72 72
73 toWrite = "" 73 toWrite = ""
74 for i in range(len(listOfClusters)): 74 for i in range(len(listOfClusters)):
75 toWrite += listOfHeaders[i] + "\t" + listOfClusters[i] + '\n' 75 toWrite += "%s\t%s\n" % (listOfHeaders[i], listOfClusters[i])
76 76
77 with open(results_dir+"/fullTab.tabular", "w") as full: 77 with open(os.path.join(results_dir,"fullTab.tabular"), "w") as full:
78 full.write(toWrite) 78 full.write(toWrite)
79 79
80 80
81 pattern = re.compile("^RF.*$") 81 pattern = re.compile("^RF.*$")
82 82
86 homogeneity_score = metrics.homogeneity_score(listOfHeaders, listOfClusters) 86 homogeneity_score = metrics.homogeneity_score(listOfHeaders, listOfClusters)
87 adjusted_rand_score = metrics.adjusted_rand_score(listOfHeaders, listOfClusters) 87 adjusted_rand_score = metrics.adjusted_rand_score(listOfHeaders, listOfClusters)
88 adjusted_mutual_info_score = metrics.adjusted_mutual_info_score(listOfHeaders, listOfClusters) 88 adjusted_mutual_info_score = metrics.adjusted_mutual_info_score(listOfHeaders, listOfClusters)
89 v_measure_score = metrics.v_measure_score(listOfHeaders, listOfClusters) 89 v_measure_score = metrics.v_measure_score(listOfHeaders, listOfClusters)
90 90
91 toWrite = "completeness_score : " + str(completeness_score) + "\n" + "homogeneity_score : " + str(homogeneity_score) + "\n" + "adjusted_rand_score : " +str(adjusted_rand_score) + "\n" + "adjusted_mutual_info_score : " + str(adjusted_mutual_info_score)+ "\n" + "v_measure_score : " + str(v_measure_score) 91 toWrite = "completeness_score : {}\n".format(completeness_score)
92 toWrite += "homogeneity_score : {}\n".format(homogeneity_score)
93 toWrite += "adjusted_rand_score : {}\n".format(adjusted_rand_score)
94 toWrite += "adjusted_mutual_info_score : {}\n".format(adjusted_mutual_info_score)
95 toWrite += "v_measure_score : {}\n".format(v_measure_score)
92 96
93 97
94 else: 98 else:
95 toWrite = "completeness_score : NA \nhomogeneity_score : NA \nadjusted_rand_score : NA \nadjusted_mutual_info_score : NA \nv_measure_score : NA" 99 toWrite = "completeness_score : NA \nhomogeneity_score : NA \nadjusted_rand_score : NA \nadjusted_mutual_info_score : NA \nv_measure_score : NA"
96 100