Mercurial > repos > rnateam > graphclust_postprocessing_no_align
comparison evaluation.py @ 0:0a48b2db75e7 draft default tip
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/GraphClust/CollectResultsNoAlign commit 2a6fd70c1bcec36ffdf0bba2ec82489b39cfc84e
author | rnateam |
---|---|
date | Sat, 27 Oct 2018 13:49:00 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:0a48b2db75e7 |
---|---|
#!/usr/bin/env python
import fnmatch
import glob
import os
import re
import subprocess
import sys
from os import system
from shutil import make_archive

from sklearn import metrics

def sh(script):
    """Run *script* through bash and return its exit status.

    The previous implementation interpolated the script into a
    single-quoted shell string (``system("bash -c '%s'" % script)``),
    which broke — and let the text escape the quoting — as soon as the
    script contained a single quote, and it discarded the exit status.
    Passing the script as its own argv element avoids quoting entirely.
    Returning the status is backward-compatible: existing callers
    ignore the (previously ``None``) return value.
    """
    return subprocess.run(["bash", "-c", script]).returncode
# Command line: evaluation.py <fasta_dir> <results_dir>
fasta_dir, results_dir = sys.argv[1], sys.argv[2]

# data.names holds one whitespace-separated record per input sequence.
dataNames = os.path.join(fasta_dir, "data.names")

# Accumulators filled while scanning the clustering results.
listOfClusters = []   # predicted cluster label per sequence
listOfHeaders = []    # "true" class label (sequence header) per sequence
headersNames = set()  # every header seen in data.names
idsNames = set()      # every sequence id seen in data.names
21 | |
22 | |
# Locate the per-cluster summary files produced by the clustering step.
names = os.listdir(results_dir)
cluster_seqs_stats_files = fnmatch.filter(names, '*.cluster.all')

# First pass over data.names: record every header (field 6) and every
# sequence id (field 4) for later membership tests.
# NOTE(review): assumed both the header and the id are recorded only
# for records with >= 6 fields — confirm against the upstream layout.
with open(dataNames, "r") as names:
    for record in names:
        fields = record.split()
        fullHeader = ''
        if len(fields) >= 6:
            fullHeader = fields[5]
            headersNames.add(fullHeader)
            idsNames.add(fields[3])
34 | |
# Every sequence that shows up in a *.cluster.all file is "assigned";
# its id goes on the black list so the second pass can skip it.
# Expected row layout: cluster number in field 3, id in field 9,
# header in field 11.
blackList = []
numberOfClusters = 0
for cluster_file in sorted(cluster_seqs_stats_files):
    numberOfClusters += 1
    cluster_path = os.path.join(results_dir, cluster_file)
    with open(cluster_path, "r") as handle:
        for row in handle:
            cols = row.split()
            if len(cols) < 11:
                continue  # malformed / short rows carry no assignment
            listOfHeaders.append(cols[10])   # true class label
            listOfClusters.append(cols[2])   # predicted cluster number
            if cols[8] in idsNames:
                blackList.append(cols[8])
53 | |
# Second pass: every sequence whose id never appeared in any cluster
# (or that has no header at all) is appended with its own fresh
# singleton cluster number, so the metrics below still see all inputs.
# Fixes vs. the original: the unused `fullUniqeId = splits[3]` was
# evaluated unguarded and raised IndexError on records with fewer than
# four fields; `line.split()` was also recomputed twice inside the
# guarded branch instead of reusing `splits`.
numberOfClusters += 1
ignoreBlackList = False  # flip to keep even blacklisted sequences
with open(dataNames, "r") as names:
    for line in names:
        splits = line.split()
        fullHeader = ''
        fullID = ''
        if len(splits) >= 6:
            fullHeader = splits[5]
            fullID = splits[3]
        if ignoreBlackList or (fullID not in blackList
                               or len(fullHeader) == 0):
            listOfHeaders.append(fullHeader)
            listOfClusters.append(str(numberOfClusters))
            numberOfClusters += 1  # separate cluster per unassigned seq
        # else: sequence was already counted inside a real cluster
72 | |
# Dump the (label, cluster) pairs as a two-column tab-separated table.
rows = [
    "%s\t%s\n" % (header, cluster)
    for header, cluster in zip(listOfHeaders, listOfClusters)
]
toWrite = "".join(rows)

with open(os.path.join(results_dir, "fullTab.tabular"), "w") as full:
    full.write(toWrite)
79 | |
80 | |
# NOTE(review): the header check against this pattern is disabled
# upstream (see the commented condition below); the compiled regex is
# kept only so behaviour matches the original script.
pattern = re.compile("^RF.*$")

# External cluster-evaluation scores, computed in this fixed order so
# the report lines come out identically to the original.
_metric_funcs = [
    ("completeness_score", metrics.completeness_score),
    ("homogeneity_score", metrics.homogeneity_score),
    ("adjusted_rand_score", metrics.adjusted_rand_score),
    ("adjusted_mutual_info_score", metrics.adjusted_mutual_info_score),
    ("v_measure_score", metrics.v_measure_score),
]

if len(listOfHeaders) > 1:  # and pattern.match(str(listOfHeaders[0])):
    toWrite = "".join(
        "{} : {}\n".format(name, fn(listOfHeaders, listOfClusters))
        for name, fn in _metric_funcs
    )
else:
    # Too few samples for the metrics to be meaningful.
    toWrite = "completeness_score : NA \nhomogeneity_score : NA \nadjusted_rand_score : NA \nadjusted_mutual_info_score : NA \nv_measure_score : NA"

with open(os.path.join(results_dir, "evaluation.txt"), "w") as fOut:
    fOut.write(toWrite)


# Bundle the whole results directory for download.
make_archive('RESULTS', 'zip', root_dir=results_dir)