Mercurial > repos > rnateam > graphclust_postprocessing_no_align
comparison evaluation.py @ 0:0a48b2db75e7 draft default tip
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/GraphClust/CollectResultsNoAlign commit 2a6fd70c1bcec36ffdf0bba2ec82489b39cfc84e
| author | rnateam |
|---|---|
| date | Sat, 27 Oct 2018 13:49:00 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:0a48b2db75e7 |
|---|---|
| 1 #!/usr/bin/env python | |
import fnmatch
import glob
import os
import re
import subprocess
import sys
from os import system
from shutil import make_archive

from sklearn import metrics
| 9 | |
def sh(script):
    """Run *script* with ``bash -c``.

    The script is handed to bash as a single argument instead of being
    spliced into a single-quoted shell string, so a script that itself
    contains single quotes runs unchanged and cannot break out of the
    quoting. The exit status is ignored and nothing is returned.

    Raises OSError if the bash executable cannot be started.
    """
    subprocess.call(["bash", "-c", script])
| 12 | |
# Compare the cluster assignments found in <results_dir>/*.cluster.all with
# the labels listed in <fasta_dir>/data.names, then write fullTab.tabular,
# evaluation.txt and a RESULTS.zip archive of the results directory.
#
# NOTE(review): the nesting of the loops below was reconstructed from a
# rendering that dropped indentation -- confirm against the repository copy.

# Usage: evaluation.py <fasta_dir> <results_dir>
fasta_dir = sys.argv[1]
results_dir = sys.argv[2]
dataNames = os.path.join(fasta_dir, "data.names")

# Parallel lists: listOfHeaders[i] is the label taken from the input files
# and listOfClusters[i] the cluster label recorded for the same row.
listOfClusters = []
listOfHeaders = []
idsNames = set()

cluster_seqs_stats_files = fnmatch.filter(os.listdir(results_dir), '*.cluster.all')

# First pass over data.names: remember the 4th column of every row.
# A row with fewer than four columns raises IndexError, as before.
with open(dataNames, "r") as namesFile:
    for line in namesFile:
        idsNames.add(line.split()[3])

# Ids seen in a cluster file that also occur in data.names. A set keeps the
# membership test in the second pass constant-time instead of rescanning a
# list for every data.names row.
blackList = set()
numberOfClusters = 0
for singleFile in sorted(cluster_seqs_stats_files):
    numberOfClusters += 1
    with open(os.path.join(results_dir, singleFile), "r") as f:
        for line in f:
            splits = line.split()
            # Rows with fewer than 11 columns are still recorded, with an
            # empty header and no id.
            if len(splits) >= 11:
                header, idd = splits[10], splits[8]
            else:
                header, idd = '', ''
            clustNum = splits[2]
            listOfHeaders.append(header)
            listOfClusters.append(clustNum)
            if idd in idsNames:
                blackList.add(idd)

# Labels for rows not found in any cluster file start one past the number of
# cluster files; each such row receives its own label.
numberOfClusters += 1
# When True, every data.names row is appended regardless of blackList.
ignoreBlackList = False
with open(dataNames, "r") as namesFile:
    for line in namesFile:
        splits = line.split()
        if len(splits) >= 6:
            fullHeader, fullID = splits[5], splits[3]
        else:
            fullHeader, fullID = '', ''
        if ignoreBlackList or fullID not in blackList or not fullHeader:
            listOfHeaders.append(fullHeader)
            listOfClusters.append(str(numberOfClusters))
            numberOfClusters += 1

# One "<header>\t<cluster>" line per recorded row.
with open(os.path.join(results_dir, "fullTab.tabular"), "w") as full:
    full.write("".join(
        "%s\t%s\n" % pair for pair in zip(listOfHeaders, listOfClusters)))

if len(listOfHeaders) > 1:
    # Each scorer is called as scorer(labels_true, labels_pred); the order
    # here is the order of the lines in evaluation.txt.
    scorers = [
        ("completeness_score", metrics.completeness_score),
        ("homogeneity_score", metrics.homogeneity_score),
        ("adjusted_rand_score", metrics.adjusted_rand_score),
        ("adjusted_mutual_info_score", metrics.adjusted_mutual_info_score),
        ("v_measure_score", metrics.v_measure_score),
    ]
    toWrite = "".join(
        "{} : {}\n".format(scoreName, scorer(listOfHeaders, listOfClusters))
        for scoreName, scorer in scorers)
else:
    # Fewer than two rows: report every score as NA.
    toWrite = "completeness_score : NA \nhomogeneity_score : NA \nadjusted_rand_score : NA \nadjusted_mutual_info_score : NA \nv_measure_score : NA"

with open(os.path.join(results_dir, "evaluation.txt"), "w") as fOut:
    fOut.write(toWrite)

# Creates RESULTS.zip in the current working directory from results_dir.
make_archive('RESULTS', 'zip', root_dir=results_dir)
