Mercurial > repos > rnateam > graphclust_postprocessing
comparison evaluation.py @ 12:b5f49453af8c draft
planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/branchForIterations/tools/GraphClust/CollectResults commit 65d322f9ab2f24d65b307f3553589149a1d678d5
| author | rnateam |
|---|---|
| date | Wed, 31 May 2017 14:53:30 -0400 |
| parents | 869a6e807d76 |
| children | 79df97a1bc0f |
comparison
equal
deleted
inserted
replaced
| 11:e080ebe95476 | 12:b5f49453af8c |
|---|---|
| | 1 #!/usr/bin/env python2 |
| 1 import glob | 2 import glob |
| 2 from os import system | 3 from os import system |
| 3 import re | 4 import re |
| 4 from sklearn import metrics | 5 from sklearn import metrics |
| 5 from shutil import make_archive | 6 from shutil import make_archive |
| 8 system("bash -c '%s'" % script) | 9 system("bash -c '%s'" % script) |
| 9 | 10 |
| 10 dataNames = "FASTA/data.names" | 11 dataNames = "FASTA/data.names" |
| 11 | 12 |
| 12 listOfClusters = [] | 13 listOfClusters = [] |
| 13 listOfClasses = [] | 14 listOfHeaders = [] |
| | 15 headersNames = set() |
| 14 cluster_seqs_stats_path = "RESULTS/*.cluster.all" | 16 cluster_seqs_stats_path = "RESULTS/*.cluster.all" |
| 15 cluster_seqs_stats_files = glob.glob(cluster_seqs_stats_path) | 17 cluster_seqs_stats_files = glob.glob(cluster_seqs_stats_path) |
| | 18 |
| | 19 with open(dataNames, "r") as names: |
| | 20 for line2 in names: |
| | 21 splits2 = line2.split() |
| | 22 fullHeader = '' |
| | 23 if len(splits2) >= 6: |
| | 24 fullHeader = splits2[5] |
| | 25 headersNames.add(fullHeader) |
| 16 | 26 |
| 17 blackList = [] | 27 blackList = [] |
| 18 numberOfClusters = 0 | 28 numberOfClusters = 0 |
| 19 for singleFile in sorted(cluster_seqs_stats_files): | 29 for singleFile in sorted(cluster_seqs_stats_files): |
| 20 numberOfClusters += 1 | 30 numberOfClusters += 1 |
| 21 with open(singleFile, "r") as f: | 31 with open(singleFile, "r") as f: |
| 22 for line in f.readlines(): | 32 for line in f: |
| 23 uniqueId = line.split()[8] | 33 splits = line.split() |
| 24 clustNum = line.split()[2] | 34 header = '' |
| 25 rnaClass, sep, tail = uniqueId.partition("_") | 35 if len(splits) >= 11: |
| 26 listOfClasses.append(rnaClass) | 36 header = splits[10] |
| | 37 clustNum = splits[2] |
| | 38 listOfHeaders.append(header) |
| 27 listOfClusters.append(clustNum) | 39 listOfClusters.append(clustNum) |
| 28 with open(dataNames, "r") as names: | 40 if header in headersNames: |
| 29 for line in names.readlines(): | 41 blackList.append(header) |
| 30 fullUniqeId = line.split()[3] | |
| 31 rnaClass, sep, tail = fullUniqeId.partition("_") | |
| 32 if fullUniqeId == uniqueId: | |
| 33 blackList.append(uniqueId) | |
| 34 | 42 |
| 35 numberOfClusters += 1 # 1 cluster for all unassigned seqs | 43 numberOfClusters += 1 # 1 cluster for all unassigned seqs |
| 36 with open(dataNames, "r") as names: | 44 with open(dataNames, "r") as names: |
| 37 for line in names.readlines(): | 45 for line in names.readlines(): |
| 38 fullUniqeId = line.split()[3] | 46 splits = line.split() |
| 39 rnaClass, sep, tail = fullUniqeId.partition("_") | 47 fullUniqeId = splits[3] |
| 40 rnaClass, sep, tail = fullUniqeId.partition("_") | 48 fullHeader = '' |
| 41 if fullUniqeId not in blackList: | 49 if len(splits) >= 6: |
| 42 listOfClasses.append(rnaClass) | 50 fullHeader = line.split()[5] |
| | 51 if fullHeader not in blackList or len(fullHeader) == 0: |
| | 52 listOfHeaders.append(fullHeader) |
| 43 listOfClusters.append(str(numberOfClusters)) | 53 listOfClusters.append(str(numberOfClusters)) |
| 44 numberOfClusters += 1 # separate cluster for all unassigned seqs | 54 numberOfClusters += 1 # separate cluster for all unassigned seqs |
| 45 | 55 |
| 46 toWrite = "" | 56 toWrite = "" |
| 47 for i in range(len(listOfClusters)): | 57 for i in range(len(listOfClusters)): |
| 48 toWrite += listOfClasses[i] + "\t" + listOfClusters[i] + '\n' | 58 toWrite += listOfHeaders[i] + "\t" + listOfClusters[i] + '\n' |
| 49 with open("RESULTS/fullTab.tabular", "w") as full: | 59 with open("RESULTS/fullTab.tabular", "w") as full: |
| 50 full.write(toWrite) | 60 full.write(toWrite) |
| 51 | 61 |
| 52 | 62 |
| 53 pattern = re.compile("^RF.*$") | 63 pattern = re.compile("^RF.*$") |
| 54 | 64 |
| | 65 if len(listOfHeaders) > 1: # and pattern.match(str(listOfHeaders[0])): |
| 55 | 66 |
| 56 if len(listOfClasses) > 0 and pattern.match(str(listOfClasses[0])): | 67 completeness_score = metrics.completeness_score(listOfHeaders, listOfClusters) |
| 57 | 68 homogeneity_score = metrics.homogeneity_score(listOfHeaders, listOfClusters) |
| 58 completeness_score = metrics.completeness_score(listOfClasses, listOfClusters) | 69 adjusted_rand_score = metrics.adjusted_rand_score(listOfHeaders, listOfClusters) |
| 59 homogeneity_score = metrics.homogeneity_score(listOfClasses, listOfClusters) | 70 adjusted_mutual_info_score = metrics.adjusted_mutual_info_score(listOfHeaders, listOfClusters) |
| 60 adjusted_rand_score = metrics.adjusted_rand_score(listOfClasses, listOfClusters) | 71 v_measure_score = metrics.v_measure_score(listOfHeaders, listOfClusters) |
| 61 adjusted_mutual_info_score = metrics.adjusted_mutual_info_score(listOfClasses, listOfClusters) | |
| 62 v_measure_score = metrics.v_measure_score(listOfClasses, listOfClusters) | |
| 63 | 72 |
| 64 toWrite = "completeness_score : " + str(completeness_score) + "\n" + "homogeneity_score : " + str(homogeneity_score) + "\n" + "adjusted_rand_score : " +str(adjusted_rand_score) + "\n" + "adjusted_mutual_info_score : " + str(adjusted_mutual_info_score)+ "\n" + "v_measure_score : " + str(v_measure_score) | 73 toWrite = "completeness_score : " + str(completeness_score) + "\n" + "homogeneity_score : " + str(homogeneity_score) + "\n" + "adjusted_rand_score : " +str(adjusted_rand_score) + "\n" + "adjusted_mutual_info_score : " + str(adjusted_mutual_info_score)+ "\n" + "v_measure_score : " + str(v_measure_score) |
| 65 | 74 |
| 66 else: | 75 else: |
| 67 toWrite = "completeness_score : NA \nhomogeneity_score : NA \nadjusted_rand_score : NA \nadjusted_mutual_info_score : NA \nv_measure_score : NA" | 76 toWrite = "completeness_score : NA \nhomogeneity_score : NA \nadjusted_rand_score : NA \nadjusted_mutual_info_score : NA \nv_measure_score : NA" |
