Mercurial > repos > rnateam > graphclust_postprocessing
diff evaluation.py @ 12:b5f49453af8c draft
planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/branchForIterations/tools/GraphClust/CollectResults commit 65d322f9ab2f24d65b307f3553589149a1d678d5
author | rnateam |
---|---|
date | Wed, 31 May 2017 14:53:30 -0400 |
parents | 869a6e807d76 |
children | 79df97a1bc0f |
line wrap: on
line diff
--- a/evaluation.py Sat Mar 25 16:50:38 2017 -0400
+++ b/evaluation.py Wed May 31 14:53:30 2017 -0400
@@ -1,3 +1,4 @@
+#!/usr/bin/env python2
 import glob
 from os import system
 import re
@@ -10,56 +11,64 @@
 dataNames = "FASTA/data.names"
 
 listOfClusters = []
-listOfClasses = []
+listOfHeaders = []
+headersNames = set()
 cluster_seqs_stats_path = "RESULTS/*.cluster.all"
 cluster_seqs_stats_files = glob.glob(cluster_seqs_stats_path)
+with open(dataNames, "r") as names:
+    for line2 in names:
+        splits2 = line2.split()
+        fullHeader = ''
+        if len(splits2) >= 6:
+            fullHeader = splits2[5]
+            headersNames.add(fullHeader)
+
 blackList = []
 numberOfClusters = 0
 for singleFile in sorted(cluster_seqs_stats_files):
     numberOfClusters += 1
     with open(singleFile, "r") as f:
-        for line in f.readlines():
-            uniqueId = line.split()[8]
-            clustNum = line.split()[2]
-            rnaClass, sep, tail = uniqueId.partition("_")
-            listOfClasses.append(rnaClass)
+        for line in f:
+            splits = line.split()
+            header = ''
+            if len(splits) >= 11:
+                header = splits[10]
+            clustNum = splits[2]
+            listOfHeaders.append(header)
             listOfClusters.append(clustNum)
-            with open(dataNames, "r") as names:
-                for line in names.readlines():
-                    fullUniqeId = line.split()[3]
-                    rnaClass, sep, tail = fullUniqeId.partition("_")
-                    if fullUniqeId == uniqueId:
-                        blackList.append(uniqueId)
+            if header in headersNames:
+                blackList.append(header)
 numberOfClusters += 1 # 1 cluster for all unassigned seqs
 with open(dataNames, "r") as names:
     for line in names.readlines():
-        fullUniqeId = line.split()[3]
-        rnaClass, sep, tail = fullUniqeId.partition("_")
-        rnaClass, sep, tail = fullUniqeId.partition("_")
-        if fullUniqeId not in blackList:
-            listOfClasses.append(rnaClass)
+        splits = line.split()
+        fullUniqeId = splits[3]
+        fullHeader = ''
+        if len(splits) >= 6:
+            fullHeader = line.split()[5]
+        if fullHeader not in blackList or len(fullHeader) == 0:
+            listOfHeaders.append(fullHeader)
             listOfClusters.append(str(numberOfClusters))
             numberOfClusters += 1 # separate cluster for all unassigned seqs
 toWrite = ""
 for i in range(len(listOfClusters)):
-    toWrite += listOfClasses[i] + "\t" + listOfClusters[i] + '\n'
+    toWrite += listOfHeaders[i] + "\t" + listOfClusters[i] + '\n'
 with open("RESULTS/fullTab.tabular", "w") as full:
     full.write(toWrite)
 pattern = re.compile("^RF.*$")
-
-if len(listOfClasses) > 0 and pattern.match(str(listOfClasses[0])):
+if len(listOfHeaders) > 1: # and pattern.match(str(listOfHeaders[0])):
-    completeness_score = metrics.completeness_score(listOfClasses, listOfClusters)
-    homogeneity_score = metrics.homogeneity_score(listOfClasses, listOfClusters)
-    adjusted_rand_score = metrics.adjusted_rand_score(listOfClasses, listOfClusters)
-    adjusted_mutual_info_score = metrics.adjusted_mutual_info_score(listOfClasses, listOfClusters)
-    v_measure_score = metrics.v_measure_score(listOfClasses, listOfClusters)
+    completeness_score = metrics.completeness_score(listOfHeaders, listOfClusters)
+    homogeneity_score = metrics.homogeneity_score(listOfHeaders, listOfClusters)
+    adjusted_rand_score = metrics.adjusted_rand_score(listOfHeaders, listOfClusters)
+    adjusted_mutual_info_score = metrics.adjusted_mutual_info_score(listOfHeaders, listOfClusters)
+    v_measure_score = metrics.v_measure_score(listOfHeaders, listOfClusters)
     toWrite = "completeness_score : " + str(completeness_score) + "\n" + "homogeneity_score : " + str(homogeneity_score) + "\n" + "adjusted_rand_score : " +str(adjusted_rand_score) + "\n" + "adjusted_mutual_info_score : " + str(adjusted_mutual_info_score)+ "\n" + "v_measure_score : " + str(v_measure_score)