Mercurial > repos > rnateam > graphclust_postprocessing
changeset 16:79df97a1bc0f draft
planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/branchForIterations/tools/GraphClust/CollectResults commit b8f82a8101d9eb74c8dbac51b8a0c75585a888a2
author | rnateam |
---|---|
date | Fri, 23 Feb 2018 10:46:41 -0500 |
parents | c7ca5d173482 |
children | f93c868203cc |
files | evaluation.py glob_report.xml |
diffstat | 2 files changed, 30 insertions(+), 11 deletions(-) [+] |
line wrap: on
line diff
--- a/evaluation.py Mon Nov 20 04:50:48 2017 -0500
+++ b/evaluation.py Fri Feb 23 10:46:41 2018 -0500
@@ -4,18 +4,24 @@
 import re
 from sklearn import metrics
 from shutil import make_archive
+import sys
+import fnmatch, os
 def sh(script):
     system("bash -c '%s'" % script)
-dataNames = "FASTA/data.names"
+fasta_dir = sys.argv[1]
+results_dir = sys.argv[2]
+dataNames = fasta_dir+"/data.names"
 listOfClusters = []
 listOfHeaders = []
 headersNames = set()
-cluster_seqs_stats_path = "RESULTS/*.cluster.all"
-cluster_seqs_stats_files = glob.glob(cluster_seqs_stats_path)
+idsNames = set()
+
+names = os.listdir(results_dir)
+cluster_seqs_stats_files = fnmatch.filter(names, '*.cluster.all')
 with open(dataNames, "r") as names:
     for line2 in names:
         splits2 = line2.split()
@@ -23,40 +29,52 @@
         if len(splits2) >= 6:
             fullHeader = splits2[5]
             headersNames.add(fullHeader)
+            fullID = splits2[3]
+            idsNames.add(fullID)
 blackList = []
 numberOfClusters = 0
 for singleFile in sorted(cluster_seqs_stats_files):
+    singleFile = os.path.join(results_dir,singleFile)
     numberOfClusters += 1
     with open(singleFile, "r") as f:
         for line in f:
             splits = line.split()
             header = ''
+            idd = ''
             if len(splits) >= 11:
                 header = splits[10]
+                idd = splits[8]
             clustNum = splits[2]
             listOfHeaders.append(header)
             listOfClusters.append(clustNum)
-            if header in headersNames:
-                blackList.append(header)
+            if idd in idsNames: #header in headersNames:
+                blackList.append(idd)
 numberOfClusters += 1 # 1 cluster for all unassigned seqs
+ignoreBlackList = False
 with open(dataNames, "r") as names:
     for line in names.readlines():
         splits = line.split()
         fullUniqeId = splits[3]
         fullHeader = ''
+        fullID = ''
         if len(splits) >= 6:
             fullHeader = line.split()[5]
-        if fullHeader not in blackList or len(fullHeader) == 0:
+            fullID = line.split()[3]
+        if ignoreBlackList or ( fullID not in blackList #fullHeader not in blackList
+            or len(fullHeader) == 0):
             listOfHeaders.append(fullHeader)
             listOfClusters.append(str(numberOfClusters))
             numberOfClusters += 1 # separate cluster for all unassigned seqs
+        # else:
+        # print ("Skip header", fullHeader)
 toWrite = ""
 for i in range(len(listOfClusters)):
     toWrite += listOfHeaders[i] + "\t" + listOfClusters[i] + '\n'
-with open("RESULTS/fullTab.tabular", "w") as full:
+
+with open(results_dir+"/fullTab.tabular", "w") as full:
     full.write(toWrite)
@@ -72,11 +90,12 @@
     toWrite = "completeness_score : " + str(completeness_score) + "\n" + "homogeneity_score : " + str(homogeneity_score) + "\n" + "adjusted_rand_score : " +str(adjusted_rand_score) + "\n" + "adjusted_mutual_info_score : " + str(adjusted_mutual_info_score)+ "\n" + "v_measure_score : " + str(v_measure_score)
+
 else:
     toWrite = "completeness_score : NA \nhomogeneity_score : NA \nadjusted_rand_score : NA \nadjusted_mutual_info_score : NA \nv_measure_score : NA"
-with open("RESULTS/evaluation.txt", "w") as fOut:
+with open(os.path.join(results_dir,"evaluation.txt"), "w") as fOut:
     fOut.write(toWrite)
-make_archive('RESULTS', 'zip', root_dir='RESULTS')
+make_archive('RESULTS', 'zip', root_dir=results_dir)
--- a/glob_report.xml Mon Nov 20 04:50:48 2017 -0500
+++ b/glob_report.xml Fri Feb 23 10:46:41 2018 -0500
@@ -1,4 +1,4 @@
-<tool id="glob_report" name="cluster_collection_report" version="0.3" >
+<tool id="glob_report" name="cluster_collection_report" version="0.4" >
 <requirements>
 <requirement type="package" version="0.5.2">graphclust-wrappers</requirement>
 <requirement type="package" version='0.5'>perl-array-utils</requirement>
@@ -73,7 +73,7 @@
 #end if
 &&
- python '$__tool_directory__/evaluation.py'
+ python '$__tool_directory__/evaluation.py' FASTA/ RESULTS/
 #if $cdhit:
 &&