Mercurial > repos > rnateam > graphclust_postprocessing
comparison evaluation.py @ 1:ed8c7191b322 draft
planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/branchForIterations/tools/GraphClust/CollectResults commit 11e50007837b1efa01a3039c92df0ebf63f0f7e9
| author | rnateam |
|---|---|
| date | Thu, 22 Dec 2016 09:06:48 -0500 |
| parents | b797e13169a0 |
| children | b8e32e577597 |
comparison
equal
deleted
inserted
replaced
| 0:b797e13169a0 | 1:ed8c7191b322 |
|---|---|
| 1 import glob | 1 import glob |
| 2 from os import system | 2 from os import system |
| 3 import re | 3 import re |
| 4 | 4 |
| 5 | |
| 6 def sh(script): | 5 def sh(script): |
| 7 system("bash -c '%s'" % script) | 6 system("bash -c '%s'" % script) |
| 8 | 7 |
| | 8 dataNames = "FASTA/data.names" |
| 9 | 9 |
| 10 dataNames = "FASTA/data.names" | |
| 11 listOfClusters = [] | 10 listOfClusters = [] |
| 12 listOfClasses = [] | 11 listOfClasses = [] |
| 13 cluster_seqs_stats_path = "RESULTS/*.cluster.all" | 12 cluster_seqs_stats_path = "RESULTS/*.cluster.all" |
| 14 cluster_seqs_stats_files = glob.glob(cluster_seqs_stats_path) | 13 cluster_seqs_stats_files = glob.glob(cluster_seqs_stats_path) |
| 15 | 14 |
| 17 numberOfClusters = 0 | 16 numberOfClusters = 0 |
| 18 for singleFile in sorted(cluster_seqs_stats_files): | 17 for singleFile in sorted(cluster_seqs_stats_files): |
| 19 numberOfClusters += 1 | 18 numberOfClusters += 1 |
| 20 with open(singleFile, "r") as f: | 19 with open(singleFile, "r") as f: |
| 21 for line in f.readlines(): | 20 for line in f.readlines(): |
| 22 uniqueId = line.split()[6] | 21 uniqueId = line.split()[7] |
| 23 clustNum = line.split()[1] | 22 clustNum = line.split()[1] |
| 24 rnaClass, sep, tail = uniqueId.partition("_") | 23 rnaClass, sep, tail = uniqueId.partition("_") |
| 25 listOfClasses.append(rnaClass) | 24 listOfClasses.append(rnaClass) |
| 26 listOfClusters.append(clustNum) | 25 listOfClusters.append(clustNum) |
| 27 with open(dataNames, "r") as names: | 26 with open(dataNames, "r") as names: |
| 28 for line in names.readlines(): | 27 for line in names.readlines(): |
| 29 fullUniqeId = line.split()[3] | 28 fullUniqeId = line.split()[3] |
| 30 rnaClass, sep, tail = fullUniqeId.partition("_") | 29 rnaClass, sep, tail = fullUniqeId.partition("_") |
| 31 short_unique = re.findall("_".join(["[^_]+"] * 2), fullUniqeId)[0] | 30 if fullUniqeId == uniqueId: |
| 32 if short_unique == uniqueId: | |
| 33 blackList.append(uniqueId) | 31 blackList.append(uniqueId) |
| 34 | 32 |
| 35 numberOfClusters += 1 # 1 cluster for all unassigned seqs | 33 numberOfClusters += 1 # 1 cluster for all unassigned seqs |
| 36 with open(dataNames, "r") as names: | 34 with open(dataNames, "r") as names: |
| 37 for line in names.readlines(): | 35 for line in names.readlines(): |
| 38 fullUniqeId = line.split()[3] | 36 fullUniqeId = line.split()[3] |
| 39 rnaClass, sep, tail = fullUniqeId.partition("_") | 37 rnaClass, sep, tail = fullUniqeId.partition("_") |
| 40 short_unique = re.findall("_".join(["[^_]+"] * 2), fullUniqeId)[0] | |
| 41 rnaClass, sep, tail = fullUniqeId.partition("_") | 38 rnaClass, sep, tail = fullUniqeId.partition("_") |
| 42 if short_unique not in blackList: | 39 if fullUniqeId not in blackList: |
| 43 listOfClasses.append(rnaClass) | 40 listOfClasses.append(rnaClass) |
| 44 listOfClusters.append(str(numberOfClusters)) | 41 listOfClusters.append(str(numberOfClusters)) |
| 45 numberOfClusters += 1 # separate cluster for all unassigned seqs | 42 numberOfClusters += 1 # separate cluster for all unassigned seqs |
| 46 | 43 |
| 47 toWrite = "" | 44 toWrite = "" |
