Mercurial > repos > rnateam > graphclust_postprocessing
comparison evaluation.py @ 1:ed8c7191b322 draft
planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/branchForIterations/tools/GraphClust/CollectResults commit 11e50007837b1efa01a3039c92df0ebf63f0f7e9
author | rnateam |
---|---|
date | Thu, 22 Dec 2016 09:06:48 -0500 |
parents | b797e13169a0 |
children | b8e32e577597 |
comparison
equal
deleted
inserted
replaced
0:b797e13169a0 | 1:ed8c7191b322 |
---|---|
1 import glob | 1 import glob |
2 from os import system | 2 from os import system |
3 import re | 3 import re |
4 | 4 |
5 | |
6 def sh(script): | 5 def sh(script): |
7 system("bash -c '%s'" % script) | 6 system("bash -c '%s'" % script) |
8 | 7 |
| 8 dataNames = "FASTA/data.names" |
9 | 9 |
10 dataNames = "FASTA/data.names" | |
11 listOfClusters = [] | 10 listOfClusters = [] |
12 listOfClasses = [] | 11 listOfClasses = [] |
13 cluster_seqs_stats_path = "RESULTS/*.cluster.all" | 12 cluster_seqs_stats_path = "RESULTS/*.cluster.all" |
14 cluster_seqs_stats_files = glob.glob(cluster_seqs_stats_path) | 13 cluster_seqs_stats_files = glob.glob(cluster_seqs_stats_path) |
15 | 14 |
17 numberOfClusters = 0 | 16 numberOfClusters = 0 |
18 for singleFile in sorted(cluster_seqs_stats_files): | 17 for singleFile in sorted(cluster_seqs_stats_files): |
19 numberOfClusters += 1 | 18 numberOfClusters += 1 |
20 with open(singleFile, "r") as f: | 19 with open(singleFile, "r") as f: |
21 for line in f.readlines(): | 20 for line in f.readlines(): |
22 uniqueId = line.split()[6] | 21 uniqueId = line.split()[7] |
23 clustNum = line.split()[1] | 22 clustNum = line.split()[1] |
24 rnaClass, sep, tail = uniqueId.partition("_") | 23 rnaClass, sep, tail = uniqueId.partition("_") |
25 listOfClasses.append(rnaClass) | 24 listOfClasses.append(rnaClass) |
26 listOfClusters.append(clustNum) | 25 listOfClusters.append(clustNum) |
27 with open(dataNames, "r") as names: | 26 with open(dataNames, "r") as names: |
28 for line in names.readlines(): | 27 for line in names.readlines(): |
29 fullUniqeId = line.split()[3] | 28 fullUniqeId = line.split()[3] |
30 rnaClass, sep, tail = fullUniqeId.partition("_") | 29 rnaClass, sep, tail = fullUniqeId.partition("_") |
31 short_unique = re.findall("_".join(["[^_]+"] * 2), fullUniqeId)[0] | 30 if fullUniqeId == uniqueId: |
32 if short_unique == uniqueId: | |
33 blackList.append(uniqueId) | 31 blackList.append(uniqueId) |
34 | 32 |
35 numberOfClusters += 1 # 1 cluster for all unassigned seqs | 33 numberOfClusters += 1 # 1 cluster for all unassigned seqs |
36 with open(dataNames, "r") as names: | 34 with open(dataNames, "r") as names: |
37 for line in names.readlines(): | 35 for line in names.readlines(): |
38 fullUniqeId = line.split()[3] | 36 fullUniqeId = line.split()[3] |
39 rnaClass, sep, tail = fullUniqeId.partition("_") | 37 rnaClass, sep, tail = fullUniqeId.partition("_") |
40 short_unique = re.findall("_".join(["[^_]+"] * 2), fullUniqeId)[0] | |
41 rnaClass, sep, tail = fullUniqeId.partition("_") | 38 rnaClass, sep, tail = fullUniqeId.partition("_") |
42 if short_unique not in blackList: | 39 if fullUniqeId not in blackList: |
43 listOfClasses.append(rnaClass) | 40 listOfClasses.append(rnaClass) |
44 listOfClusters.append(str(numberOfClusters)) | 41 listOfClusters.append(str(numberOfClusters)) |
45 numberOfClusters += 1 # separate cluster for all unassigned seqs | 42 numberOfClusters += 1 # separate cluster for all unassigned seqs |
46 | 43 |
47 toWrite = "" | 44 toWrite = "" |