comparison evaluation.py @ 1:ed8c7191b322 draft

planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/branchForIterations/tools/GraphClust/CollectResults commit 11e50007837b1efa01a3039c92df0ebf63f0f7e9
author rnateam
date Thu, 22 Dec 2016 09:06:48 -0500
parents b797e13169a0
children b8e32e577597
comparison
equal deleted inserted replaced
0:b797e13169a0 1:ed8c7191b322
1 import glob 1 import glob
2 from os import system 2 from os import system
3 import re 3 import re
4 4
5
6 def sh(script): 5 def sh(script):
7 system("bash -c '%s'" % script) 6 system("bash -c '%s'" % script)
8 7
8 dataNames = "FASTA/data.names"
9 9
10 dataNames = "FASTA/data.names"
11 listOfClusters = [] 10 listOfClusters = []
12 listOfClasses = [] 11 listOfClasses = []
13 cluster_seqs_stats_path = "RESULTS/*.cluster.all" 12 cluster_seqs_stats_path = "RESULTS/*.cluster.all"
14 cluster_seqs_stats_files = glob.glob(cluster_seqs_stats_path) 13 cluster_seqs_stats_files = glob.glob(cluster_seqs_stats_path)
15 14
17 numberOfClusters = 0 16 numberOfClusters = 0
18 for singleFile in sorted(cluster_seqs_stats_files): 17 for singleFile in sorted(cluster_seqs_stats_files):
19 numberOfClusters += 1 18 numberOfClusters += 1
20 with open(singleFile, "r") as f: 19 with open(singleFile, "r") as f:
21 for line in f.readlines(): 20 for line in f.readlines():
22 uniqueId = line.split()[6] 21 uniqueId = line.split()[7]
23 clustNum = line.split()[1] 22 clustNum = line.split()[1]
24 rnaClass, sep, tail = uniqueId.partition("_") 23 rnaClass, sep, tail = uniqueId.partition("_")
25 listOfClasses.append(rnaClass) 24 listOfClasses.append(rnaClass)
26 listOfClusters.append(clustNum) 25 listOfClusters.append(clustNum)
27 with open(dataNames, "r") as names: 26 with open(dataNames, "r") as names:
28 for line in names.readlines(): 27 for line in names.readlines():
29 fullUniqeId = line.split()[3] 28 fullUniqeId = line.split()[3]
30 rnaClass, sep, tail = fullUniqeId.partition("_") 29 rnaClass, sep, tail = fullUniqeId.partition("_")
31 short_unique = re.findall("_".join(["[^_]+"] * 2), fullUniqeId)[0] 30 if fullUniqeId == uniqueId:
32 if short_unique == uniqueId:
33 blackList.append(uniqueId) 31 blackList.append(uniqueId)
34 32
35 numberOfClusters += 1 # 1 cluster for all unassigned seqs 33 numberOfClusters += 1 # 1 cluster for all unassigned seqs
36 with open(dataNames, "r") as names: 34 with open(dataNames, "r") as names:
37 for line in names.readlines(): 35 for line in names.readlines():
38 fullUniqeId = line.split()[3] 36 fullUniqeId = line.split()[3]
39 rnaClass, sep, tail = fullUniqeId.partition("_") 37 rnaClass, sep, tail = fullUniqeId.partition("_")
40 short_unique = re.findall("_".join(["[^_]+"] * 2), fullUniqeId)[0]
41 rnaClass, sep, tail = fullUniqeId.partition("_") 38 rnaClass, sep, tail = fullUniqeId.partition("_")
42 if short_unique not in blackList: 39 if fullUniqeId not in blackList:
43 listOfClasses.append(rnaClass) 40 listOfClasses.append(rnaClass)
44 listOfClusters.append(str(numberOfClusters)) 41 listOfClusters.append(str(numberOfClusters))
45 numberOfClusters += 1 # separate cluster for all unassigned seqs 42 numberOfClusters += 1 # separate cluster for all unassigned seqs
46 43
47 toWrite = "" 44 toWrite = ""