Mercurial > repos > rnateam > graphclust_postprocessing
annotate evaluation.py @ 6:869a6e807d76 draft
planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/branchForIterations/tools/GraphClust/CollectResults commit 057c2fd398055dc86eb2c00d8a74f301d5c231d9-dirty
author | rnateam |
---|---|
date | Wed, 22 Feb 2017 16:51:06 -0500 |
parents | 4310ac018d05 |
children | b5f49453af8c |
rev | line source |
---|---|
0
b797e13169a0
planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/branchForIterations/tools/GraphClust/CollectResults commit 21aaee40723b5341b4236edeb0e72995c2054053
rnateam
parents:
diff
changeset
|
1 import glob |
b797e13169a0
planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/branchForIterations/tools/GraphClust/CollectResults commit 21aaee40723b5341b4236edeb0e72995c2054053
rnateam
parents:
diff
changeset
|
2 from os import system |
b797e13169a0
planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/branchForIterations/tools/GraphClust/CollectResults commit 21aaee40723b5341b4236edeb0e72995c2054053
rnateam
parents:
diff
changeset
|
3 import re |
2
b8e32e577597
planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/branchForIterations/tools/GraphClust/CollectResults commit 9bc3c9b613d106098a78e16534897c88a3738c07
rnateam
parents:
1
diff
changeset
|
4 from sklearn import metrics |
5
4310ac018d05
planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/branchForIterations/tools/GraphClust/CollectResults commit 80c721dcfe02a2b8baf8e2c64b76cbcd71b23d86
rnateam
parents:
3
diff
changeset
|
5 from shutil import make_archive |
0
b797e13169a0
planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/branchForIterations/tools/GraphClust/CollectResults commit 21aaee40723b5341b4236edeb0e72995c2054053
rnateam
parents:
diff
changeset
|
6 |
b797e13169a0
planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/branchForIterations/tools/GraphClust/CollectResults commit 21aaee40723b5341b4236edeb0e72995c2054053
rnateam
parents:
diff
changeset
|
def sh(script):
    """Run *script* with ``bash -c``.

    The script text is handed to bash as a single argv element instead of
    being spliced into a single-quoted shell string, so scripts that contain
    single quotes are executed intact.

    The exit status of the command is ignored and nothing is returned.
    """
    # Local import: keeps this function self-contained without touching the
    # file-level import list.
    import subprocess

    subprocess.call(["bash", "-c", script])
b797e13169a0
planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/branchForIterations/tools/GraphClust/CollectResults commit 21aaee40723b5341b4236edeb0e72995c2054053
rnateam
parents:
diff
changeset
|
9 |
1
ed8c7191b322
planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/branchForIterations/tools/GraphClust/CollectResults commit 11e50007837b1efa01a3039c92df0ebf63f0f7e9
rnateam
parents:
0
diff
changeset
|
dataNames = "FASTA/data.names"
cluster_seqs_stats_path = "RESULTS/*.cluster.all"
cluster_seqs_stats_files = glob.glob(cluster_seqs_stats_path)


def collect_assigned(cluster_files, names_path):
    """Read the per-cluster stats files and return what they assign.

    Each non-blank line of a cluster file is split on whitespace; field 8 is
    taken as the sequence's unique id and field 2 as its cluster number. The
    class label is the part of the unique id before the first underscore.

    Args:
        cluster_files: paths of the cluster stats files (processed in sorted
            order).
        names_path: path of the names file; field 3 of each non-blank line is
            read as a full unique id.

    Returns:
        A ``(classes, clusters, assigned_ids)`` tuple: two parallel lists with
        one entry per cluster-file line, and the set of unique ids that occur
        both in a cluster file and in the names file.

    Raises:
        IndexError: if a non-blank line has fewer fields than expected.
    """
    classes = []
    clusters = []
    seen_ids = set()
    for cluster_file in sorted(cluster_files):
        with open(cluster_file, "r") as handle:
            for line in handle:
                fields = line.split()
                if not fields:
                    # Tolerate blank (e.g. trailing) lines.
                    continue
                unique_id = fields[8]
                classes.append(unique_id.partition("_")[0])
                clusters.append(fields[2])
                seen_ids.add(unique_id)

    if not seen_ids:
        # Nothing was assigned, so there is nothing to look up in the names file.
        return classes, clusters, set()

    # Read the names file once instead of rescanning it for every cluster line.
    with open(names_path, "r") as names:
        known_ids = {line.split()[3] for line in names if line.strip()}
    return classes, clusters, seen_ids & known_ids


listOfClasses, listOfClusters, blackList = collect_assigned(
    cluster_seqs_stats_files, dataNames
)
# Each cluster stats file counts as one cluster.
numberOfClusters = len(cluster_seqs_stats_files)
b797e13169a0
planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/branchForIterations/tools/GraphClust/CollectResults commit 21aaee40723b5341b4236edeb0e72995c2054053
rnateam
parents:
diff
changeset
|
34 |
b797e13169a0
planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/branchForIterations/tools/GraphClust/CollectResults commit 21aaee40723b5341b4236edeb0e72995c2054053
rnateam
parents:
diff
changeset
|
# Append every sequence from the names file that no cluster file assigned.
# NOTE(review): the per-sequence increment at the end of the loop is placed
# inside the "unassigned" branch, following its own "separate cluster" comment
# -- confirm this nesting against the original file's indentation.
numberOfClusters += 1  # step past the ids used by the cluster files
with open(dataNames, "r") as names:
    for line in names:
        fields = line.split()
        if not fields:
            # Tolerate blank (e.g. trailing) lines.
            continue
        fullUniqeId = fields[3]
        if fullUniqeId in blackList:
            # Already recorded with its real cluster number.
            continue
        # Class label is the part of the id before the first underscore.
        rnaClass = fullUniqeId.partition("_")[0]
        listOfClasses.append(rnaClass)
        listOfClusters.append(str(numberOfClusters))
        numberOfClusters += 1  # separate cluster for all unassigned seqs
b797e13169a0
planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/branchForIterations/tools/GraphClust/CollectResults commit 21aaee40723b5341b4236edeb0e72995c2054053
rnateam
parents:
diff
changeset
|
45 |
b797e13169a0
planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/branchForIterations/tools/GraphClust/CollectResults commit 21aaee40723b5341b4236edeb0e72995c2054053
rnateam
parents:
diff
changeset
|
# Write one "<class>\t<cluster>" row per sequence. The two lists are filled
# in lockstep, so pairing them with zip() yields the same rows as indexing.
toWrite = "".join(
    rna_class + "\t" + cluster + "\n"
    for rna_class, cluster in zip(listOfClasses, listOfClusters)
)
with open("RESULTS/fullTab.tabular", "w") as full:
    full.write(toWrite)
2
b8e32e577597
planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/branchForIterations/tools/GraphClust/CollectResults commit 9bc3c9b613d106098a78e16534897c88a3738c07
rnateam
parents:
1
diff
changeset
|
51 |
b8e32e577597
planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/branchForIterations/tools/GraphClust/CollectResults commit 9bc3c9b613d106098a78e16534897c88a3738c07
rnateam
parents:
1
diff
changeset
|
52 |
b8e32e577597
planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/branchForIterations/tools/GraphClust/CollectResults commit 9bc3c9b613d106098a78e16534897c88a3738c07
rnateam
parents:
1
diff
changeset
|
pattern = re.compile("^RF.*$")

# Scores are only computed when there is at least one class label and the
# first one starts with "RF"; otherwise every score is reported as NA.
labels_start_with_rf = len(listOfClasses) > 0 and pattern.match(str(listOfClasses[0]))

if not labels_start_with_rf:
    toWrite = "completeness_score : NA \nhomogeneity_score : NA \nadjusted_rand_score : NA \nadjusted_mutual_info_score : NA \nv_measure_score : NA"
else:
    completeness_score = metrics.completeness_score(listOfClasses, listOfClusters)
    homogeneity_score = metrics.homogeneity_score(listOfClasses, listOfClusters)
    adjusted_rand_score = metrics.adjusted_rand_score(listOfClasses, listOfClusters)
    adjusted_mutual_info_score = metrics.adjusted_mutual_info_score(listOfClasses, listOfClusters)
    v_measure_score = metrics.v_measure_score(listOfClasses, listOfClusters)

    # One "name : value" line per score, no trailing newline.
    report_lines = [
        "completeness_score : " + str(completeness_score),
        "homogeneity_score : " + str(homogeneity_score),
        "adjusted_rand_score : " + str(adjusted_rand_score),
        "adjusted_mutual_info_score : " + str(adjusted_mutual_info_score),
        "v_measure_score : " + str(v_measure_score),
    ]
    toWrite = "\n".join(report_lines)

with open("RESULTS/evaluation.txt", "w") as fOut:
    fOut.write(toWrite)

# Bundle the whole RESULTS directory into RESULTS.zip.
make_archive('RESULTS', 'zip', root_dir='RESULTS')