annotate commons/core/seq/ClusterConsensusCollection.py @ 63:5f210bc9f486

Added a simple test for Clusterize
author m-zytnicki
date Mon, 19 Oct 2015 12:02:29 +0200
parents 769e306b7933
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
6
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1 import re
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
2 from commons.core.seq.BioseqDB import BioseqDB
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
3
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
4 ## Record a collection of bioseqDB representing cluster consensus
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
5 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
class ClusterConsensusCollection(object):

    # Expected header layout in the cluster file:
    #   <prefix><clusterNumber>Mb<memberNumber><whitespace><rest>
    # where <prefix> contains no digit. Compiled once, as a raw string so that
    # "\D", "\d" and "\s" are regex escapes rather than (invalid) string escapes.
    _HEADER_PATTERN = re.compile(r"(\D*)(\d+)Mb\d+\s.*")

    ## constructor
    #
    # @param clusterFileName string name of file containing the cluster of consensus
    #
    def __init__(self, clusterFileName):
        self._clusterFileName = clusterFileName
        # one entry per cluster, in the order clusters are met by fillCollection()
        self._lClusterConsensus = []

    ## Two collections are equal when they share the same file name and the same list of clusters
    #
    # @param o object to compare with
    # @return boolean, or NotImplemented if o is not a ClusterConsensusCollection
    #
    def __eq__(self, o):
        if not isinstance(o, ClusterConsensusCollection):
            return NotImplemented
        return self._clusterFileName == o._clusterFileName and self._lClusterConsensus == o._lClusterConsensus

    ## Negation of __eq__
    #
    # Defined explicitly because Python 2 does not derive "!=" from __eq__.
    #
    # @param o object to compare with
    # @return boolean, or NotImplemented if o is not a ClusterConsensusCollection
    #
    def __ne__(self, o):
        isEqual = self.__eq__(o)
        if isEqual is NotImplemented:
            return isEqual
        return not isEqual

    ## Get the list of clusters
    #
    # @return list holding one BioseqDB per cluster (the internal list, not a copy)
    #
    def getLClusterConsensus(self):
        return self._lClusterConsensus

    ## Read the cluster file and append one BioseqDB per cluster to the collection
    #
    # Consecutive headers sharing the same cluster name are grouped in the same
    # BioseqDB; a new BioseqDB is started each time the cluster name changes.
    # Nothing is appended if the file contains no sequence.
    #
    # @note raises ValueError if a header does not follow the expected layout
    #
    def fillCollection(self):
        iBioseqDBAllCluster = BioseqDB()
        # "with" guarantees the file is closed even if read() fails
        with open(self._clusterFileName, "r") as fClusterFile:
            iBioseqDBAllCluster.read(fClusterFile)

        clusterConsensus = None
        previousClusterName = None
        for header in iBioseqDBAllCluster.getHeaderList():
            clusterName, seqHeader = self._getClusterNameAndSeqHeader(header)
            if clusterConsensus is None or clusterName != previousClusterName:
                # first header, or first header of a new cluster
                clusterConsensus = BioseqDB()
                clusterConsensus.setName(clusterName)
                self._lClusterConsensus.append(clusterConsensus)
                previousClusterName = clusterName
            self._addBioseqInClusterConsensus(iBioseqDBAllCluster, header, seqHeader, clusterConsensus)

    ## Split a header of the cluster file into the cluster name and the sequence header
    #
    # @param header string header such as "<prefix><clusterNumber>Mb<memberNumber> <seqHeader> ..."
    # @return tuple (clusterName, seqHeader) where clusterName is "<prefix><clusterNumber>"
    #         and seqHeader is the second space-separated field of the header
    # @note raises ValueError if the header does not follow the expected layout
    #
    def _getClusterNameAndSeqHeader(self, header):
        m = self._HEADER_PATTERN.match(header)
        if m is None:
            raise ValueError("unexpected cluster header format: '%s'" % header)
        clusterName = m.group(1) + m.group(2)
        seqHeader = header.split(" ")[1]
        return clusterName, seqHeader

    ## Fetch a sequence from the whole-file BioseqDB, rename it and add it to a cluster
    #
    # @param iBioseqDBAllCluster BioseqDB holding all the sequences of the cluster file
    # @param firstHeader string header under which the sequence is fetched
    # @param seqHeader string new header given to the sequence
    # @param clusterConsensus BioseqDB of the cluster receiving the sequence
    #
    def _addBioseqInClusterConsensus(self, iBioseqDBAllCluster, firstHeader, seqHeader, clusterConsensus):
        ibioseq = iBioseqDBAllCluster.fetch(firstHeader)
        ibioseq.setHeader(seqHeader)
        clusterConsensus.add(ibioseq)

    ## Get the 1-based rank of the first cluster containing a given sequence
    #
    # @param seqName string header of the sequence to look for
    # @return int rank of the cluster (1 for the first one), or None if no cluster contains seqName
    #
    def getNumClusterForAConsensus(self, seqName):
        for nbCluster, bioseqDB in enumerate(self._lClusterConsensus, 1):
            if seqName in bioseqDB.getHeaderList():
                return nbCluster
        return None

    ## Get the number of sequences of a cluster
    #
    # @param numCluster int 1-based rank of the cluster
    # @return value of getSize() for that cluster
    # @note raises IndexError if numCluster is lower than 1 or greater than the number of clusters
    #
    def getNumConsensusInCluster(self, numCluster):
        # without this guard, 0 would silently address the last cluster (index -1)
        if numCluster < 1:
            raise IndexError("cluster number must be >= 1, got %s" % numCluster)
        return self._lClusterConsensus[numCluster - 1].getSize()
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
65
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
66