comparison commons/core/seq/ClusterConsensusCollection.py @ 6:769e306b7933

Change the repository level.
author yufei-luo
date Fri, 18 Jan 2013 04:54:14 -0500
parents
children
comparison
equal deleted inserted replaced
5:ea3082881bf8 6:769e306b7933
1 import re
2 from commons.core.seq.BioseqDB import BioseqDB
3
## Record a collection of bioseqDB representing cluster consensus
#
# Each element of the collection is a BioseqDB holding the sequences of one
# cluster, stored in the order in which the clusters are met in the file.
#
class ClusterConsensusCollection(object):

    # Expected header layout:
    #   <non-digit prefix><cluster number>Mb<member number><whitespace><rest>
    # Raw string so that "\D", "\d" and "\s" reach the regex engine untouched.
    _HEADER_PATTERN = re.compile(r"(\D*)(\d+)Mb\d+\s.*")

    ## constructor
    #
    # @param clusterFileName string name of file containing the cluster of consensus
    #
    def __init__(self, clusterFileName):
        self._clusterFileName = clusterFileName
        self._lClusterConsensus = []

    ## Two collections are equal when they share the same file name and the
    #  same list of cluster consensus
    #
    # @param o object to compare with
    # @return boolean, or NotImplemented when o is not a ClusterConsensusCollection
    #
    def __eq__(self, o):
        if not isinstance(o, ClusterConsensusCollection):
            # Let Python fall back to its default comparison instead of
            # failing on a missing attribute.
            return NotImplemented
        return (self._clusterFileName == o._clusterFileName
                and self._lClusterConsensus == o._lClusterConsensus)

    ## Negation of __eq__ (Python 2 does not derive it automatically)
    #
    # @param o object to compare with
    # @return boolean, or NotImplemented when o is not a ClusterConsensusCollection
    #
    def __ne__(self, o):
        isEqual = self.__eq__(o)
        if isEqual is NotImplemented:
            return isEqual
        return not isEqual

    ## Give the list of cluster consensus
    #
    # @return list of BioseqDB, one per cluster (the internal list, not a copy)
    #
    def getLClusterConsensus(self):
        return self._lClusterConsensus

    ## Read the cluster file and append one BioseqDB per cluster to the collection
    #
    # Only consecutive headers are compared, so a cluster name that shows up
    # again after another cluster starts a new BioseqDB. A file without any
    # sequence leaves the collection unchanged.
    #
    def fillCollection(self):
        iBioseqDBAllCluster = BioseqDB()
        # 'with' guarantees the file is closed even if the read fails
        with open(self._clusterFileName, "r") as fClusterFile:
            iBioseqDBAllCluster.read(fClusterFile)

        clusterConsensus = None
        previousClusterName = None
        for header in iBioseqDBAllCluster.getHeaderList():
            clusterName, seqHeader = self._getClusterNameAndSeqHeader(header)
            if clusterConsensus is None or clusterName != previousClusterName:
                # First header of a new cluster: open a new BioseqDB for it
                clusterConsensus = BioseqDB()
                clusterConsensus.setName(clusterName)
                self._lClusterConsensus.append(clusterConsensus)
                previousClusterName = clusterName
            self._addBioseqInClusterConsensus(iBioseqDBAllCluster, header, seqHeader, clusterConsensus)

    ## Split a header of the cluster file into cluster name and sequence header
    #
    # @param header string such as "<prefix><number>Mb<number> <seqHeader> ..."
    # @return tuple (clusterName, seqHeader): clusterName is the prefix followed
    #         by the cluster number, seqHeader is the second space-separated field
    # @throws ValueError if the header does not follow the expected layout
    #
    def _getClusterNameAndSeqHeader(self, header):
        m = self._HEADER_PATTERN.match(header)
        lPartsHeader = header.split(" ")
        if m is None or len(lPartsHeader) < 2:
            raise ValueError("Malformed cluster header: %r" % (header,))
        clusterName = m.group(1) + m.group(2)
        seqHeader = lPartsHeader[1]
        return clusterName, seqHeader

    ## Fetch a sequence from the global BioseqDB, rename it and add it to a cluster
    #
    # @param iBioseqDBAllCluster BioseqDB containing all the sequences of the file
    # @param firstHeader string header of the sequence in iBioseqDBAllCluster
    # @param seqHeader string new header given to the sequence
    # @param clusterConsensus BioseqDB of the cluster receiving the sequence
    #
    def _addBioseqInClusterConsensus(self, iBioseqDBAllCluster, firstHeader, seqHeader, clusterConsensus):
        ibioseq = iBioseqDBAllCluster.fetch(firstHeader)
        # NOTE(review): the fetched object itself is renamed; presumably it is
        # the one stored in iBioseqDBAllCluster - confirm against BioseqDB.fetch
        ibioseq.setHeader(seqHeader)
        clusterConsensus.add(ibioseq)

    ## Give the number of the first cluster containing a given consensus
    #
    # @param seqName string header of the consensus
    # @return integer cluster number (the first cluster is 1), None if not found
    #
    def getNumClusterForAConsensus(self, seqName):
        for nbCluster, bioseqDB in enumerate(self._lClusterConsensus, 1):
            if seqName in bioseqDB.getHeaderList():
                return nbCluster
        return None

    ## Give the number of consensus in a given cluster
    #
    # @param numCluster integer cluster number (the first cluster is 1)
    # @return integer size of the corresponding BioseqDB
    # @throws IndexError if numCluster is not an existing cluster number
    #
    def getNumConsensusInCluster(self, numCluster):
        if numCluster < 1:
            # Without this guard 0 and negative numbers would silently index
            # the list from its end.
            raise IndexError("cluster number must be >= 1, got %r" % (numCluster,))
        return self._lClusterConsensus[numCluster - 1].getSize()
65
66