Mercurial > repos > yufei-luo > s_mart
comparison commons/core/seq/ClusterConsensusCollection.py @ 6:769e306b7933
Change the repository level.
| author | yufei-luo |
|---|---|
| date | Fri, 18 Jan 2013 04:54:14 -0500 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 5:ea3082881bf8 | 6:769e306b7933 |
|---|---|
| 1 import re | |
| 2 from commons.core.seq.BioseqDB import BioseqDB | |
| 3 | |
## Record a collection of BioseqDB instances, one per cluster of consensus
#
# The collection is empty after construction; call fillCollection() to load
# it from the cluster file given to the constructor.
#
class ClusterConsensusCollection(object):

    ## Compiled pattern of a clustered header:
    # "<non-digits><digits>Mb<digits><whitespace><anything>"
    # Group 1 + group 2 together form the cluster name.
    _CLUSTER_HEADER_PATTERN = re.compile(r"(\D*)(\d+)Mb\d+\s.*")

    ## constructor
    #
    # @param clusterFileName string name of file containing the cluster of consensus
    #
    def __init__(self, clusterFileName):
        self._clusterFileName = clusterFileName
        self._lClusterConsensus = []

    ## Two collections are equal when they share the same file name and the same list of clusters
    #
    # @param o object to compare with
    # @return boolean, or NotImplemented when o is not a ClusterConsensusCollection
    #
    def __eq__(self, o):
        if not isinstance(o, ClusterConsensusCollection):
            # Let Python fall back to its default comparison instead of
            # failing with AttributeError on an unrelated object.
            return NotImplemented
        return (self._clusterFileName == o._clusterFileName
                and self._lClusterConsensus == o._lClusterConsensus)

    ## Get the list of clusters
    #
    # @return list of BioseqDB instances, in the order the clusters were met in the file
    #
    def getLClusterConsensus(self):
        return self._lClusterConsensus

    ## Read the cluster file and append one BioseqDB per cluster to the collection
    #
    # A new cluster is started each time the cluster name of a header differs
    # from the cluster name of the previous header. An input without any
    # header leaves the collection unchanged.
    #
    # @note clusters are appended to the existing list, nothing is cleared first
    #
    def fillCollection(self):
        iBioseqDBAllCluster = BioseqDB()
        # 'with' guarantees the file is closed even if read() raises
        with open(self._clusterFileName, "r") as fClusterFile:
            iBioseqDBAllCluster.read(fClusterFile)
        # Take the header list once, before any bioseq header is rewritten below
        lHeader = iBioseqDBAllCluster.getHeaderList()
        # A cluster name always contains digits, so None never equals a real name
        previousClusterName = None
        clusterConsensus = None
        for header in lHeader:
            clusterName, seqHeader = self._getClusterNameAndSeqHeader(header)
            if clusterName != previousClusterName:
                clusterConsensus = BioseqDB()
                clusterConsensus.setName(clusterName)
                # The list keeps a reference, the cluster is filled afterwards
                self._lClusterConsensus.append(clusterConsensus)
                previousClusterName = clusterName
            self._addBioseqInClusterConsensus(iBioseqDBAllCluster, header, seqHeader, clusterConsensus)

    ## Split a clustered header into its cluster name and its sequence header
    #
    # @param header string such as "<prefix><digits>Mb<digits> <seqHeader> ..."
    # @return tuple (clusterName, seqHeader) where clusterName is "<prefix><digits>"
    #         and seqHeader is the second space-separated token of header
    # @throw ValueError if header does not match the expected format
    #
    def _getClusterNameAndSeqHeader(self, header):
        m = self._CLUSTER_HEADER_PATTERN.match(header)
        if m is None:
            raise ValueError("Header %r does not match the expected "
                             "'<name><number>Mb<number> <seqHeader>' format" % (header,))
        clusterName = m.group(1) + m.group(2)
        # Split on a literal space: the token right after the first space is kept
        seqHeader = header.split(" ")[1]
        return clusterName, seqHeader

    ## Fetch a bioseq from the global bank, rename it and add it to a cluster
    #
    # @param iBioseqDBAllCluster BioseqDB instance holding all the sequences
    # @param firstHeader string header used to fetch the bioseq
    # @param seqHeader string new header given to the bioseq
    # @param clusterConsensus BioseqDB instance receiving the bioseq
    #
    def _addBioseqInClusterConsensus(self, iBioseqDBAllCluster, firstHeader, seqHeader, clusterConsensus):
        ibioseq = iBioseqDBAllCluster.fetch(firstHeader)
        ibioseq.setHeader(seqHeader)
        clusterConsensus.add(ibioseq)

    ## Get the 1-based rank of the first cluster containing a given sequence
    #
    # @param seqName string header of the sequence to look for
    # @return integer rank of the cluster (first cluster is 1), None if not found
    #
    def getNumClusterForAConsensus(self, seqName):
        for nbCluster, bioseqDB in enumerate(self._lClusterConsensus, 1):
            if seqName in bioseqDB.getHeaderList():
                return nbCluster
        return None

    ## Get the size of a cluster given its 1-based rank
    #
    # @param numCluster integer rank of the cluster (first cluster is 1)
    # @return value of getSize() for that cluster
    #
    def getNumConsensusInCluster(self, numCluster):
        return self._lClusterConsensus[numCluster - 1].getSize()
| 65 | |
| 66 |
