Mercurial > repos > yufei-luo > s_mart
diff commons/core/seq/ClusterConsensusCollection.py @ 38:2c0c0a89fad7
Uploaded
author | m-zytnicki |
---|---|
date | Thu, 02 May 2013 09:56:47 -0400 |
parents | 769e306b7933 |
children |
line wrap: on
line diff
# commons/core/seq/ClusterConsensusCollection.py

import re

from commons.core.seq.BioseqDB import BioseqDB


## Record a collection of BioseqDB instances, one per cluster of consensus
#
# The collection is empty after construction; call fillCollection() to load
# it from the cluster file.
#
class ClusterConsensusCollection(object):

    # Expected header layout: "<prefix><clusterNumber>Mb<number><whitespace>...".
    # Raw string so that \D, \d and \s reach the regex engine untouched
    # instead of being treated as (invalid) string escape sequences.
    _HEADER_PATTERN = re.compile(r"(\D*)(\d+)Mb\d+\s.*")

    ## Constructor
    #
    # @param clusterFileName string name of the file containing the clusters of consensus
    #
    def __init__(self, clusterFileName):
        self._clusterFileName = clusterFileName
        self._lClusterConsensus = []

    ## Two collections are equal when they share the same file name and the same list of clusters
    #
    # @param o object to compare with
    # @return boolean, or NotImplemented when o is not a ClusterConsensusCollection
    #
    def __eq__(self, o):
        if not isinstance(o, ClusterConsensusCollection):
            # Let Python try the reflected comparison instead of failing
            # with an AttributeError on a foreign object.
            return NotImplemented
        return (self._clusterFileName == o._clusterFileName
                and self._lClusterConsensus == o._lClusterConsensus)

    ## Negation of __eq__ (Python 2 does not derive != from ==)
    #
    # @param o object to compare with
    # @return boolean, or NotImplemented when o is not a ClusterConsensusCollection
    #
    def __ne__(self, o):
        result = self.__eq__(o)
        if result is NotImplemented:
            return result
        return not result

    ## Get the list of per-cluster BioseqDB instances
    #
    # @return list the internal list itself (not a copy)
    #
    def getLClusterConsensus(self):
        return self._lClusterConsensus

    ## Read the cluster file and append one BioseqDB per run of consecutive headers sharing a cluster name
    #
    # Each new BioseqDB is named after its cluster, and every sequence is
    # stored in it under its sequence header (see _getClusterNameAndSeqHeader).
    # Clusters are appended to the existing list, so calling this method
    # twice duplicates them. A file yielding no header leaves the list
    # unchanged.
    #
    # @note raises ValueError if a header does not follow the expected layout
    #
    def fillCollection(self):
        iBioseqDBAllCluster = BioseqDB()
        # 'with' guarantees the file is closed even if read() raises.
        with open(self._clusterFileName, "r") as fClusterFile:
            iBioseqDBAllCluster.read(fClusterFile)

        previousClusterName = None
        clusterConsensus = None
        for header in iBioseqDBAllCluster.getHeaderList():
            clusterName, seqHeader = self._getClusterNameAndSeqHeader(header)
            # Only consecutive headers are grouped: a cluster name that shows
            # up again later in the file starts a new BioseqDB.
            if clusterConsensus is None or clusterName != previousClusterName:
                previousClusterName = clusterName
                clusterConsensus = BioseqDB()
                clusterConsensus.setName(clusterName)
                self._lClusterConsensus.append(clusterConsensus)
            self._addBioseqInClusterConsensus(iBioseqDBAllCluster, header, seqHeader, clusterConsensus)

    ## Split a header of the cluster file into the cluster name and the sequence header
    #
    # The cluster name is the non-digit prefix followed by the cluster number
    # that precedes "Mb"; the sequence header is the second space-separated
    # field of the header.
    #
    # @param header string header as found in the cluster file
    # @return tuple (clusterName, seqHeader)
    # @note raises ValueError if the header does not follow the expected layout
    #
    def _getClusterNameAndSeqHeader(self, header):
        m = self._HEADER_PATTERN.match(header)
        if m is None:
            raise ValueError(
                "header '%s' does not match the expected "
                "'<name><number>Mb<number> <seqHeader>' layout" % header)
        clusterName = m.group(1) + m.group(2)
        seqHeader = header.split(" ")[1]
        return clusterName, seqHeader

    ## Fetch a sequence from the global BioseqDB, rename it and add it to a cluster BioseqDB
    #
    # @param iBioseqDBAllCluster BioseqDB instance holding all the sequences of the cluster file
    # @param firstHeader string header used to fetch the sequence (any header, not only the first one)
    # @param seqHeader string header given to the sequence before it is added
    # @param clusterConsensus BioseqDB instance receiving the sequence
    #
    def _addBioseqInClusterConsensus(self, iBioseqDBAllCluster, firstHeader, seqHeader, clusterConsensus):
        ibioseq = iBioseqDBAllCluster.fetch(firstHeader)
        # NOTE(review): setHeader() mutates the object returned by fetch();
        # whether that is a copy or the instance stored in
        # iBioseqDBAllCluster is not visible from here.
        ibioseq.setHeader(seqHeader)
        clusterConsensus.add(ibioseq)

    ## Get the 1-based rank of the first cluster whose header list contains a given sequence name
    #
    # @param seqName string sequence header to look for
    # @return int rank of the cluster (starting at 1), or None if no cluster contains it
    #
    def getNumClusterForAConsensus(self, seqName):
        for nbCluster, bioseqDB in enumerate(self._lClusterConsensus, 1):
            if seqName in bioseqDB.getHeaderList():
                return nbCluster
        return None

    ## Get the size of a cluster given its 1-based rank
    #
    # @param numCluster int rank of the cluster (starting at 1)
    # @return result of getSize() on the cluster BioseqDB (presumably its number of sequences)
    # @note raises IndexError if numCluster is lower than 1 or greater than the number of clusters
    #
    def getNumConsensusInCluster(self, numCluster):
        if numCluster < 1:
            # Without this guard, 0 and negative ranks would silently wrap
            # around to the last clusters through negative indexing.
            raise IndexError("cluster number must be >= 1, got %s" % numCluster)
        return self._lClusterConsensus[numCluster - 1].getSize()