Mercurial > repos > urgi-team > teiso
diff TEisotools-1.1.a/commons/core/seq/ClusterConsensusCollection.py @ 16:836ce3d9d47a draft default tip
Uploaded
| author | urgi-team |
|---|---|
| date | Thu, 21 Jul 2016 07:42:47 -0400 |
| parents | 255c852351c5 |
| children |
line wrap: on
line diff
# Module recovered from the diff of TEisotools-1.1.a/commons/core/seq/ClusterConsensusCollection.py
import re
from commons.core.seq.BioseqDB import BioseqDB


## Record a collection of bioseqDB representing cluster consensus
#
# Each element of the collection is one BioseqDB named after a cluster and
# holding the sequences whose headers carry that cluster name.
#
class ClusterConsensusCollection(object):

    # Header layout: <non-digit prefix><cluster number>Mb<digits><whitespace><rest>.
    # Raw string so that \D, \d and \s reach the regex engine untouched.
    _HEADER_PATTERN = re.compile(r"(\D*)(\d+)Mb\d+\s.*")

    ## constructor
    #
    # @param clusterFileName string name of file containing the cluster of consensus
    #
    def __init__(self, clusterFileName):
        self._clusterFileName = clusterFileName
        self._lClusterConsensus = []

    ## Two collections are equal when they have exactly the same type,
    # the same cluster file name and equal lists of cluster consensus
    #
    # @param o object to compare with
    # @return boolean
    #
    def __eq__(self, o):
        if type(o) is type(self):
            return self._clusterFileName == o._clusterFileName and self._lClusterConsensus == o._lClusterConsensus
        return False

    ## Negation of __eq__
    #
    # @param o object to compare with
    # @return boolean
    #
    def __ne__(self, o):
        return not self.__eq__(o)

    ## @return list of BioseqDB, one per cluster, in the order they were filled
    #
    def getLClusterConsensus(self):
        return self._lClusterConsensus

    ## Read the cluster file and append one BioseqDB per cluster to the collection
    #
    # Headers are processed in the order given by BioseqDB.getHeaderList();
    # a new cluster is started each time the cluster name changes between two
    # consecutive headers. An input without any header leaves the collection
    # unchanged.
    #
    # @raise ValueError if a header does not follow the expected cluster format
    #
    def fillCollection(self):
        iBioseqDBAllCluster = BioseqDB()
        # 'with' guarantees the file is closed even if read() raises
        with open(self._clusterFileName, "r") as fClusterFile:
            iBioseqDBAllCluster.read(fClusterFile)

        clusterConsensus = None
        previousClusterName = None
        for header in iBioseqDBAllCluster.getHeaderList():
            clusterName, seqHeader = self._getClusterNameAndSeqHeader(header)
            if clusterConsensus is None or clusterName != previousClusterName:
                # The cluster name changed: store the finished cluster (if any)
                # and open a new one
                if clusterConsensus is not None:
                    self._lClusterConsensus.append(clusterConsensus)
                previousClusterName = clusterName
                clusterConsensus = BioseqDB()
                clusterConsensus.setName(clusterName)
            self._addBioseqInClusterConsensus(iBioseqDBAllCluster, header, seqHeader, clusterConsensus)
        # Store the last cluster; nothing to store when there was no header at all
        if clusterConsensus is not None:
            self._lClusterConsensus.append(clusterConsensus)

    ## Split a header of the cluster file into cluster name and sequence header
    #
    # The cluster name is the non-digit prefix followed by the cluster number
    # found before "Mb"; the sequence header is the second space-separated
    # field of the header.
    #
    # @param header string header read from the cluster file
    # @return tuple (clusterName, seqHeader)
    # @raise ValueError if header does not match the expected format
    #
    def _getClusterNameAndSeqHeader(self, header):
        m = self._HEADER_PATTERN.match(header)
        if m is None:
            raise ValueError("Unexpected cluster header format: '%s'" % (header,))
        clusterName = m.group(1) + m.group(2)
        # NOTE(review): the pattern accepts any whitespace after "Mb<digits>" but
        # the split is on a single space only - confirm headers never use tabs
        seqHeader = header.split(" ")[1]
        return clusterName, seqHeader

    ## Fetch a sequence from the whole-file BioseqDB, rename it and add it to a cluster
    #
    # @param iBioseqDBAllCluster BioseqDB holding all the sequences of the cluster file
    # @param firstHeader string header used to fetch the sequence
    # @param seqHeader string header given to the sequence before it is added
    # @param clusterConsensus BioseqDB of the cluster receiving the sequence
    #
    def _addBioseqInClusterConsensus(self, iBioseqDBAllCluster, firstHeader, seqHeader, clusterConsensus):
        ibioseq = iBioseqDBAllCluster.fetch(firstHeader)
        ibioseq.setHeader(seqHeader)
        clusterConsensus.add(ibioseq)

    ## Give the 1-based rank of the first cluster containing a sequence
    #
    # @param seqName string header of the sequence to look for
    # @return int rank of the cluster (first cluster is 1), None if no cluster contains seqName
    #
    def getNumClusterForAConsensus(self, seqName):
        for nbCluster, bioseqDB in enumerate(self._lClusterConsensus, 1):
            if seqName in bioseqDB.getHeaderList():
                return nbCluster
        return None

    ## Give the size of a cluster
    #
    # @param numCluster int 1-based rank of the cluster
    # @return value of getSize() for that cluster
    # @raise IndexError if numCluster is lower than 1 or greater than the number of clusters
    #
    def getNumConsensusInCluster(self, numCluster):
        # Ranks are 1-based: reject 0 and negatives instead of letting them
        # wrap around to the end of the list
        if numCluster < 1:
            raise IndexError("cluster number must be >= 1, got %s" % (numCluster,))
        return self._lClusterConsensus[numCluster - 1].getSize()
