view commons/core/seq/ClusterConsensusCollection.py @ 70:6b009f1530a8 draft

Deleted tmp file.
author m-zytnicki
date Wed, 18 Nov 2015 10:59:50 -0500
parents 769e306b7933
children
line wrap: on
line source

import re
from commons.core.seq.BioseqDB import BioseqDB

## Record a collection of BioseqDB instances, one per cluster of consensus
#
# The collection is an ordered list; clusters are numbered from 1 in the
# order in which they are first met in the cluster file.
#
class ClusterConsensusCollection(object):

    # Header layout parsed by _getClusterNameAndSeqHeader: a run of non-digit
    # characters, the cluster number, the literal "Mb", more digits, then a
    # whitespace character followed by the rest of the header.
    _HEADER_PATTERN = re.compile(r"(\D*)(\d+)Mb\d+\s.*")

    ## constructor
    #
    # @param clusterFileName string name of file containing the cluster of consensus
    #
    def __init__(self, clusterFileName):
        self._clusterFileName = clusterFileName
        self._lClusterConsensus = []

    ## Two collections are equal when they share the same file name and the same list of clusters
    #
    # @param o object to compare with
    # @return boolean, or NotImplemented when o is not a ClusterConsensusCollection
    #
    def __eq__(self, o):
        if not isinstance(o, ClusterConsensusCollection):
            # Let Python fall back to its default comparison instead of
            # failing with AttributeError on an unrelated object.
            return NotImplemented
        return (self._clusterFileName == o._clusterFileName
                and self._lClusterConsensus == o._lClusterConsensus)

    ## Negation of __eq__ (Python 2 does not derive != from ==)
    #
    # @param o object to compare with
    # @return boolean, or NotImplemented when o is not a ClusterConsensusCollection
    #
    def __ne__(self, o):
        isEqual = self.__eq__(o)
        if isEqual is NotImplemented:
            return isEqual
        return not isEqual

    ## Give the list of clusters
    #
    # @return list of BioseqDB (the internal list itself, not a copy)
    #
    def getLClusterConsensus(self):
        return self._lClusterConsensus

    ## Read the cluster file and build one BioseqDB per run of consecutive headers sharing a cluster name
    #
    # Clusters are appended to the existing list, so calling this method
    # twice records every cluster twice. A file without any sequence leaves
    # the collection unchanged.
    #
    def fillCollection(self):
        iBioseqDBAllCluster = BioseqDB()
        # The context manager closes the file even when read() raises.
        with open(self._clusterFileName, "r") as fClusterFile:
            iBioseqDBAllCluster.read(fClusterFile)

        currentClusterName = None
        clusterConsensus = None
        for header in iBioseqDBAllCluster.getHeaderList():
            clusterName, seqHeader = self._getClusterNameAndSeqHeader(header)
            if clusterConsensus is None or clusterName != currentClusterName:
                # A new cluster name starts a new BioseqDB; headers are
                # grouped by consecutive runs, not globally by name.
                currentClusterName = clusterName
                clusterConsensus = BioseqDB()
                clusterConsensus.setName(clusterName)
                self._lClusterConsensus.append(clusterConsensus)
            self._addBioseqInClusterConsensus(iBioseqDBAllCluster, header, seqHeader, clusterConsensus)

    ## Split a header of the cluster file into the cluster name and the sequence header
    #
    # @param header string header as found in the cluster file
    # @return tuple (clusterName, seqHeader): clusterName is the non-digit
    #         prefix followed by the cluster number, seqHeader is the second
    #         space-separated field of the header
    # @raise ValueError if the header does not follow the expected layout
    #
    def _getClusterNameAndSeqHeader(self, header):
        m = self._HEADER_PATTERN.match(header)
        if m is None:
            raise ValueError("unexpected cluster header format: %r" % (header,))
        clusterName = m.group(1) + m.group(2)
        lPartsHeader = header.split(" ")
        if len(lPartsHeader) < 2:
            # The pattern accepts any whitespace after the cluster tag, but
            # the sequence header is only looked up after a plain space.
            raise ValueError("no sequence header found in: %r" % (header,))
        return clusterName, lPartsHeader[1]

    ## Fetch a sequence from the whole database, rename it and add it to a cluster
    #
    # @param iBioseqDBAllCluster BioseqDB holding every sequence of the cluster file
    # @param firstHeader string header under which the sequence is fetched
    # @param seqHeader string header given to the sequence before it is added
    # @param clusterConsensus BioseqDB receiving the sequence
    #
    def _addBioseqInClusterConsensus(self, iBioseqDBAllCluster, firstHeader, seqHeader, clusterConsensus):
        ibioseq = iBioseqDBAllCluster.fetch(firstHeader)
        # NOTE(review): the header is changed on the object returned by
        # fetch(); whether that also renames it in iBioseqDBAllCluster depends
        # on BioseqDB.fetch - confirm.
        ibioseq.setHeader(seqHeader)
        clusterConsensus.add(ibioseq)

    ## Give the number (starting at 1) of the first cluster containing a sequence
    #
    # @param seqName string header of the sequence to look for
    # @return int cluster number, or None when no cluster contains seqName
    #
    def getNumClusterForAConsensus(self, seqName):
        for nbCluster, bioseqDB in enumerate(self._lClusterConsensus, 1):
            if seqName in bioseqDB.getHeaderList():
                return nbCluster
        return None

    ## Give the number of sequences recorded in a cluster
    #
    # @param numCluster int cluster number, starting at 1
    # @return the size reported by the BioseqDB of that cluster
    # @raise IndexError if numCluster is lower than 1 or greater than the number of clusters
    #
    def getNumConsensusInCluster(self, numCluster):
        if numCluster < 1:
            # Without this guard, 0 or a negative number would silently
            # address a cluster counted from the end of the list.
            raise IndexError("cluster number must be >= 1, got %r" % (numCluster,))
        return self._lClusterConsensus[numCluster - 1].getSize()