diff commons/core/seq/ClusterConsensusCollection.py @ 36:44d5973c188c

Uploaded
author m-zytnicki
date Tue, 30 Apr 2013 15:02:29 -0400
parents 769e306b7933
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/seq/ClusterConsensusCollection.py	Tue Apr 30 15:02:29 2013 -0400
@@ -0,0 +1,66 @@
+import re
+from commons.core.seq.BioseqDB import BioseqDB
+
+## Record a collection of bioseqDB representing cluster consensus
+#
+class ClusterConsensusCollection(object):
+
+    ## constructor
+    #
+    # @param clusterFileName string name of file containing the cluster of consensus
+    #
+    def __init__(self, clusterFileName):
+        self._clusterFileName = clusterFileName
+        self._lClusterConsensus = []
+
+    def __eq__(self, o):
+        return self._clusterFileName == o._clusterFileName and self._lClusterConsensus == o._lClusterConsensus
+
+    def getLClusterConsensus(self):
+        return self._lClusterConsensus
+    
+    def fillCollection(self):
+        iBioseqDBAllCluster = BioseqDB()
+        fClusterFile = open(self._clusterFileName, "r")
+        iBioseqDBAllCluster.read(fClusterFile)
+        fClusterFile.close()
+        lHeader = iBioseqDBAllCluster.getHeaderList()
+        firstHeader = lHeader[0]
+        previousClusterName, seqHeader = self._getClusterNameAndSeqHeader(firstHeader)
+        clusterConsensus = BioseqDB()
+        clusterConsensus.setName(previousClusterName)
+        self._addBioseqInClusterConsensus(iBioseqDBAllCluster, firstHeader, seqHeader, clusterConsensus)
+        for header in lHeader[1:]:
+            clusterName, seqHeader = self._getClusterNameAndSeqHeader(header)
+            if clusterName != previousClusterName:
+                self._lClusterConsensus.append(clusterConsensus)
+                previousClusterName = clusterName
+                clusterConsensus = BioseqDB()
+                clusterConsensus.setName(previousClusterName)
+            self._addBioseqInClusterConsensus(iBioseqDBAllCluster, header, seqHeader, clusterConsensus)
+        self._lClusterConsensus.append(clusterConsensus)
+                
+    def _getClusterNameAndSeqHeader(self, header):
+        m = re.match("(\D*)(\d+)Mb\d+\s.*", header)
+        clusterNumber = m.group(2)
+        clusterName = m.group(1) + clusterNumber
+        lPartsHeaderheader = header.split(" ")
+        seqHeader = lPartsHeaderheader[1]
+        return clusterName, seqHeader
+
+    def _addBioseqInClusterConsensus(self, iBioseqDBAllCluster, firstHeader, seqHeader, clusterConsensus):
+        ibioseq = iBioseqDBAllCluster.fetch(firstHeader)
+        ibioseq.setHeader(seqHeader)
+        clusterConsensus.add(ibioseq)
+        
+    def getNumClusterForAConsensus(self, seqName):
+        nbCluster = 1
+        for bioseqDB in self._lClusterConsensus:
+            if seqName in bioseqDB.getHeaderList():
+                return nbCluster
+            nbCluster += 1
+            
+    def getNumConsensusInCluster(self, numCluster):
+        return self._lClusterConsensus[numCluster - 1].getSize()
+
+