annotate TEisotools-1.0/commons/core/seq/ClusterConsensusCollection.py @ 6:20ec0d14798e draft

Uploaded
author urgi-team
date Wed, 20 Jul 2016 05:00:24 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
6
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
1 import re
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
2 from commons.core.seq.BioseqDB import BioseqDB
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
3
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
## Record a collection of BioseqDB representing cluster consensus
#
# One BioseqDB is created per run of consecutive headers sharing the same
# cluster name, in file order. Cluster numbers exposed by the public methods
# are 1-based positions in that list.
#
class ClusterConsensusCollection(object):

    # Expected header layout: <prefix><clusterNumber>Mb<memberNumber><whitespace><rest>
    # Raw string so that \D, \d and \s are regex escapes, not string escapes.
    _HEADER_PATTERN = re.compile(r"(\D*)(\d+)Mb\d+\s.*")

    ## constructor
    #
    # @param clusterFileName string name of file containing the cluster of consensus
    #
    def __init__(self, clusterFileName):
        self._clusterFileName = clusterFileName
        self._lClusterConsensus = []

    ## Two collections are equal when they are of the exact same type and
    # hold the same file name and the same list of cluster BioseqDB
    #
    # @param o object to compare with
    # @return boolean
    #
    def __eq__(self, o):
        if type(o) is not type(self):
            return False
        return (self._clusterFileName == o._clusterFileName
                and self._lClusterConsensus == o._lClusterConsensus)

    ## Negation of __eq__
    #
    # @param o object to compare with
    # @return boolean
    #
    def __ne__(self, o):
        return not self.__eq__(o)

    ## Get the list of BioseqDB, one per cluster
    #
    # @return list the internal list itself (not a copy)
    #
    def getLClusterConsensus(self):
        return self._lClusterConsensus

    ## Read the cluster file and append one BioseqDB per cluster to the collection
    #
    # A new BioseqDB is started every time the cluster name parsed from a
    # header differs from the one of the previous header, so the file is
    # expected to list the members of a cluster contiguously.
    # An empty file leaves the collection unchanged.
    #
    # @raise ValueError if a header does not follow the expected layout
    #
    def fillCollection(self):
        iBioseqDBAllCluster = BioseqDB()
        # 'with' guarantees the handle is released even if read() raises
        with open(self._clusterFileName, "r") as fClusterFile:
            iBioseqDBAllCluster.read(fClusterFile)

        lHeader = iBioseqDBAllCluster.getHeaderList()
        clusterConsensus = None
        previousClusterName = None
        for header in lHeader:
            clusterName, seqHeader = self._getClusterNameAndSeqHeader(header)
            if clusterConsensus is None or clusterName != previousClusterName:
                # cluster boundary: store the finished cluster, start a new one
                if clusterConsensus is not None:
                    self._lClusterConsensus.append(clusterConsensus)
                previousClusterName = clusterName
                clusterConsensus = BioseqDB()
                clusterConsensus.setName(previousClusterName)
            self._addBioseqInClusterConsensus(iBioseqDBAllCluster, header, seqHeader, clusterConsensus)

        # the last cluster is still pending once the loop is over
        if clusterConsensus is not None:
            self._lClusterConsensus.append(clusterConsensus)

    ## Split a header of the cluster file into cluster name and sequence header
    #
    # The cluster name is the non-digit prefix followed by the cluster number
    # (e.g. "BlastclustCluster12" for "BlastclustCluster12Mb3 seqA").
    # The sequence header is the second space-separated field of the header.
    #
    # @param header string header read from the cluster file
    # @return tuple (clusterName, seqHeader)
    # @raise ValueError if the header does not follow the expected layout
    #
    def _getClusterNameAndSeqHeader(self, header):
        m = self._HEADER_PATTERN.match(header)
        lPartsHeader = header.split(" ")
        # the pattern accepts any whitespace but the fields are split on
        # spaces only, so both conditions have to be checked
        if m is None or len(lPartsHeader) < 2:
            raise ValueError("Header '%s' does not match '<name><number>Mb<number> <sequence header>'" % header)
        clusterName = m.group(1) + m.group(2)
        return clusterName, lPartsHeader[1]

    ## Fetch a sequence from the whole-file BioseqDB, rename it and add it to a cluster
    #
    # The fetched object has its header overwritten with seqHeader before
    # being added (NOTE: whether fetch() returns a copy or the stored
    # instance depends on BioseqDB - to be confirmed).
    #
    # @param iBioseqDBAllCluster BioseqDB holding all sequences of the cluster file
    # @param firstHeader string header under which the sequence is fetched
    # @param seqHeader string header given to the sequence inside the cluster
    # @param clusterConsensus BioseqDB receiving the sequence
    #
    def _addBioseqInClusterConsensus(self, iBioseqDBAllCluster, firstHeader, seqHeader, clusterConsensus):
        ibioseq = iBioseqDBAllCluster.fetch(firstHeader)
        ibioseq.setHeader(seqHeader)
        clusterConsensus.add(ibioseq)

    ## Get the number (1-based) of the first cluster containing a given consensus
    #
    # @param seqName string header of the consensus
    # @return int cluster number, or None if no cluster contains seqName
    #
    def getNumClusterForAConsensus(self, seqName):
        for index, bioseqDB in enumerate(self._lClusterConsensus):
            if seqName in bioseqDB.getHeaderList():
                return index + 1
        return None

    ## Get the number of consensus in a given cluster
    #
    # @param numCluster int cluster number (1-based)
    # @return int size reported by the BioseqDB of that cluster
    # @raise IndexError if numCluster is not in [1, number of clusters]
    #
    def getNumConsensusInCluster(self, numCluster):
        # without this guard 0 would silently address the last cluster
        # through negative indexing
        if numCluster < 1:
            raise IndexError("cluster number must be >= 1, got %s" % numCluster)
        return self._lClusterConsensus[numCluster - 1].getSize()
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
70
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
71