Mercurial > repos > yufei-luo > s_mart
comparison commons/core/seq/ClusterConsensusCollection.py @ 38:2c0c0a89fad7
Uploaded
author | m-zytnicki |
---|---|
date | Thu, 02 May 2013 09:56:47 -0400 |
parents | 769e306b7933 |
children |
comparison
equal
deleted
inserted
replaced
37:d22fadc825e3 | 38:2c0c0a89fad7 |
---|---|
1 import re | |
2 from commons.core.seq.BioseqDB import BioseqDB | |
3 | |
## Record a collection of BioseqDB instances, one per cluster of consensus
#
# The collection is built from a single sequence file whose headers look like
# "<prefix><clusterNumber>Mb<memberNumber> <seqHeader> ...".
#
class ClusterConsensusCollection(object):

    # Same pattern as before, now written as a raw string (so the regex
    # escapes are not treated as string escapes) and compiled once.
    _HEADER_PATTERN = re.compile(r"(\D*)(\d+)Mb\d+\s.*")

    ## constructor
    #
    # @param clusterFileName string name of file containing the cluster of consensus
    #
    def __init__(self, clusterFileName):
        self._clusterFileName = clusterFileName
        self._lClusterConsensus = []

    ## Two collections are equal when they share the same file name and the same list of clusters
    #
    # @param o object to compare with
    # @return boolean, or NotImplemented when o is not a ClusterConsensusCollection
    #
    def __eq__(self, o):
        if not isinstance(o, ClusterConsensusCollection):
            # Let Python fall back to its default comparison instead of
            # failing with an AttributeError on a foreign object.
            return NotImplemented
        return (self._clusterFileName == o._clusterFileName
                and self._lClusterConsensus == o._lClusterConsensus)

    ## Negation of __eq__, defined explicitly so that "!=" agrees with "==" under Python 2
    #
    # @param o object to compare with
    # @return boolean, or NotImplemented when o is not a ClusterConsensusCollection
    #
    def __ne__(self, o):
        isEqual = self.__eq__(o)
        if isEqual is NotImplemented:
            return isEqual
        return not isEqual

    ## Get the list of clusters
    #
    # @return list of BioseqDB instances, one per cluster (the internal list, not a copy)
    #
    def getLClusterConsensus(self):
        return self._lClusterConsensus

    ## Read the cluster file and append one BioseqDB per cluster to the collection
    #
    # Sequences are grouped by runs of consecutive headers sharing the same
    # cluster name, in the order returned by BioseqDB.getHeaderList(). A file
    # without any sequence leaves the collection unchanged. Results are
    # appended, so calling this method twice duplicates the clusters.
    #
    def fillCollection(self):
        iBioseqDBAllCluster = BioseqDB()
        # The context manager closes the file even if read() raises.
        with open(self._clusterFileName, "r") as fClusterFile:
            iBioseqDBAllCluster.read(fClusterFile)

        clusterConsensus = None
        previousClusterName = None
        for header in iBioseqDBAllCluster.getHeaderList():
            clusterName, seqHeader = self._getClusterNameAndSeqHeader(header)
            if clusterConsensus is None or clusterName != previousClusterName:
                # A new run starts: open a new cluster and register it right
                # away (the list keeps a reference, so later additions are seen).
                clusterConsensus = BioseqDB()
                clusterConsensus.setName(clusterName)
                self._lClusterConsensus.append(clusterConsensus)
                previousClusterName = clusterName
            self._addBioseqInClusterConsensus(iBioseqDBAllCluster, header, seqHeader, clusterConsensus)

    ## Split a header of the cluster file into the cluster name and the sequence header
    #
    # @param header string formatted as "<prefix><clusterNumber>Mb<memberNumber> <seqHeader> ..."
    # @return tuple (clusterName, seqHeader): clusterName is "<prefix><clusterNumber>",
    #         seqHeader is the second item of the header split on single spaces
    # @raise ValueError if the header does not follow the expected format
    #
    def _getClusterNameAndSeqHeader(self, header):
        m = self._HEADER_PATTERN.match(header)
        lPartsHeader = header.split(" ")
        if m is None or len(lPartsHeader) < 2:
            raise ValueError("Unexpected cluster header format: '%s'" % header)
        clusterName = m.group(1) + m.group(2)
        seqHeader = lPartsHeader[1]
        return clusterName, seqHeader

    ## Fetch a sequence from the global bank, rename it and add it to a cluster
    #
    # @param iBioseqDBAllCluster BioseqDB instance holding all the sequences of the cluster file
    # @param firstHeader string header of the sequence to fetch in iBioseqDBAllCluster
    # @param seqHeader string new header given to the fetched sequence
    # @param clusterConsensus BioseqDB instance receiving the sequence
    #
    def _addBioseqInClusterConsensus(self, iBioseqDBAllCluster, firstHeader, seqHeader, clusterConsensus):
        ibioseq = iBioseqDBAllCluster.fetch(firstHeader)
        # NOTE(review): setHeader() is applied to the object returned by
        # fetch(); if fetch() does not return a copy, the source bank is
        # renamed as well - confirm against BioseqDB.
        ibioseq.setHeader(seqHeader)
        clusterConsensus.add(ibioseq)

    ## Get the 1-based position of the first cluster containing a given sequence
    #
    # @param seqName string header of the sequence to look for
    # @return int position of the cluster in the collection (starting at 1),
    #         None if no cluster contains the sequence
    #
    def getNumClusterForAConsensus(self, seqName):
        for nbCluster, bioseqDB in enumerate(self._lClusterConsensus, 1):
            if seqName in bioseqDB.getHeaderList():
                return nbCluster
        return None

    ## Get the size of a cluster, as returned by its getSize() method
    #
    # @param numCluster int 1-based position of the cluster in the collection
    # @return value returned by getSize() on the selected cluster
    # @raise IndexError if numCluster is greater than the number of clusters
    # @note numCluster is not validated: values below 1 follow Python negative
    #       indexing (e.g. 0 selects the last cluster)
    #
    def getNumConsensusInCluster(self, numCluster):
        return self._lClusterConsensus[numCluster - 1].getSize()
65 | |
66 |