annotate commons/tools/AnnotationStats.py @ 18:94ab73e8a190

Uploaded
author m-zytnicki
date Mon, 29 Apr 2013 03:20:15 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
18
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
1 #!/usr/bin/env python
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
2
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
3 # Copyright INRA (Institut National de la Recherche Agronomique)
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
4 # http://www.inra.fr
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
5 # http://urgi.versailles.inra.fr
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
6 #
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
7 # This software is governed by the CeCILL license under French law and
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
8 # abiding by the rules of distribution of free software. You can use,
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
9 # modify and/ or redistribute the software under the terms of the CeCILL
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
10 # license as circulated by CEA, CNRS and INRIA at the following URL
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
11 # "http://www.cecill.info".
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
12 #
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
13 # As a counterpart to the access to the source code and rights to copy,
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
14 # modify and redistribute granted by the license, users are provided only
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
15 # with a limited warranty and the software's author, the holder of the
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
16 # economic rights, and the successive licensors have only limited
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
17 # liability.
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
18 #
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
19 # In this respect, the user's attention is drawn to the risks associated
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
20 # with loading, using, modifying and/or developing or reproducing the
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
21 # software by the user in light of its specific status of free software,
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
22 # that may mean that it is complicated to manipulate, and that also
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
23 # therefore means that it is reserved for developers and experienced
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
24 # professionals having in-depth computer knowledge. Users are therefore
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
25 # encouraged to load and test the software's suitability as regards their
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
26 # requirements in conditions enabling the security of their systems and/or
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
27 # data to be ensured and, more generally, to use and operate it in the
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
28 # same conditions as regards security.
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
29 #
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
30 # The fact that you are presently reading this means that you have had
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
31 # knowledge of the CeCILL license and that you accept its terms.
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
32
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
33 ##@file
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
34 # Give summary information on a TE annotation table.
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
35 # options:
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
36 # -h: this help
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
37 # -t: analysis type (default = 1, 1: per transposable element (TE), 2: per cluster, 3: per classification, 4: with map input file)
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
38 # -p: name of the table (_path) or file (.path) with the annotated TE copies
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
39 # -s: name of the table (_seq) or file (.fasta or .fa) with the TE reference sequences
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
40 # -g: length of the genome (in bp)
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
41 # -m: name of the file with the group and the corresponding TE names (format = 'map')
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
42 # -o: name of the output file (default = pathTableName + '_stats.txt')
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
43 # -C: name of the configuration file to access MySQL (e.g. 'TEannot.cfg')
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
44 # -c: remove map files and blastclust file (if analysis type is 2 or 3)
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
45 # -I: identity coverage threshold (default = 0)
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
46 # -L: length coverage threshold (default=0.8)
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
47 # -v: verbosity level (default = 0)
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
48
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
49 from commons.core.LoggerFactory import LoggerFactory
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
50 from commons.core.stat.Stat import Stat
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
51 from commons.core.sql.DbFactory import DbFactory
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
52 from commons.core.sql.TablePathAdaptator import TablePathAdaptator
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
53 from commons.core.sql.TableSeqAdaptator import TableSeqAdaptator
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
54 from commons.tools.getCumulLengthFromTEannot import getCumulLengthFromTEannot
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
55
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
56 LOG_DEPTH = "repet.tools"
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
57
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
58 #TODO: use templating engine instead of raw strings for AnnotationStatsWriter
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
class AnnotationStats(object):
    """Compute summary statistics on annotated TE copies.

    run() dispatches on the analysis name given at construction: "TE" writes
    one line per TE reference sequence plus a global summary file, "Cluster"
    writes one line per cluster of reference sequences read from the cluster
    file.
    """

    def __init__(self, analysisName="TE", clusterFileName="", seqTableName="", pathTableName="", genomeLength=0, statsFileName="", globalStatsFileName="", verbosity=3):
        """Store the run parameters and create the logger.

        @param analysisName string "TE" or "Cluster"; with any other value run() computes no statistics
        @param clusterFileName string file with one cluster per line, consensus names separated by tabs
        @param seqTableName string name handed to TableSeqAdaptator
        @param pathTableName string name handed to TablePathAdaptator and getCumulLengthFromTEannot
        @param genomeLength integer genome length, forwarded to the writer's resume in "TE" mode
        @param statsFileName string output file (one line per TE or per cluster)
        @param globalStatsFileName string output file for the global summary ("TE" mode only)
        @param verbosity integer level given to the logger
        """
        self._analysisName = analysisName
        self._clusterFileName = clusterFileName
        self._seqTableName = seqTableName
        self._pathTableName = pathTableName
        self._genomeLength = genomeLength
        self._statsFileName = statsFileName
        self._globalStatsFileName = globalStatsFileName
        self._iDb = None
        self._iTablePathAdaptator = None
        self._iTableSeqAdaptator = None
        self._save = False
        self._clean = False
        self._verbosity = verbosity
        self._log = LoggerFactory.createLogger("%s.%s" % (LOG_DEPTH, self.__class__.__name__), self._verbosity)

    def _logAndRaise(self, errorMsg):
        """Log errorMsg at error level, then raise it as an Exception."""
        self._log.error(errorMsg)
        raise Exception(errorMsg)

    def setCoverageThreshold(self, lengthThresh):
        """Store the length coverage threshold, converted to float.

        The attribute is not created by __init__; it only exists once this
        setter has been called.
        """
        self._coverageThreshold = float(lengthThresh)

    def setIdentityThreshold(self, identityThresh):
        """Store the identity threshold, converted to int.

        The attribute is not created by __init__; it only exists once this
        setter has been called.
        """
        self._identityThreshold = int(identityThresh)

    def setAnalyseType(self, analyseType):
        """Store the analysis type, converted to str.

        The attribute is not created by __init__; it only exists once this
        setter has been called.
        """
        self._analyseType = str(analyseType)

    def setPathTableName(self, pathTableName):
        """Replace the name of the 'path' table."""
        self._pathTableName = pathTableName

    def setDBInstance(self, iDb):
        """Inject a database instance (note that run() replaces it with a new one)."""
        self._iDb = iDb

    def setTablePathAdaptator(self, iTablePathAdaptator):
        """Inject the 'path' table adaptator (note that run() replaces it with a new one)."""
        self._iTablePathAdaptator = iTablePathAdaptator

    def setTableSeqAdaptator(self, iTableSeqAdaptator):
        """Inject the 'seq' table adaptator (note that run() replaces it with a new one)."""
        self._iTableSeqAdaptator = iTableSeqAdaptator

    def getCumulCoverage(self, consensus=""):
        """Get the coverage of TE copies for a given family (using 'mapOp').

        The work is delegated to getCumulLengthFromTEannot: the subjects are
        exported as a map file, the ranges are merged and the merged length
        is summed. run() calls this method without argument to obtain the
        value it reports as the total cumulative coverage.

        @param consensus string name of a TE family ('subject_name' in the 'path' table)
        @return cumulCoverage integer cumulative coverage
        """
        gclft = getCumulLengthFromTEannot()
        gclft.setInputTable(self._pathTableName)
        gclft.setTErefseq(consensus)
        gclft.setClean()
        # Hand our own connection and adaptator to the helper by setting its
        # private attributes directly.
        gclft._db = self._iDb
        gclft._tpA = self._iTablePathAdaptator
        mapFileName = gclft.getAllSubjectsAsMapOfQueries()
        mergeFileName = gclft.mergeRanges(mapFileName)
        return gclft.getCumulLength(mergeFileName)

    def getNbFullLengths(self, consensusLength, lLengths):
        """Get the number of full-lengths (95% <= L <= 105%).

        @param consensusLength integer length of the reference sequence
        @param lLengths list of integers (lengths to compare to consensusLength)
        @return fullLengthConsensusNb integer
        """
        fullLengthConsensusNb = 0
        for length in lLengths:
            # The division stays inside the loop so that an empty list never
            # divides, even when consensusLength is 0.
            if 0.95 <= length / float(consensusLength) <= 1.05:
                fullLengthConsensusNb += 1
        return fullLengthConsensusNb

    def getStatPerTE(self, consensusName):
        """Gather the statistics of one TE reference sequence.

        @param consensusName string name of the reference sequence
        @return dConsensusStats dictionary with the keys "length",
                "cumulCoverage", "nbFragments", "nbFullLengthFragments",
                "nbCopies", "nbFullLengthCopies", "statsIdentityPerChain",
                "statsLengthPerChain" and "statsLengthPerChainPerc" (the
                last three are Stat instances)
        """
        dConsensusStats = {}
        lLengthPerFragment = self._iTablePathAdaptator.getPathLengthListFromSubject(consensusName)
        lLengthPerCopy = self._iTablePathAdaptator.getChainLengthListFromSubject(consensusName)
        lIdentityPerCopy = self._iTablePathAdaptator.getChainIdentityListFromSubject(consensusName)
        dConsensusStats["length"] = self._iTableSeqAdaptator.getSeqLengthFromAccession(consensusName)
        dConsensusStats["cumulCoverage"] = self.getCumulCoverage(consensusName)
        dConsensusStats["nbFragments"] = len(lLengthPerFragment)
        dConsensusStats["nbFullLengthFragments"] = self.getNbFullLengths(dConsensusStats["length"], lLengthPerFragment)
        dConsensusStats["nbCopies"] = len(lLengthPerCopy)
        dConsensusStats["nbFullLengthCopies"] = self.getNbFullLengths(dConsensusStats["length"], lLengthPerCopy)
        dConsensusStats["statsIdentityPerChain"] = Stat()
        dConsensusStats["statsLengthPerChain"] = Stat()
        dConsensusStats["statsLengthPerChainPerc"] = Stat()
        self._statsForIdentityAndLength(dConsensusStats, lLengthPerCopy, lIdentityPerCopy)
        return dConsensusStats

    def getStatPerCluster(self, lConsensusNames):
        """Gather the statistics of one cluster of reference sequences.

        The coverage is the plain sum of the per-consensus coverages; the
        fragment and copy counts are the sizes of the concatenated lists.

        @param lConsensusNames list of strings (names of the cluster members)
        @return dConsensusClusterStats dictionary with the keys
                "cumulCoverage", "nbFragments" and "nbCopies"
        """
        lLengthPerFragment = []
        lLengthPerCopy = []
        cumulCoverageLength = 0
        for consensusName in lConsensusNames:
            cumulCoverageLength += self.getCumulCoverage(consensusName)
            lLengthPerFragment.extend(self._iTablePathAdaptator.getPathLengthListFromSubject(consensusName))
            lLengthPerCopy.extend(self._iTablePathAdaptator.getChainLengthListFromSubject(consensusName))
        return {
            "cumulCoverage": cumulCoverageLength,
            "nbFragments": len(lLengthPerFragment),
            "nbCopies": len(lLengthPerCopy),
        }

    def getClusterListFromFile(self):
        """Read the cluster file: one cluster per line, names separated by tabs.

        Blank lines are skipped. Without this, a blank line would give a
        cluster holding the single name "", which is the value
        getCumulCoverage() receives when run() asks for the global total.

        @return lClusters list of lists of strings
        """
        lClusters = []
        with open(self._clusterFileName) as fCluster:
            for line in fCluster:
                line = line.rstrip()
                if not line:
                    continue
                lClusters.append(line.split("\t"))
        return lClusters

    def run(self):
        """Open the database, write the statistics files, close the database.

        The database connection is closed even when a step raises, so a
        failure in the middle of the analysis does not leak it.
        """
        LoggerFactory.setLevel(self._log, self._verbosity)
        self._iDb = DbFactory.createInstance()
        try:
            self._iTablePathAdaptator = TablePathAdaptator(self._iDb, self._pathTableName)
            self._iTableSeqAdaptator = TableSeqAdaptator(self._iDb, self._seqTableName)

            iASW = AnnotationStatsWriter()
            if self._analysisName == "TE":
                self._writeStatsPerTE(iASW)
            elif self._analysisName == "Cluster":
                self._writeStatsPerCluster(iASW)

            if self._save:
                outTableName = "%s_statsPer%s" % (self._pathTableName, self._analysisName)
                self._iDb.createTable(outTableName, "pathstat", self._statsFileName)
        finally:
            self._iDb.close()
        self._log.info("END %s" % type(self).__name__)

    def _writeStatsPerTE(self, iASW):
        """Write one line per TE reference sequence, plus the global summary file."""
        with open(self._statsFileName, "w") as fStats:
            fStats.write("%s\tlength\tcovg\tfrags\tfullLgthFrags\tcopies\tfullLgthCopies\tmeanId\tmeanLgth\tmeanLgthPerc\n" % self._analysisName)

            lNamesTErefseq = self._iTableSeqAdaptator.getAccessionsList()
            lDistinctSubjects = self._iTablePathAdaptator.getSubjectList()
            totalCumulCoverage = self.getCumulCoverage()

            with open(self._globalStatsFileName, "w") as fG:
                fG.write("%s\n" % iASW.printResume(lNamesTErefseq, lDistinctSubjects, totalCumulCoverage, self._genomeLength))
                for consensusName in lNamesTErefseq:
                    self._log.debug("processing '%s'..." % consensusName)
                    dStatForOneConsensus = self.getStatPerTE(consensusName)
                    iASW.addCalculsOfOneTE(dStatForOneConsensus)
                    fStats.write("%s\n" % iASW.getStatAsString(consensusName, dStatForOneConsensus))
                fG.write(iASW.printStatsForAllTEs(len(lNamesTErefseq)))

    def _writeStatsPerCluster(self, iASW):
        """Write one line per cluster, biggest clusters first, numbered from 1."""
        lClusters = self.getClusterListFromFile()
        lClusters.sort(key=len, reverse=True)
        with open(self._statsFileName, "w") as fStats:
            #TODO: add fullLgthFrags and fullLgthCopies ? Is addition of previous results significant ?
            fStats.write("%s\tcovg\tfrags\tcopies\n" % self._analysisName)
            for index, lConsensus in enumerate(lClusters):
                self._log.debug("processing '%s'..." % lConsensus)
                dStatForOneCluster = self.getStatPerCluster(lConsensus)
                fStats.write("%s\n" % iASW.getStatAsStringForCluster(str(index + 1), dStatForOneCluster))

    def _statsForIdentityAndLength(self, dStat, lLengthPerCopy, lIdentityPerCopy):
        """Feed the three accumulators stored in dStat.

        @param dStat dictionary holding "length" and the accumulators
               "statsIdentityPerChain", "statsLengthPerChain" and
               "statsLengthPerChainPerc" (each must offer add())
        @param lLengthPerCopy list of copy lengths
        @param lIdentityPerCopy list of copy identities
        """
        for identity in lIdentityPerCopy:
            dStat["statsIdentityPerChain"].add(identity)
        for length in lLengthPerCopy:
            dStat["statsLengthPerChain"].add(length)
            # Copy length as a percentage of the reference sequence length.
            dStat["statsLengthPerChainPerc"].add(100 * length / float(dStat["length"]))
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
224
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
225 class AnnotationStatsWriter(object):
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
226
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
def __init__(self):
    """
    Start every global counter at zero and create the two Stat accumulators
    that addCalculsOfOneTE() feeds with per-TE medians.
    """
    lCounterNames = (
        "sumCumulCoverage",
        "totalNbFragments",
        "totalNbFullLengthFragments",
        "totalNbCopies",
        "totalNbFullLengthCopies",
        "nbFamWithFullLengthFragments",
        "nbFamWithOneFullLengthFragment",
        "nbFamWithTwoFullLengthFragments",
        "nbFamWithThreeFullLengthFragments",
        "nbFamWithMoreThanThreeFullLengthFragments",
        "nbFamWithFullLengthCopies",
        "nbFamWithOneFullLengthCopy",
        "nbFamWithTwoFullLengthCopies",
        "nbFamWithThreeFullLengthCopies",
        "nbFamWithMoreThanThreeFullLengthCopies",
    )
    dTotals = dict.fromkeys(lCounterNames, 0)
    dTotals["statsAllCopiesMedIdentity"] = Stat()
    dTotals["statsAllCopiesMedLengthPerc"] = Stat()
    self._dAllTErefseqs = dTotals
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
246
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
def getAllTEsRefSeqDict(self):
    """
    Return the dictionary of global counters itself (not a copy).
    """
    dTotals = self._dAllTErefseqs
    return dTotals
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
249
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
def getStatAsString( self, name, d ):
    """
    Return the tab-separated statistics line of one TE reference sequence.

    The name comes first, then six integer fields, then the means of the
    three accumulators with two decimals ("NA" for an accumulator that
    holds no value).
    """
    lFields = ["%s" % name]

    for key in ("length", "cumulCoverage", "nbFragments",
                "nbFullLengthFragments", "nbCopies", "nbFullLengthCopies"):
        lFields.append("%i" % d[key])

    for key in ("statsIdentityPerChain", "statsLengthPerChain",
                "statsLengthPerChainPerc"):
        iStat = d[key]
        if iStat.getValuesNumber() == 0:
            lFields.append("NA")
        else:
            lFields.append("%.2f" % iStat.mean())

    return "\t".join(lFields)
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
279
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
def getStatAsStringForCluster( self, name, d ):
    """
    Return the tab-separated statistics line of one cluster: its name, then
    the cumulative coverage, the number of fragments and the number of
    copies, all three as integers.
    """
    lFields = ["%s" % name]
    for key in ("cumulCoverage", "nbFragments", "nbCopies"):
        lFields.append("%i" % d[key])
    return "\t".join(lFields)
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
291
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
def addCalculsOfOneTE(self, dOneTErefseq):
    """
    Add the statistics of one TE reference sequence to the global counters.

    Fragments and copies are handled the same way: the totals are summed,
    and a family owning at least one full-length item is counted once
    overall and once in its bucket (exactly one, two, three, or more than
    three). The medians of the identity and of the length percentage are
    recorded only when the TE has values for them.
    """
    dTotals = self._dAllTErefseqs
    dTotals["sumCumulCoverage"] += dOneTErefseq["cumulCoverage"]

    # (count key, total key, full-length key, full-length total key,
    #  families key, keys for exactly 1/2/3, key for more than 3)
    lCategories = (
        ("nbFragments", "totalNbFragments",
         "nbFullLengthFragments", "totalNbFullLengthFragments",
         "nbFamWithFullLengthFragments",
         ("nbFamWithOneFullLengthFragment",
          "nbFamWithTwoFullLengthFragments",
          "nbFamWithThreeFullLengthFragments"),
         "nbFamWithMoreThanThreeFullLengthFragments"),
        ("nbCopies", "totalNbCopies",
         "nbFullLengthCopies", "totalNbFullLengthCopies",
         "nbFamWithFullLengthCopies",
         ("nbFamWithOneFullLengthCopy",
          "nbFamWithTwoFullLengthCopies",
          "nbFamWithThreeFullLengthCopies"),
         "nbFamWithMoreThanThreeFullLengthCopies"),
    )
    for nbKey, totalKey, fullKey, totalFullKey, famKey, lExactKeys, moreKey in lCategories:
        dTotals[totalKey] += dOneTErefseq[nbKey]
        dTotals[totalFullKey] += dOneTErefseq[fullKey]
        nbFullLength = dOneTErefseq[fullKey]
        if nbFullLength > 0:
            dTotals[famKey] += 1
            for exactNb, exactKey in enumerate(lExactKeys, 1):
                if nbFullLength == exactNb:
                    dTotals[exactKey] += 1
                    break
            else:
                # Reached only when the count is none of 1, 2 and 3.
                if nbFullLength > 3:
                    dTotals[moreKey] += 1

    for sourceKey, targetKey in (("statsIdentityPerChain", "statsAllCopiesMedIdentity"),
                                 ("statsLengthPerChainPerc", "statsAllCopiesMedLengthPerc")):
        iStat = dOneTErefseq[sourceKey]
        if iStat.getValuesNumber() != 0:
            dTotals[targetKey].add(iStat.median())
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
326
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
def printStatsForAllTEs(self, TEnb):
    """Return a multi-line text report of the statistics summed over all TEs.

    The counters are read from ``self._dAllTErefseqs``. The first line
    (total number of TE fragments) is always produced; the detailed
    breakdown is only appended when at least one fragment was counted.

    @param TEnb: number of TE families, used as the denominator of the
                 "families with full-length ..." percentages
    @type TEnb: integer
    @return: the report, one statistic per line, each line ending with a
             newline
    @rtype: string
    """
    def percentage(part, whole):
        # A zero denominator (no copy counted, or TEnb == 0) used to raise
        # ZeroDivisionError; report 0.00% instead so the report is always
        # produced.
        if not whole:
            return 0.0
        return 100 * part / float(whole)

    dAll = self._dAllTErefseqs
    lLines = ["total nb of TE fragments: %i\n" % (dAll["totalNbFragments"])]

    # NOTE(review): the indentation of the original block was not available;
    # every detailed statistic is assumed to sit under the
    # "totalNbFragments != 0" guard -- confirm against the reference file.
    if dAll["totalNbFragments"] == 0:
        return lLines[0]

    lLines.append("total nb full-length fragments: %i (%.2f%%)\n" % (
        dAll["totalNbFullLengthFragments"],
        percentage(dAll["totalNbFullLengthFragments"], dAll["totalNbFragments"])))

    lLines.append("total nb of TE copies: %i\n" % (dAll["totalNbCopies"]))

    lLines.append("total nb full-length copies: %i (%.2f%%)\n" % (
        dAll["totalNbFullLengthCopies"],
        percentage(dAll["totalNbFullLengthCopies"], dAll["totalNbCopies"])))

    lLines.append("families with full-length fragments: %i (%.2f%%)\n" % (
        dAll["nbFamWithFullLengthFragments"],
        percentage(dAll["nbFamWithFullLengthFragments"], TEnb)))
    lLines.append(" with only one full-length fragment: %i\n" % (dAll["nbFamWithOneFullLengthFragment"]))
    lLines.append(" with only two full-length fragments: %i\n" % (dAll["nbFamWithTwoFullLengthFragments"]))
    lLines.append(" with only three full-length fragments: %i\n" % (dAll["nbFamWithThreeFullLengthFragments"]))
    lLines.append(" with more than three full-length fragments: %i\n" % (dAll["nbFamWithMoreThanThreeFullLengthFragments"]))

    lLines.append("families with full-length copies: %i (%.2f%%)\n" % (
        dAll["nbFamWithFullLengthCopies"],
        percentage(dAll["nbFamWithFullLengthCopies"], TEnb)))
    lLines.append(" with only one full-length copy: %i\n" % (dAll["nbFamWithOneFullLengthCopy"]))
    lLines.append(" with only two full-length copies: %i\n" % (dAll["nbFamWithTwoFullLengthCopies"]))
    lLines.append(" with only three full-length copies: %i\n" % (dAll["nbFamWithThreeFullLengthCopies"]))
    lLines.append(" with more than three full-length copies: %i\n" % (dAll["nbFamWithMoreThanThreeFullLengthCopies"]))

    # Both entries are statistics objects exposing mean() and sd().
    lLines.append("mean of median identity of all families: %.2f +- %.2f\n" % (
        dAll["statsAllCopiesMedIdentity"].mean(),
        dAll["statsAllCopiesMedIdentity"].sd()))

    lLines.append("mean of median length percentage of all families: %.2f +- %.2f\n" % (
        dAll["statsAllCopiesMedLengthPerc"].mean(),
        dAll["statsAllCopiesMedLengthPerc"].sd()))

    return "".join(lLines)
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
367
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
def printResume(self, lNamesTErefseq, lDistinctSubjects, totalCumulCoverage, genomeLength):
    """Return a short text summary of the annotation coverage.

    @param lNamesTErefseq: collection of sequences; only its length is reported
    @type lNamesTErefseq: list
    @param lDistinctSubjects: collection of matched sequences; only its length
                              is reported
    @type lDistinctSubjects: list
    @param totalCumulCoverage: cumulative coverage, in bp
    @type totalCumulCoverage: integer
    @param genomeLength: denominator of the coverage percentage
    @type genomeLength: integer
    @return: four lines, each ending with a newline
    @rtype: string
    """
    # A zero genome length used to raise ZeroDivisionError; report 0.00%
    # instead so the summary is always produced.
    if genomeLength:
        coveragePerc = 100 * totalCumulCoverage / float(genomeLength)
    else:
        coveragePerc = 0.0

    lLines = [
        "nb of sequences: %i\n" % len(lNamesTErefseq),
        "nb of matched sequences: %i\n" % len(lDistinctSubjects),
        "cumulative coverage: %i bp\n" % totalCumulCoverage,
        "coverage percentage: %.2f%%\n" % coveragePerc,
    ]
    return "".join(lLines)