Mercurial > repos > urgi-team > teiso
diff TEisotools-1.0/commons/core/coord/CountOverlapping.py @ 6:20ec0d14798e draft
Uploaded
| author | urgi-team |
|---|---|
| date | Wed, 20 Jul 2016 05:00:24 -0400 |
| parents | |
| children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/TEisotools-1.0/commons/core/coord/CountOverlapping.py Wed Jul 20 05:00:24 2016 -0400 @@ -0,0 +1,96 @@ +# Copyright INRA (Institut National de la Recherche Agronomique) +# http://www.inra.fr +# http://urgi.versailles.inra.fr +# +# This software is governed by the CeCILL license under French law and +# abiding by the rules of distribution of free software. You can use, +# modify and/ or redistribute the software under the terms of the CeCILL +# license as circulated by CEA, CNRS and INRIA at the following URL +# "http://www.cecill.info". +# +# As a counterpart to the access to the source code and rights to copy, +# modify and redistribute granted by the license, users are provided only +# with a limited warranty and the software's author, the holder of the +# economic rights, and the successive licensors have only limited +# liability. +# +# In this respect, the user's attention is drawn to the risks associated +# with loading, using, modifying and/or developing or reproducing the +# software by the user in light of its specific status of free software, +# that may mean that it is complicated to manipulate, and that also +# therefore means that it is reserved for developers and experienced +# professionals having in-depth computer knowledge. Users are therefore +# encouraged to load and test the software's suitability as regards their +# requirements in conditions enabling the security of their systems and/or +# data to be ensured and, more generally, to use and operate it in the +# same conditions as regards security. +# +# The fact that you are presently reading this means that you have had +# knowledge of the CeCILL license and that you accept its terms. 

import bisect
from commons.core.checker.RepetException import RepetException
from commons.core.LoggerFactory import LoggerFactory

LOG_DEPTH = "commons.coord"


## Count how many features of a fixed list overlap a query interval.
#
# The start and end coordinates of the features are sorted once at construction time,
# so that each call to count() only needs two binary searches.
#
class CountOverlapping(object):

    ## lFeatures must be a list of objects implementing getStart, getEnd, getSeqname methods.
    # If areFeaturesOnDirectStrandsOnly is set to False, isOnReverseStrand and reverse methods must be implemented too.
    # Throws a RepetException if all the features in lFeatures don't share the same getSeqname() result
    #
    # @param lFeatures list of features to count overlaps against
    # @param areFeaturesOnDirectStrandsOnly boolean set to True to skip the strand check of the features
    # @param verbosity int verbosity level handed over to the logger factory
    #
    # @warning features found on the reverse strand are reversed in place (the caller's objects are modified)
    #
    def __init__(self, lFeatures, areFeaturesOnDirectStrandsOnly = False, verbosity = 2):
        self._verbosity = verbosity
        self._log = LoggerFactory.createLogger("%s.%s" % (LOG_DEPTH, self.__class__.__name__), self._verbosity)

        self._areFeaturesOnDirectStrandsOnly = areFeaturesOnDirectStrandsOnly
        self._lFeaturesToCheck = lFeatures
        self._prepareData()

    ## Validate the features, put them all on the direct strand and index their coordinates
    #
    # Raises a RepetException (through _logAndRaise) if the features carry more than one sequence name.
    #
    def _prepareData(self):
        self._nbFeatures = len(self._lFeaturesToCheck)

        sNames = {iFeature.getSeqname() for iFeature in self._lFeaturesToCheck}
        if len(sNames) > 1:
            self._logAndRaise("ERROR: different sequence names in input features list")

        if not self._areFeaturesOnDirectStrandsOnly:
            for iFeature in self._lFeaturesToCheck:
                if iFeature.isOnReverseStrand():
                    iFeature.reverse()
            self._areFeaturesOnDirectStrandsOnly = True

        # Sorted once here (after the strand normalisation above) instead of on every count() call.
        self._lOrderedStart = sorted(iFeature.getStart() for iFeature in self._lFeaturesToCheck)
        self._lOrderedEnd = sorted(iFeature.getEnd() for iFeature in self._lFeaturesToCheck)

    ## Log an error message and raise it as a RepetException
    #
    # @param errorMsg string message to log and to raise
    #
    def _logAndRaise(self, errorMsg):
        self._log.error(errorMsg)
        raise RepetException(errorMsg)

    ## Count number of features overlapping with a given interval
    #
    # @param queryInterval feature to check overlaps number with (must implement getStart, getEnd, getSeqname, isOnReverseStrand and reverse methods)
    # @return int number of input features overlapping with queryInterval
    #
    # @warning queryInterval is reversed in place if it is on the reverse strand
    #
    def count(self, queryInterval):
        if queryInterval.isOnReverseStrand():
            queryInterval.reverse()

        if self._nbFeatures == 0:
            self._log.warning("WARNING: empty feature list. Will return 0 overlap.")
            return 0

        featuresName = self._lFeaturesToCheck[0].getSeqname()
        queryName = queryInterval.getSeqname()
        if featuresName != queryName:
            self._log.warning("WARNING: different sequence names between feature '%s' and queryInterval '%s'. Will return 0 overlap." % (featuresName, queryName))
            # Features located on another sequence cannot overlap the query, whatever their coordinates.
            return 0

        # Features starting at or before the query end...
        nbStartedBeforeQueryEnd = bisect.bisect_right(self._lOrderedStart, queryInterval.getEnd())
        # ...minus those already finished at or before the query start.
        # NOTE(review): a feature ending exactly on the query start is NOT counted, whereas a feature
        # starting exactly on the query end IS counted. Confirm this asymmetry is intended for the
        # coordinate convention in use; bisect_left would make both bounds inclusive.
        nbEndedBeforeQueryStart = bisect.bisect_right(self._lOrderedEnd, queryInterval.getStart())
        return nbStartedBeforeQueryEnd - nbEndedBeforeQueryStart
