Mercurial > repos > yufei-luo > s_mart
view SMART/Java/Python/ncList/NCListCursor.py @ 69:1473ab954708 draft
Corrected bug in "CollapsedReads" XML file.
author | m-zytnicki |
---|---|
date | Wed, 18 Nov 2015 10:59:02 -0500 |
parents | 769e306b7933 |
children |
line wrap: on
line source
#! /usr/bin/env python # # Copyright INRA-URGI 2009-2010 # # This software is governed by the CeCILL license under French law and # abiding by the rules of distribution of free software. You can use, # modify and/ or redistribute the software under the terms of the CeCILL # license as circulated by CEA, CNRS and INRIA at the following URL # "http://www.cecill.info". # # As a counterpart to the access to the source code and rights to copy, # modify and redistribute granted by the license, users are provided only # with a limited warranty and the software's author, the holder of the # economic rights, and the successive licensors have only limited # liability. # # In this respect, the user's attention is drawn to the risks associated # with loading, using, modifying and/or developing or reproducing the # software by the user in light of its specific status of free software, # that may mean that it is complicated to manipulate, and that also # therefore means that it is reserved for developers and experienced # professionals having in-depth computer knowledge. Users are therefore # encouraged to load and test the software's suitability as regards their # requirements in conditions enabling the security of their systems and/or # data to be ensured and, more generally, to use and operate it in the # same conditions as regards security. # # The fact that you are presently reading this means that you have had # knowledge of the CeCILL license and that you accept its terms. 
#
import os
import os.path
import struct

from commons.core.parsing.GffParser import GffParser
from SMART.Java.Python.misc.Progress import Progress


class Data(object):
    """Precomputed record for one element of the first list.

    Instances are created by ``NCListCursor.precompute`` and read back by
    ``NCListCursor._updateFromMainListData``.
    """

    def __init__(self, hIndex, transcript, firstChildLIndex, lastChildLIndex, start, end):
        self.hIndex = hIndex
        self.transcript = transcript
        self.firstChildLIndex = firstChildLIndex
        self.lastChildLIndex = lastChildLIndex
        self.start = start
        self.end = end


class NCListCursor(object):
    """Cursor pointing at one element of an NC-list, addressed by its L-index.

    Most fields are loaded lazily from the underlying ``ncList`` object:
    ``None`` means "not loaded yet", while -1 is the sentinel used throughout
    this class for "no such element" (cursor out of the list, no parent,
    no children).
    """

    def __init__(self, cursor = None, ncList = None, lIndex = 0, verbosity = 0):
        """Build a cursor, either as a copy of ``cursor`` or on ``ncList``.

        :param cursor: another cursor to copy; when given, ``ncList`` and
            ``lIndex`` are ignored
        :param ncList: the list object the cursor reads from
        :param lIndex: initial L-index of the cursor
        :param verbosity: forwarded to the progress reporter in ``precompute``
        """
        self._verbosity = verbosity
        self._mainListData = []
        if cursor:
            self.copy(cursor)
        else:
            self._ncList = ncList
            self.setLIndex(lIndex)

    def _resetCurrentData(self):
        """Forget every lazily loaded field describing the current element."""
        self._hIndex = None
        self._gffIndex = None
        self._start = None
        self._end = None
        self._transcript = None
        self._firstChildLIndex = None
        self._lastChildLIndex = None

    def setLIndex(self, lIndex):
        """Point the cursor at ``lIndex`` and drop all cached data."""
        self._lIndex = lIndex
        self._resetCurrentData()
        self._parentGffIndex = None
        self._parentLIndex = None
        self._parentHIndex = None
        self._parentStart = None
        self._parentEnd = None
        self._firstSiblingLIndex = None
        self._lastSiblingLIndex = None
        # The main-list index is only tracked while the cursor is in the first list.
        self._mainListIndex = lIndex if lIndex < self._ncList.getSizeFirstList() else None

    def precompute(self):
        """Load the data of every element of the first list into memory.

        One ``Data`` record is appended to ``_mainListData`` per element, so
        calling this method twice on the same cursor duplicates the records.
        """
        self._mainListIndex = 0
        sizeFirstList = self._ncList.getSizeFirstList()
        progress = Progress(sizeFirstList, "Precomputing data", self._verbosity)
        for i in range(sizeFirstList):
            gffIndex, hIndex, parentLIndex, start, end = self._ncList.getLLineElements(i)
            transcript = self._ncList.getIntervalFromAdress(gffIndex)
            firstChildLIndex, nbChildren = self._ncList.getHLineElements(hIndex)
            lastChildLIndex = -1 if firstChildLIndex == -1 else firstChildLIndex + nbChildren - 1
            self._mainListData.append(Data(hIndex, transcript, firstChildLIndex, lastChildLIndex, start, end))
            progress.inc()
        progress.done()

    def _updateFromMainListData(self):
        """Refresh the current fields from the precomputed first-list data.

        :return: True if the fields were refreshed, False when no precomputed
            data applies (nothing precomputed, cursor outside the first list,
            or no main-list index being tracked)
        """
        if not self._mainListData or self._lIndex >= self._ncList.getSizeFirstList():
            return False
        if self._mainListIndex is None:
            # The cursor was created outside the first list: there is no
            # position to read from, so let the lazy loaders do the work.
            return False
        if self._mainListIndex >= self._ncList.getSizeFirstList():
            # NOTE(review): the original code falls through to the lookup
            # below after setting this sentinel; kept as is -- confirm intent.
            self._hIndex = -1
        data = self._mainListData[self._mainListIndex]
        self._hIndex = data.hIndex
        self._transcript = data.transcript
        self._firstChildLIndex = data.firstChildLIndex
        self._lastChildLIndex = data.lastChildLIndex
        self._start = data.start
        self._end = data.end
        return True

    def getLIndex(self):
        """Return the current L-index."""
        return self._lIndex

    def _getCurrentData(self):
        """Load the L-line fields of the current element.

        :raises Exception: if the loaded end is -1
        """
        self._gffIndex, self._hIndex, self._parentLIndex, self._start, self._end = self._ncList.getLLineElements(self._lIndex)
        if self._end == -1:
            raise Exception("Error: end is -1 for L-index %s" % (self._lIndex,))

    def _getParentData(self):
        """Load the L-line fields of the parent of the current element."""
        if self._parentLIndex is None:
            self._getCurrentData()
        self._parentGffIndex, self._parentHIndex, greatParentLIndex, self._parentStart, self._parentEnd = self._ncList.getLLineElements(self._parentLIndex)

    def _getTranscript(self):
        """Load the transcript stored at the current GFF address."""
        if self._gffIndex is None:
            self._getCurrentData()
        self._transcript = self._ncList.getIntervalFromAdress(self._gffIndex)

    def _getSiblingData(self):
        """Load the L-indices of the first and last siblings."""
        if self._parentHIndex is None:
            self._getParentData()
        if self._parentHIndex == -1:
            # No parent H-line: the siblings are the whole first list.
            self._firstSiblingLIndex = 0
            self._lastSiblingLIndex = self._ncList.getSizeFirstList() - 1
        else:
            self._firstSiblingLIndex, nbSiblings = self._ncList.getHLineElements(self._parentHIndex)
            self._lastSiblingLIndex = -1 if self._firstSiblingLIndex == -1 else self._firstSiblingLIndex + nbSiblings - 1

    def _getChildrenData(self):
        """Load the L-indices of the first and last children."""
        if self._hIndex is None:
            self._getCurrentData()
        self._firstChildLIndex, nbChildren = self._ncList.getHLineElements(self._hIndex)
        self._lastChildLIndex = -1 if self._firstChildLIndex == -1 else self._firstChildLIndex + nbChildren - 1

    def getGffAddress(self):
        """Return the GFF address of the current element, loading it if needed."""
        if self._gffIndex is None:
            self._getCurrentData()
        return self._gffIndex

    def getStart(self):
        """Return the start of the current element, loading it if needed."""
        if self._start is None:
            self._getCurrentData()
        return self._start

    def getEnd(self):
        """Return the end of the current element, loading it if needed."""
        if self._end is None:
            self._getCurrentData()
        return self._end

    def compare(self, cursor):
        """Return True if ``cursor`` has the same L-index as this cursor."""
        return (self._lIndex == cursor._lIndex)

    def getTranscript(self):
        """Return the current transcript, or None if the cursor is out."""
        if self.isOut():
            return None
        if self._transcript is None:
            self._getTranscript()
        return self._transcript

    def isFirst(self):
        """Return True if the cursor is on the first element of its sibling list."""
        if self._lIndex < self._ncList.getSizeFirstList() - 1:
            return (self._lIndex == 0)
        if self._firstSiblingLIndex is None:
            self._getSiblingData()
        return (self._lIndex == self._firstSiblingLIndex)

    def isLast(self):
        """Return True if the cursor is on the last element of its sibling list."""
        if self._lIndex < self._ncList.getSizeFirstList() - 1:
            # Strictly before the last element of the first list.
            return False
        if self._lastSiblingLIndex is None:
            self._getSiblingData()
        return (self._lIndex == self._lastSiblingLIndex)

    def moveUp(self):
        """Move the cursor to the parent of the current element."""
        if self._parentLIndex is None:
            self._getCurrentData()
        self._lIndex = self._parentLIndex
        if not self._updateFromMainListData():
            # Without precomputed data the cached child bounds still describe
            # the element we just left; drop them so they are reloaded lazily.
            self._firstChildLIndex = None
            self._lastChildLIndex = None
        self._hIndex = self._parentHIndex
        self._gffIndex = self._parentGffIndex
        self._start = self._parentStart
        self._end = self._parentEnd
        self._transcript = None
        self._parentLIndex = None
        self._parentHIndex = None
        self._parentGffIndex = None
        self._parentStart = None
        self._parentEnd = None
        self._firstSiblingLIndex = None
        self._lastSiblingLIndex = None

    def moveRight(self):
        """Move the cursor to the next L-index; do nothing if it is out."""
        if self.isOut():
            return
        if self._lIndex < self._ncList.getSizeFirstList() - 1 and self._mainListIndex is not None:
            self._mainListIndex += 1
            self._updateFromMainListData()
        self._lIndex += 1
        self._resetCurrentData()

    def moveNext(self):
        """Move to the next element, climbing up while on a last sibling.

        The cursor is set out (L-index -1) when the last top element is passed.
        """
        while not self.isOut() and self.isLast():
            if self.isTop():
                self._lIndex = -1
                return
            self.moveUp()
        self.moveRight()

    def moveMiddleSibling(self):
        """Move the cursor to the middle element of its sibling list."""
        if self._lIndex < self._ncList.getSizeFirstList() - 1:
            # Floor division keeps the index an integer under true division too.
            self._mainListIndex = (self._ncList.getSizeFirstList() - 1) // 2
            self._updateFromMainListData()
        if self._lastSiblingLIndex is None:
            self._getSiblingData()
        self._lIndex = (self._lastSiblingLIndex + self._firstSiblingLIndex) // 2
        self._resetCurrentData()

    def moveSibling(self, lIndex):
        """Move the cursor to ``lIndex``, keeping parent and sibling data."""
        if self._lIndex < self._ncList.getSizeFirstList() - 1:
            self._mainListIndex = lIndex
            self._updateFromMainListData()
        self._lIndex = lIndex
        self._resetCurrentData()

    def moveLastSibling(self):
        """Move the cursor to the last element of its sibling list."""
        if self._lIndex < self._ncList.getSizeFirstList() - 1:
            self._mainListIndex = self._ncList.getSizeFirstList() - 1
            self._updateFromMainListData()
        if self._lastSiblingLIndex is None:
            self._getSiblingData()
        self._lIndex = self._lastSiblingLIndex
        self._resetCurrentData()

    def moveDown(self):
        """Move the cursor to the first child of the current element."""
        if self._firstChildLIndex is None:
            self._getChildrenData()
        # The current element becomes the parent of the new position.
        self._parentLIndex = self._lIndex
        self._parentHIndex = self._hIndex
        self._parentGffIndex = self._gffIndex
        self._parentStart = self._start
        self._parentEnd = self._end
        # Its children become the sibling list of the new position.
        self._lIndex = self._firstChildLIndex
        self._firstSiblingLIndex = self._firstChildLIndex
        self._lastSiblingLIndex = self._lastChildLIndex
        self._resetCurrentData()

    def isOut(self):
        """Return True if the cursor is out of the list (L-index -1)."""
        return (self._lIndex == -1)

    def isTop(self):
        """Return True if the current element has no parent (parent L-index -1)."""
        if self._parentLIndex is None:
            self._getCurrentData()
        return (self._parentLIndex == -1)

    def hasChildren(self):
        """Return True if the current element has at least one child."""
        if self._hIndex is None:
            self._getCurrentData()
        if self._hIndex == -1:
            return False
        if self._firstChildLIndex is None:
            self._getChildrenData()
        return (self._firstChildLIndex != -1)

    def copy(self, cursor):
        """Copy the state of ``cursor`` into this cursor.

        The precomputed ``_mainListData`` list is shared by reference, not
        duplicated.
        """
        self._ncList = cursor._ncList
        self._lIndex = cursor._lIndex
        self._hIndex = cursor._hIndex
        self._gffIndex = cursor._gffIndex
        self._parentLIndex = cursor._parentLIndex
        self._parentHIndex = cursor._parentHIndex
        self._parentGffIndex = cursor._parentGffIndex
        self._transcript = cursor._transcript
        self._firstSiblingLIndex = cursor._firstSiblingLIndex
        self._lastSiblingLIndex = cursor._lastSiblingLIndex
        self._firstChildLIndex = cursor._firstChildLIndex
        self._lastChildLIndex = cursor._lastChildLIndex
        self._mainListData = cursor._mainListData
        self._mainListIndex = cursor._mainListIndex
        self._verbosity = cursor._verbosity
        self._parentStart = cursor._parentStart
        self._parentEnd = cursor._parentEnd
        self._start = cursor._start
        self._end = cursor._end

    def __str__(self):
        return "NC-list: %s, Lindex: %s, Hindex: %s, GFFindex: %s, start: %s, end: %s, parent Lindex: %s, parent Hindex: %s, parent GFFindex: %s, transcript: %s, last sibling: %s" % (self._ncList, self._lIndex, self._hIndex, self._gffIndex, self._start, self._end, self._parentLIndex, self._parentHIndex, self._parentGffIndex, self._transcript, self._lastSiblingLIndex)