view SMART/Java/Python/ncList/NCListCursor.py @ 69:1473ab954708 draft

Corrected bug in "CollapsedReads" XML file.
author m-zytnicki
date Wed, 18 Nov 2015 10:59:02 -0500
parents 769e306b7933
children
line wrap: on
line source

#! /usr/bin/env python
#
# Copyright INRA-URGI 2009-2010
# 
# This software is governed by the CeCILL license under French law and
# abiding by the rules of distribution of free software. You can use,
# modify and/ or redistribute the software under the terms of the CeCILL
# license as circulated by CEA, CNRS and INRIA at the following URL
# "http://www.cecill.info".
# 
# As a counterpart to the access to the source code and rights to copy,
# modify and redistribute granted by the license, users are provided only
# with a limited warranty and the software's author, the holder of the
# economic rights, and the successive licensors have only limited
# liability.
# 
# In this respect, the user's attention is drawn to the risks associated
# with loading, using, modifying and/or developing or reproducing the
# software by the user in light of its specific status of free software,
# that may mean that it is complicated to manipulate, and that also
# therefore means that it is reserved for developers and experienced
# professionals having in-depth computer knowledge. Users are therefore
# encouraged to load and test the software's suitability as regards their
# requirements in conditions enabling the security of their systems and/or
# data to be ensured and, more generally, to use and operate it in the
# same conditions as regards security.
# 
# The fact that you are presently reading this means that you have had
# knowledge of the CeCILL license and that you accept its terms.
#
import os, os.path, struct
from commons.core.parsing.GffParser import GffParser
from SMART.Java.Python.misc.Progress import Progress


class Data(object):
    """Plain record holding the precomputed fields of one first-list entry.

    Instances are built by NCListCursor.precompute() and read back by
    NCListCursor._updateFromMainListData(); the class itself has no logic.
    """

    def __init__(self, hIndex, transcript, firstChildLIndex, lastChildLIndex, start, end):
        # Values are stored verbatim, grouped by what they describe.
        self.hIndex, self.transcript = hIndex, transcript
        self.firstChildLIndex, self.lastChildLIndex = firstChildLIndex, lastChildLIndex
        self.start, self.end = start, end

class NCListCursor(object):
    """Cursor designating one entry of an NC-list by its index (``lIndex``).

    Every other field (coordinates, GFF address, parent, siblings,
    children, transcript) is loaded lazily from the underlying list object
    through ``getLLineElements``, ``getHLineElements`` and
    ``getIntervalFromAdress``; ``None`` means "not loaded yet".

    The value -1 is used as a sentinel throughout: ``lIndex == -1`` means the
    cursor is out of the list, ``parentLIndex == -1`` means the entry is at
    the top level, and ``hIndex == -1`` / ``firstChildLIndex == -1`` mean the
    entry has no children.

    ``precompute()`` optionally fills ``_mainListData`` with one ``Data``
    record per entry of the first list; that list object is shared (not
    duplicated) between cursors created with ``copy()``.
    """

    def __init__(self, cursor = None, ncList = None, lIndex = 0, verbosity = 0):
        """Build a cursor, either as a copy of ``cursor`` or on ``ncList``.

        cursor    -- cursor to duplicate; when given, ``ncList`` and
                     ``lIndex`` are ignored
        ncList    -- list object the cursor navigates in
        lIndex    -- initial position of the cursor
        verbosity -- passed to the progress reporter used by precompute()
        """
        self._verbosity = verbosity
        self._mainListData = []
        if cursor:
            self.copy(cursor)
        else:
            self._ncList = ncList
            self.setLIndex(lIndex)

    def setLIndex(self, lIndex):
        """Place the cursor on ``lIndex`` and forget every cached field."""
        self._lIndex             = lIndex
        self._start              = None
        self._end                = None
        self._hIndex             = None
        self._gffIndex           = None
        self._parentGffIndex     = None
        self._parentLIndex       = None
        self._parentHIndex       = None
        self._parentStart        = None
        self._parentEnd          = None
        self._transcript         = None
        self._firstSiblingLIndex = None
        self._lastSiblingLIndex  = None
        self._firstChildLIndex   = None
        self._lastChildLIndex    = None
        # Only positions inside the first list have an entry in the cache.
        self._mainListIndex      = lIndex if lIndex < self._ncList.getSizeFirstList() else None

    def precompute(self):
        """Cache the data of every entry of the first list in memory.

        The cache is cleared in place first, so that calling this method
        twice does not append duplicates and cursors sharing the cache list
        (see copy()) keep seeing the same object.
        """
        self._mainListIndex = 0
        del self._mainListData[:]
        size = self._ncList.getSizeFirstList()
        progress = Progress(size, "Precomputing data", self._verbosity)
        for i in range(size):
            gffIndex, hIndex, parentLIndex, start, end = self._ncList.getLLineElements(i)
            transcript = self._ncList.getIntervalFromAdress(gffIndex)
            firstChildLIndex, nbChildren = self._ncList.getHLineElements(hIndex)
            lastChildLIndex = -1 if firstChildLIndex == -1 else firstChildLIndex + nbChildren-1
            self._mainListData.append(Data(hIndex, transcript, firstChildLIndex, lastChildLIndex, start, end))
            progress.inc()
        progress.done()

    def _updateFromMainListData(self):
        """Load the current fields from the precomputed cache, if possible.

        The record read is the one at ``_mainListIndex``.  Returns True when
        the fields were loaded, False (leaving the cursor untouched) when
        there is no cache, when the cursor is outside the first list, or
        when ``_mainListIndex`` does not designate a cached record.
        """
        if not self._mainListData or self._lIndex >= self._ncList.getSizeFirstList():
            return False
        index = self._mainListIndex
        # A missing or out-of-range index used to fall through to the list
        # access below and raise; a negative one would silently wrap around.
        if index is None or index < 0 or index >= len(self._mainListData):
            return False
        data = self._mainListData[index]
        self._hIndex           = data.hIndex
        self._transcript       = data.transcript
        self._firstChildLIndex = data.firstChildLIndex
        self._lastChildLIndex  = data.lastChildLIndex
        self._start            = data.start
        self._end              = data.end
        return True

    def _resetCurrentData(self):
        """Forget the lazily loaded fields describing the current entry."""
        self._hIndex           = None
        self._start            = None
        self._end              = None
        self._gffIndex         = None
        self._transcript       = None
        self._firstChildLIndex = None
        self._lastChildLIndex  = None

    def getLIndex(self):
        """Return the current position of the cursor."""
        return self._lIndex

    def _getCurrentData(self):
        """Load GFF address, H index, parent index, start and end of the current entry.

        Raises Exception when the loaded end is -1.
        """
        self._gffIndex, self._hIndex, self._parentLIndex, self._start, self._end = self._ncList.getLLineElements(self._lIndex)
        if self._end == -1:
            raise Exception("Error")

    def _getParentData(self):
        """Load GFF address, H index, start and end of the parent entry."""
        if self._parentLIndex is None:
            self._getCurrentData()
        # The parent's own parent index is read but not needed here.
        self._parentGffIndex, self._parentHIndex, _, self._parentStart, self._parentEnd = self._ncList.getLLineElements(self._parentLIndex)

    def _getTranscript(self):
        """Load the transcript stored at the GFF address of the current entry."""
        if self._gffIndex is None:
            self._getCurrentData()
        self._transcript = self._ncList.getIntervalFromAdress(self._gffIndex)

    def _getSiblingData(self):
        """Compute the first and last index of the sibling list of the current entry."""
        if self._parentHIndex is None:
            self._getParentData()
        if self._parentHIndex == -1:
            # No parent sub-list: the siblings are the whole first list.
            self._firstSiblingLIndex = 0
            self._lastSiblingLIndex  = self._ncList.getSizeFirstList() - 1
        else:
            self._firstSiblingLIndex, nbSiblings = self._ncList.getHLineElements(self._parentHIndex)
            self._lastSiblingLIndex = -1 if self._firstSiblingLIndex == -1 else self._firstSiblingLIndex + nbSiblings-1

    def _getChildrenData(self):
        """Compute the first and last index of the children of the current entry (-1 if none)."""
        if self._hIndex is None:
            self._getCurrentData()
        self._firstChildLIndex, nbChildren = self._ncList.getHLineElements(self._hIndex)
        self._lastChildLIndex = -1 if self._firstChildLIndex == -1 else self._firstChildLIndex + nbChildren-1

    def getGffAddress(self):
        """Return the GFF address of the current entry, loading it if needed."""
        if self._gffIndex is None:
            self._getCurrentData()
        return self._gffIndex

    def getStart(self):
        """Return the start of the current entry, loading it if needed."""
        if self._start is None:
            self._getCurrentData()
        return self._start

    def getEnd(self):
        """Return the end of the current entry, loading it if needed."""
        if self._end is None:
            self._getCurrentData()
        return self._end

    def compare(self, cursor):
        """Return True when both cursors are at the same index."""
        return (self._lIndex == cursor._lIndex)

    def getTranscript(self):
        """Return the transcript of the current entry, or None when the cursor is out."""
        if self.isOut():
            return None
        if self._transcript is None:
            self._getTranscript()
        return self._transcript

    def isFirst(self):
        """Return True when the cursor is on the first entry of its sibling list."""
        # Fast path: these positions belong to the first list, which starts at 0.
        if self._lIndex < self._ncList.getSizeFirstList() - 1:
            return (self._lIndex == 0)
        if self._firstSiblingLIndex is None:
            self._getSiblingData()
        return (self._lIndex == self._firstSiblingLIndex)

    def isLast(self):
        """Return True when the cursor is on the last entry of its sibling list."""
        # Fast path: a position strictly before the last entry of the first
        # list cannot be the last one.
        if self._lIndex < self._ncList.getSizeFirstList() - 1:
            return False
        if self._lastSiblingLIndex is None:
            self._getSiblingData()
        return (self._lIndex == self._lastSiblingLIndex)

    def moveUp(self):
        """Move the cursor to the parent of the current entry."""
        if self._parentLIndex is None:
            self._getCurrentData()
        self._lIndex = self._parentLIndex
        # The cached child range may be kept only when the cache record that
        # was just read is the one of the new position.  Otherwise the range
        # still describes the entry we came from and must be reloaded lazily.
        cacheIsCurrent = self._updateFromMainListData() and self._mainListIndex == self._lIndex
        if not cacheIsCurrent:
            self._firstChildLIndex = None
            self._lastChildLIndex  = None
        # What was known about the parent now describes the current entry.
        self._hIndex             = self._parentHIndex
        self._gffIndex           = self._parentGffIndex
        self._start              = self._parentStart
        self._end                = self._parentEnd
        self._transcript         = None
        # Nothing is known yet about the new parent and the new siblings.
        self._parentLIndex       = None
        self._parentHIndex       = None
        self._parentGffIndex     = None
        self._parentStart        = None
        self._parentEnd          = None
        self._firstSiblingLIndex = None
        self._lastSiblingLIndex  = None

    def moveRight(self):
        """Move the cursor to the next index; do nothing when the cursor is out."""
        if self.isOut():
            return
        if self._lIndex < self._ncList.getSizeFirstList() - 1 and self._mainListIndex is not None:
            self._mainListIndex += 1
            self._updateFromMainListData()
        self._lIndex += 1
        self._resetCurrentData()

    def moveNext(self):
        """Move to the following entry, climbing up whenever a sibling list is exhausted.

        The cursor is put out (index -1) when the last top-level entry has
        been passed.
        """
        while not self.isOut() and self.isLast():
            if self.isTop():
                self._lIndex = -1
                return
            self.moveUp()
        self.moveRight()

    def moveMiddleSibling(self):
        """Move the cursor to the middle of its sibling list."""
        # Floor division keeps the indices integral whatever the division
        # semantics in force.
        if self._lIndex < self._ncList.getSizeFirstList() - 1:
            self._mainListIndex = (self._ncList.getSizeFirstList() - 1) // 2
            self._updateFromMainListData()
        if self._lastSiblingLIndex is None:
            self._getSiblingData()
        self._lIndex = (self._lastSiblingLIndex + self._firstSiblingLIndex) // 2
        self._resetCurrentData()

    def moveSibling(self, lIndex):
        """Move the cursor to ``lIndex``, keeping what is known about parent and siblings."""
        if self._lIndex < self._ncList.getSizeFirstList() - 1:
            self._mainListIndex = lIndex
            self._updateFromMainListData()
        self._lIndex = lIndex
        self._resetCurrentData()

    def moveLastSibling(self):
        """Move the cursor to the last entry of its sibling list."""
        if self._lIndex < self._ncList.getSizeFirstList() - 1:
            self._mainListIndex = self._ncList.getSizeFirstList() - 1
            self._updateFromMainListData()
        if self._lastSiblingLIndex is None:
            self._getSiblingData()
        self._lIndex = self._lastSiblingLIndex
        self._resetCurrentData()

    def moveDown(self):
        """Move the cursor to the first child of the current entry.

        The cursor ends up out (index -1) when the entry has no children.
        """
        if self._firstChildLIndex is None:
            self._getChildrenData()
        # The current entry becomes the parent...
        self._parentLIndex       = self._lIndex
        self._parentHIndex       = self._hIndex
        self._parentGffIndex     = self._gffIndex
        self._parentStart        = self._start
        self._parentEnd          = self._end
        # ...and its children become the sibling list.  Both bounds are set
        # so that isFirst() does not use the bounds of the previous level.
        self._lIndex             = self._firstChildLIndex
        self._firstSiblingLIndex = self._firstChildLIndex
        self._lastSiblingLIndex  = self._lastChildLIndex
        self._resetCurrentData()

    def isOut(self):
        """Return True when the cursor is out of the list (index -1)."""
        return (self._lIndex == -1)

    def isTop(self):
        """Return True when the current entry has no parent (parent index -1)."""
        if self._parentLIndex is None:
            self._getCurrentData()
        return (self._parentLIndex == -1)

    def hasChildren(self):
        """Return True when the current entry has at least one child."""
        if self._hIndex is None:
            self._getCurrentData()
        if self._hIndex == -1:
            return False
        if self._firstChildLIndex is None:
            self._getChildrenData()
        return (self._firstChildLIndex != -1)

    def copy(self, cursor):
        """Make this cursor identical to ``cursor``.

        The list object and the precomputed cache are shared with
        ``cursor``, not duplicated.
        """
        self._ncList             = cursor._ncList
        self._lIndex             = cursor._lIndex
        self._hIndex             = cursor._hIndex
        self._gffIndex           = cursor._gffIndex
        self._parentLIndex       = cursor._parentLIndex
        self._parentHIndex       = cursor._parentHIndex
        self._parentGffIndex     = cursor._parentGffIndex
        self._transcript         = cursor._transcript
        self._firstSiblingLIndex = cursor._firstSiblingLIndex
        self._lastSiblingLIndex  = cursor._lastSiblingLIndex
        self._firstChildLIndex   = cursor._firstChildLIndex
        self._lastChildLIndex    = cursor._lastChildLIndex
        self._mainListData       = cursor._mainListData
        self._mainListIndex      = cursor._mainListIndex
        self._verbosity          = cursor._verbosity
        self._parentStart        = cursor._parentStart
        self._parentEnd          = cursor._parentEnd
        self._start              = cursor._start
        self._end                = cursor._end

    def __str__(self):
        """Return a one-line description of the cursor, for debugging."""
        return "NC-list: %s, Lindex: %s, Hindex: %s, GFFindex: %s, start: %s, end: %s, parent Lindex: %s, parent Hindex: %s, parent GFFindex: %s, transcript: %s, last sibling: %s" % (self._ncList, self._lIndex, self._hIndex, self._gffIndex, self._start, self._end, self._parentLIndex, self._parentHIndex, self._parentGffIndex, self._transcript, self._lastSiblingLIndex)