view commons/core/parsing/MapperParser.py @ 58:5f5c9b74c2dd

Uploaded
author m-zytnicki
date Fri, 07 Feb 2014 11:53:36 -0500
parents 769e306b7933
children
line wrap: on
line source

#
# Copyright INRA-URGI 2009-2010
# 
# This software is governed by the CeCILL license under French law and
# abiding by the rules of distribution of free software. You can use,
# modify and/ or redistribute the software under the terms of the CeCILL
# license as circulated by CEA, CNRS and INRIA at the following URL
# "http://www.cecill.info".
# 
# As a counterpart to the access to the source code and rights to copy,
# modify and redistribute granted by the license, users are provided only
# with a limited warranty and the software's author, the holder of the
# economic rights, and the successive licensors have only limited
# liability.
# 
# In this respect, the user's attention is drawn to the risks associated
# with loading, using, modifying and/or developing or reproducing the
# software by the user in light of its specific status of free software,
# that may mean that it is complicated to manipulate, and that also
# therefore means that it is reserved for developers and experienced
# professionals having in-depth computer knowledge. Users are therefore
# encouraged to load and test the software's suitability as regards their
# requirements in conditions enabling the security of their systems and/or
# data to be ensured and, more generally, to use and operate it in the
# same conditions as regards security.
# 
# The fact that you are presently reading this means that you have had
# knowledge of the CeCILL license and that you accept its terms.
#
import sys
from SMART.Java.Python.structure.Mapping import Mapping


class MapperParser(object):
    """An interface that parses the output of a generic mapper.

    This base class owns the input file handle and implements iteration
    over the mappings as well as a few cached statistics.  It calls two
    methods that are NOT defined in this class and therefore have to be
    supplied by concrete subclasses:

    * ``skipFirstLines()`` -- called once by the constructor, right after
      the file has been opened and before the rewind position is recorded;
    * ``parseLine(line)`` -- called with each line read from the file; a
      ``None`` result means "this line yields no mapping" and the line is
      skipped.
    """

    def __init__(self, fileName, verbosity = 0):
        """Open *fileName* and position the parser on the first data line.

        @param fileName:  path of the mapper output file to read
        @param verbosity: progress messages are printed when it is >= 10
        """
        super(MapperParser, self).__init__()
        self.verbosity = verbosity
        # Stored before the file is opened so that the subclass hook
        # skipFirstLines() can already rely on it.
        self.fileName = fileName
        # Statistics are computed lazily by getInfos(); None means
        # "not computed yet".
        self.nbMappings = None
        self.chromosomes = None
        self.size = None
        self.currentMapping = Mapping()
        self.handle = open(fileName)
        self.currentLineNb = 0
        self.skipFirstLines()
        # Offset that reset() seeks back to.
        self.startingPoint = self.handle.tell()


    def __del__(self):
        """Close the underlying file, if it was ever opened."""
        # The attribute is missing when open() failed in the constructor;
        # the finalizer must not raise in that case.
        handle = getattr(self, "handle", None)
        if handle is not None:
            handle.close()


    def reset(self):
        """Rewind the file to the recorded starting point and zero the line counter."""
        self.handle.seek(self.startingPoint)
        self.currentLineNb = 0


    def getNextMapping(self):
        """Return the next mapping found in the file.

        Lines for which parseLine() returns None are skipped.

        @return: the next mapping, or False when the end of the file is
                 reached
        """
        for line in self.handle:
            mapping = self.parseLine(line)
            # Incremented after parsing: while parseLine() runs, the counter
            # still holds the number of lines consumed before this one.
            self.currentLineNb += 1
            if mapping is not None:
                return mapping
        return False


    def getIterator(self):
        """Rewind the file, then yield every mapping it contains.

        Iteration stops as soon as getNextMapping() returns a false value.
        """
        self.reset()
        mapping = self.getNextMapping()
        # NOTE(review): this is a truthiness test, so a mapping object that
        # evaluates to false would also end the iteration -- confirm that
        # Mapping instances are always true.
        while mapping:
            yield mapping
            mapping = self.getNextMapping()


    def getInfos(self):
        """Scan the whole file once and fill in the cached statistics.

        Sets ``chromosomes`` (set of the chromosomes of the mapped
        transcripts), ``nbMappings`` (number of mappings) and ``size`` (sum
        of the transcript sizes).  The file is rewound afterwards.
        """
        self.chromosomes = set()
        self.nbMappings = 0
        self.size = 0
        self.reset()
        if self.verbosity >= 10:
            print("Getting information.")
        for mapping in self.getIterator():
            transcript = mapping.getTranscript()
            self.chromosomes.add(transcript.getChromosome())
            self.nbMappings += 1
            self.size += transcript.getSize()
            if self.verbosity >= 10 and self.nbMappings % 100000 == 0:
                # Carriage return: the progress counter overwrites itself.
                sys.stdout.write("    %d mappings read\r" % (self.nbMappings))
                sys.stdout.flush()
        self.reset()
        if self.verbosity >= 10:
            print("    %d mappings read" % (self.nbMappings))
            print("Done.")


    def getNbMappings(self):
        """Return the number of mappings, scanning the file on first use."""
        if self.nbMappings is not None:
            return self.nbMappings
        self.getInfos()
        return self.nbMappings


    def getNbItems(self):
        """Alias of getNbMappings()."""
        return self.getNbMappings()


    def getChromosomes(self):
        """Return the set of chromosomes, scanning the file on first use."""
        if self.chromosomes is not None:
            return self.chromosomes
        self.getInfos()
        return self.chromosomes


    def getSize(self):
        """Return the summed transcript sizes, scanning the file on first use."""
        if self.size is not None:
            return self.size
        self.getInfos()
        return self.size


    def getNbNucleotides(self):
        """Alias of getSize()."""
        return self.getSize()


    def setDefaultTagValue(self, name, value):
        """Call setTagValue(name, value) on every mapping of the file.

        @param name:  name of the tag
        @param value: value given to the tag
        """
        # NOTE(review): the mappings are the objects produced by parseLine()
        # during this pass; whether the tag is still present on a later pass
        # depends on the subclass returning the same objects -- confirm.
        for mapping in self.getIterator():
            mapping.setTagValue(name, value)