view SMART/Java/Python/structure/TranscriptContainer.py @ 11:2da30502c2f1

Updated CompareOverlappingSmallQuery.xml
author m-zytnicki
date Thu, 14 Mar 2013 05:37:08 -0400
parents 769e306b7933
children
line wrap: on
line source

#
# Copyright INRA-URGI 2009-2010
# 
# This software is governed by the CeCILL license under French law and
# abiding by the rules of distribution of free software. You can use,
# modify and/ or redistribute the software under the terms of the CeCILL
# license as circulated by CEA, CNRS and INRIA at the following URL
# "http://www.cecill.info".
# 
# As a counterpart to the access to the source code and rights to copy,
# modify and redistribute granted by the license, users are provided only
# with a limited warranty and the software's author, the holder of the
# economic rights, and the successive licensors have only limited
# liability.
# 
# In this respect, the user's attention is drawn to the risks associated
# with loading, using, modifying and/or developing or reproducing the
# software by the user in light of its specific status of free software,
# that may mean that it is complicated to manipulate, and that also
# therefore means that it is reserved for developers and experienced
# professionals having in-depth computer knowledge. Users are therefore
# encouraged to load and test the software's suitability as regards their
# requirements in conditions enabling the security of their systems and/or
# data to be ensured and, more generally, to use and operate it in the
# same conditions as regards security.
# 
# The fact that you are presently reading this means that you have had
# knowledge of the CeCILL license and that you accept its terms.
#
import re
import sys
from commons.core.parsing.ParserChooser import ParserChooser
from SMART.Java.Python.mySql.MySqlTranscriptTable import MySqlTranscriptTable
from commons.core.writer.MySqlTranscriptWriter import MySqlTranscriptWriter

class TranscriptContainer(object):
    """
    An interface class that contains a list of transcripts, handle different formats.
    The data are loaded lazily: nothing is read until L{findData} is called, either
    explicitly or through one of the accessors.
    @ivar container: container of the data (file name, or table name prefix for the "sql" format)
    @type container: string 
    @ivar format: format of the data
    @type format: string        
    @ivar transcriptListParser: possibly contains a parser to a list of transcripts
    @type transcriptListParser: L{TranscriptListParser<TranscriptListParser>} or None
    @ivar mappingListParser: possibly contains a parser to a list of mappings
    @type mappingListParser: L{MapperParser<MapperParser>} or None
    @ivar transcriptTables: possibly contains the SQL tables, indexed by chromosome
    @type transcriptTables: dict of L{MySqlTranscriptTable<MySqlTranscriptTable>}
    @ivar mySqlConnection: connection to a database (must be set by the caller before using the "sql" format)
    @type mySqlConnection: class L{MySqlConnection<MySqlConnection>}
    @ivar foundData: whether the data have already been loaded
    @type foundData: boolean
    @ivar nbTranscripts: number of transcripts (or mappings), None until the data are loaded
    @type nbTranscripts: int or None
    @ivar nbNucleotides: number of nucleotides, None until the data are loaded
    @type nbNucleotides: int or None
    @ivar chromosomes: chromosomes seen in the data, None until the data are loaded
    @type chromosomes: list or None
    @ivar type: type of the data ("transcript", "mapping" or "sql")
    @type type: string
    @ivar verbosity: verbosity
    @type verbosity: int        
    """

    def __init__(self, container, format, verbosity = 0):
        """
        Constructor
        Exits the program if the container or the format is None.
        @param container: container of the data
        @type container: string
        @param format: format of the data
        @type format: string
        @param verbosity: verbosity
        @type verbosity: int
        """
        self.container            = container
        self.format               = format
        self.verbosity            = verbosity
        self.transcriptListParser = None
        self.mappingListParser    = None
        self.transcriptTables     = {}
        self.mySqlConnection      = None
        self.foundData            = False
        self.nbTranscripts        = None
        self.nbNucleotides        = None
        self.chromosomes          = None
        self.type                 = None
        if self.container is None:
            sys.exit("Error! Container input file name is empty!")
        if self.format is None:
            sys.exit("Error! Container input format is empty!")
        
        
    def findData(self):
        """
        Load data
        Selects the data source from the format, then caches the number of
        transcripts, the number of nucleotides and the chromosomes.
        Exits the program if the format is missing or cannot be handled.
        """
        if self.format is None:
            sys.exit("Error! Format is not specified!")
        if self.format == "sql":
            self._findSqlData()
        elif self.type is None:
            self._findParserData()

        # The type is checked together with the parser: after storeIntoDatabase,
        # the type is "sql" although a parser is still attached, and the
        # statistics must then not be read again from the parser.
        if self.type == "transcript" and self.transcriptListParser is not None:
            self.nbTranscripts = self.transcriptListParser.getNbTranscripts()
            self.nbNucleotides = self.transcriptListParser.getNbNucleotides()
            self.chromosomes   = self.transcriptListParser.getChromosomes()
        elif self.type == "mapping" and self.mappingListParser is not None:
            self.nbTranscripts = self.mappingListParser.getNbMappings()
            self.nbNucleotides = self.mappingListParser.getNbNucleotides()
            self.chromosomes   = self.mappingListParser.getChromosomes()

        self.foundData = True


    def _findSqlData(self):
        """
        Load the per-chromosome tables whose names start with the container name,
        and count their transcripts and nucleotides
        Exits the program if no connection is set or if a table name is unexpected.
        """
        if self.mySqlConnection is None:
            sys.exit("Error! No database connection is set for the 'sql' format!")
        self.transcriptTables = {}
        self.chromosomes      = []
        self.nbTranscripts    = 0
        self.nbNucleotides    = 0
        self.type             = "sql"
        # NOTE(review): the container name is inserted verbatim into the query;
        # this is only safe as long as it does not come from untrusted input.
        query = self.mySqlConnection.executeQuery("SELECT name FROM sqlite_master WHERE type LIKE 'table' AND name LIKE '%s_%%_transcripts'" % (self.container))
        # Table names look like "<container>_<chromosome>_transcripts":
        # skip the container name and its separator to find the chromosome.
        prefixSize = len(self.container) + 1
        for line in query.getIterator():
            tableName = line[0]
            m = re.search(r"^(\S*)_transcripts$", tableName[prefixSize:])
            if m is None:
                sys.exit("Table '%s' has a strange name" % (tableName))
            chromosome = m.group(1)
            table      = MySqlTranscriptTable(self.mySqlConnection, self.container, chromosome, self.verbosity)
            self.transcriptTables[chromosome] = table
            self.chromosomes.append(chromosome)
            for transcript in table.getIterator():
                self.nbTranscripts += 1
                self.nbNucleotides += transcript.getSize()


    def _findParserData(self):
        """
        Ask the parser chooser for the type of the format, and build the matching parser
        Exits the program if the format is neither a transcript nor a mapping format.
        """
        parserChooser = ParserChooser(self.verbosity)
        parserChooser.findFormat(self.format)
        self.type = parserChooser.getType()
        if self.type == "transcript":
            self.transcriptListParser = parserChooser.getParser(self.container)
        elif self.type == "mapping":
            self.mappingListParser = parserChooser.getParser(self.container)
        else:
            sys.exit("Error! Cannot handle format '%s'!" % (self.format))


    def getNbTranscripts(self):
        """
        Get the number of transcripts (the data are loaded if needed)
        @return: the number of transcripts
        """
        if not self.foundData:
            self.findData()
        return self.nbTranscripts
    
    
    def getNbItems(self):
        """
        Same as getNbTranscripts
        @return: the number of transcripts
        """
        return self.getNbTranscripts()


    def getNbNucleotides(self):
        """
        Get the number of nucleotides (the data are loaded if needed)
        @return: the number of nucleotides
        """
        if not self.foundData:
            self.findData()
        return self.nbNucleotides


    def getChromosomes(self):
        """
        Get the chromosomes (the data are loaded if needed)
        @return: the chromosomes
        """
        if not self.foundData:
            self.findData()
        return self.chromosomes
    

    def getIterator(self):
        """
        An iterator
        This is a generator: the data are loaded, and a possible error is raised,
        only when the iteration starts.
        @return: an iterator to a list of transcripts
        """
        if not self.foundData:
            self.findData()
        if self.type == "sql":
            for chromosome in self.transcriptTables:
                for transcript in self.transcriptTables[chromosome].getIterator():
                    yield transcript
        elif self.type == "transcript":
            for transcript in self.transcriptListParser.getIterator():
                yield transcript
        elif self.type == "mapping":
            # Mappings are converted on the fly, so that callers only see transcripts.
            for mapping in self.mappingListParser.getIterator():
                yield mapping.getTranscript()
        else:
            sys.exit("Error! No valid transcript container given!")
        
        
    def storeIntoDatabase(self, name = None):
        """
        Store the current transcript / mapping list into database
        Nothing is done if there is no parser, or if tables already exist.
        After the call, the container reads its data from the tables.
        @param name: name given to the writer (presumably the prefix of the tables -- to be confirmed)
        @type name: string or None
        """
        if not self.foundData:
            self.findData()

        # Pick the parser with an identity test: a parser object which happens
        # to evaluate to False must not be mistaken for a missing parser.
        parser = self.transcriptListParser
        if parser is None:
            parser = self.mappingListParser
        if parser is None or self.transcriptTables:
            return
        
        mySqlTranscriptWriter = MySqlTranscriptWriter(self.mySqlConnection, name, self.verbosity)
        mySqlTranscriptWriter.addTranscriptList(parser)
        mySqlTranscriptWriter.write()
        self.transcriptTables = mySqlTranscriptWriter.getTables()
        self.type = "sql"
            
            
    def getTables(self):
        """
        Accessor to the SQL tables (the data are not loaded by this accessor)
        @return: the SQL tables
        """
        return self.transcriptTables
        

    def setDefaultTagValue(self, name, value):
        """
        Set the given tag to the value for all transcripts
        The data are loaded first if needed: otherwise, the type would still be
        unknown and the value would be silently dropped.
        @param name: name of the tag
        @type name: string
        @param value: value of the tag
        @type value: string
        """
        if not self.foundData:
            self.findData()
        if self.type == "sql":
            for chromosome in self.transcriptTables:
                self.transcriptTables[chromosome].setDefaultTagValue(name, value)
        elif self.type == "transcript":
            self.transcriptListParser.setDefaultTagValue(name, value)
        elif self.type == "mapping":
            self.mappingListParser.setDefaultTagValue(name, value)