Mercurial > repos > yufei-luo > s_mart
diff smart_toolShed/SMART/Java/Python/structure/TranscriptContainer.py @ 0:e0f8dcca02ed
Uploaded S-MART tool, a toolbox for managing RNA-Seq and ChIP-Seq data.
author | yufei-luo |
---|---|
date | Thu, 17 Jan 2013 10:52:14 -0500 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/smart_toolShed/SMART/Java/Python/structure/TranscriptContainer.py Thu Jan 17 10:52:14 2013 -0500 @@ -0,0 +1,236 @@ +# +# Copyright INRA-URGI 2009-2010 +# +# This software is governed by the CeCILL license under French law and +# abiding by the rules of distribution of free software. You can use, +# modify and/ or redistribute the software under the terms of the CeCILL +# license as circulated by CEA, CNRS and INRIA at the following URL +# "http://www.cecill.info". +# +# As a counterpart to the access to the source code and rights to copy, +# modify and redistribute granted by the license, users are provided only +# with a limited warranty and the software's author, the holder of the +# economic rights, and the successive licensors have only limited +# liability. +# +# In this respect, the user's attention is drawn to the risks associated +# with loading, using, modifying and/or developing or reproducing the +# software by the user in light of its specific status of free software, +# that may mean that it is complicated to manipulate, and that also +# therefore means that it is reserved for developers and experienced +# professionals having in-depth computer knowledge. Users are therefore +# encouraged to load and test the software's suitability as regards their +# requirements in conditions enabling the security of their systems and/or +# data to be ensured and, more generally, to use and operate it in the +# same conditions as regards security. +# +# The fact that you are presently reading this means that you have had +# knowledge of the CeCILL license and that you accept its terms. 
#
import re
import sys


class TranscriptContainer(object):
    """
    An interface class that contains a list of transcripts, handle different formats
    @ivar container:            container of the data (file name, or table prefix for the "sql" format)
    @type container:            string
    @ivar format:               format of the data
    @type format:               string
    @ivar transcriptListParser: possibly contains a parser to a list of transcripts
    @type transcriptListParser: L{TranscriptListParser<TranscriptListParser>} or None
    @ivar mappingListParser:    possibly contains a parser to a list of mappings
    @type mappingListParser:    L{MapperParser<MapperParser>} or None
    @ivar transcriptTables:     possibly contains the mySQL tables, indexed by chromosome
    @type transcriptTables:     dict of L{MySqlTranscriptTable<MySqlTranscriptTable>}
    @ivar mySqlConnection:      connection to the database; never set by this class, the caller attaches it
    @type mySqlConnection:      class L{MySqlConnection<MySqlConnection>} or None
    @ivar foundData:            whether findData has already been run
    @type foundData:            boolean
    @ivar nbTranscripts:        number of transcripts (or mappings), known once the data is loaded
    @type nbTranscripts:        int or None
    @ivar nbNucleotides:        number of nucleotides, known once the data is loaded
    @type nbNucleotides:        int or None
    @ivar chromosomes:          chromosomes seen in the data, known once the data is loaded
    @type chromosomes:          list or None
    @ivar type:                 type of the data ("transcript", "mapping" or "sql")
    @type type:                 string or None
    @ivar verbosity:            verbosity
    @type verbosity:            int
    """

    def __init__(self, container, format, verbosity=0):
        """
        Constructor; only records the parameters, the data is loaded lazily
        @param container: container of the data
        @type  container: string
        @param format:    format of the data
        @type  format:    string
        @param verbosity: verbosity
        @type  verbosity: int
        @raise SystemExit: if the container or the format is None
        """
        self.container = container
        self.format = format
        self.verbosity = verbosity
        self.transcriptListParser = None
        self.mappingListParser = None
        self.transcriptTables = {}
        self.mySqlConnection = None
        self.foundData = False
        self.nbTranscripts = None
        self.nbNucleotides = None
        self.chromosomes = None
        self.type = None
        if self.container is None:
            sys.exit("Error! Container input file name is empty!")
        if self.format is None:
            sys.exit("Error! Container input format is empty!")


    def findData(self):
        """
        Load data: open the SQL tables or choose a parser, then collect the statistics
        @raise SystemExit: if the format is missing or cannot be handled, or if the
                           "sql" format is requested without a database connection
        """
        if self.format is None:
            sys.exit("Error! Format is not specified!")
        if self.format == "sql":
            self._loadSqlTables()
        # The type may already be set (by the SQL branch, or by an earlier call):
        # in that case no new parser is chosen.
        if self.type is None:
            self._chooseParser()
        self._readStatisticsFromParser()
        self.foundData = True


    def _loadSqlTables(self):
        """
        Register every '<container>_<chromosome>_transcripts' table and count its content
        """
        # Without this guard, a missing connection surfaces as a bare AttributeError.
        if self.mySqlConnection is None:
            sys.exit("Error! No database connection is set for SQL container '%s'!" % (self.container))
        # Imported here rather than at module level, so that the database layer
        # is only needed when the "sql" format is actually used.
        from SMART.Java.Python.mySql.MySqlTranscriptTable import MySqlTranscriptTable

        self.transcriptTables = {}
        self.chromosomes = []
        self.nbTranscripts = 0
        self.nbNucleotides = 0
        self.type = "sql"
        # NOTE(review): the container name is interpolated into the statement, and
        # '_' is itself a single-character wildcard for LIKE; the container name is
        # thus expected to be a trusted, plain identifier -- confirm with callers.
        query = self.mySqlConnection.executeQuery("SELECT name FROM sqlite_master WHERE type LIKE 'table' AND name LIKE '%s_%%_transcripts'" % (self.container))
        prefixSize = len(self.container) + 1
        for line in query.getIterator():
            tableName = line[0]
            # What follows '<container>_' should read '<chromosome>_transcripts'.
            m = re.search(r"^(\S*)_transcripts$", tableName[prefixSize:])
            if m is None:
                sys.exit("Table '%s' has a strange name" % (tableName))
            chromosome = m.group(1)
            table = MySqlTranscriptTable(self.mySqlConnection, self.container, chromosome, self.verbosity)
            self.transcriptTables[chromosome] = table
            self.chromosomes.append(chromosome)
            for transcript in table.getIterator():
                self.nbTranscripts += 1
                self.nbNucleotides += transcript.getSize()


    def _chooseParser(self):
        """
        Ask the parser chooser for the parser matching the format, and store it
        """
        # Imported here rather than at module level (see _loadSqlTables).
        from commons.core.parsing.ParserChooser import ParserChooser

        parserChooser = ParserChooser(self.verbosity)
        parserChooser.findFormat(self.format)
        self.type = parserChooser.getType()
        if self.type == "transcript":
            self.transcriptListParser = parserChooser.getParser(self.container)
        elif self.type == "mapping":
            self.mappingListParser = parserChooser.getParser(self.container)
        else:
            # Also covers a None type: no further check is needed afterwards.
            sys.exit("Error! Cannot handle format '%s'!" % (self.format))


    def _readStatisticsFromParser(self):
        """
        Copy the counts and the chromosomes from the parser matching the current type
        """
        if self.transcriptListParser is not None and self.type == "transcript":
            self.nbTranscripts = self.transcriptListParser.getNbTranscripts()
            self.nbNucleotides = self.transcriptListParser.getNbNucleotides()
            self.chromosomes = self.transcriptListParser.getChromosomes()
        if self.mappingListParser is not None and self.type == "mapping":
            self.nbTranscripts = self.mappingListParser.getNbMappings()
            self.nbNucleotides = self.mappingListParser.getNbNucleotides()
            self.chromosomes = self.mappingListParser.getChromosomes()


    def _getActiveParser(self):
        """
        Get the parser in use, the transcript parser having the priority
        @return: the transcript parser, else the mapping parser, else None
        """
        # Identity test on purpose: a parser object which happens to be falsy
        # (e.g. an empty one defining __len__) must not be skipped.
        if self.transcriptListParser is not None:
            return self.transcriptListParser
        return self.mappingListParser


    def getNbTranscripts(self):
        """
        Get the number of transcripts (the data is loaded if needed)
        @return: the number of transcripts
        """
        if not self.foundData:
            self.findData()
        return self.nbTranscripts


    def getNbItems(self):
        """
        Same as getNbTranscripts
        """
        return self.getNbTranscripts()


    def getNbNucleotides(self):
        """
        Get the number of nucleotides (the data is loaded if needed)
        @return: the number of nucleotides
        """
        if not self.foundData:
            self.findData()
        return self.nbNucleotides


    def getChromosomes(self):
        """
        Get the chromosomes (the data is loaded if needed)
        @return: the chromosomes
        """
        if not self.foundData:
            self.findData()
        return self.chromosomes


    def getIterator(self):
        """
        An iterator (a generator: nothing is loaded before the first item is requested)
        @return: an iterator to a list of transcripts
        @raise SystemExit: while iterating, if the type of the data is not known
        """
        if not self.foundData:
            self.findData()
        if self.type == "sql":
            for table in self.transcriptTables.values():
                for transcript in table.getIterator():
                    yield transcript
            return
        if self.type == "transcript":
            for transcript in self.transcriptListParser.getIterator():
                yield transcript
            return
        if self.type == "mapping":
            # Mappings are converted to transcripts on the fly.
            for mapping in self.mappingListParser.getIterator():
                yield mapping.getTranscript()
            return
        sys.exit("Error! No valid transcript container given!")


    def storeIntoDatabase(self, name=None):
        """
        Store the current transcript / mapping list into database
        Nothing is done if no parser is available, or if tables already exist.
        @param name: name given to the writer
        @type  name: string or None
        """
        if not self.foundData:
            self.findData()

        parser = self._getActiveParser()
        if parser is None or self.transcriptTables:
            return

        # Imported here rather than at module level (see _loadSqlTables).
        from commons.core.writer.MySqlTranscriptWriter import MySqlTranscriptWriter

        mySqlTranscriptWriter = MySqlTranscriptWriter(self.mySqlConnection, name, self.verbosity)
        mySqlTranscriptWriter.addTranscriptList(parser)
        mySqlTranscriptWriter.write()
        self.transcriptTables = mySqlTranscriptWriter.getTables()
        # From now on, the data is read from the tables.
        self.type = "sql"


    def getTables(self):
        """
        Accessor to the mySQL tables (does not load the data by itself)
        @return: the mySQL tables
        """
        return self.transcriptTables


    def setDefaultTagValue(self, name, value):
        """
        Set the given tag to the value for all transcripts (the data is loaded if needed)
        @param name:  name of the tag
        @type  name:  string
        @param value: value of the tag
        @type  value: string
        """
        # Like the other accessors, load the data first: otherwise the type is
        # still unknown and the call would silently do nothing.
        if not self.foundData:
            self.findData()
        if self.type == "sql":
            for table in self.transcriptTables.values():
                table.setDefaultTagValue(name, value)
        elif self.type == "transcript":
            self.transcriptListParser.setDefaultTagValue(name, value)
        elif self.type == "mapping":
            self.mappingListParser.setDefaultTagValue(name, value)