diff smart_toolShed/SMART/Java/Python/structure/TranscriptContainer.py @ 0:e0f8dcca02ed

Uploaded S-MART tool. A toolbox manages RNA-Seq and ChIP-Seq data.
author yufei-luo
date Thu, 17 Jan 2013 10:52:14 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/smart_toolShed/SMART/Java/Python/structure/TranscriptContainer.py	Thu Jan 17 10:52:14 2013 -0500
@@ -0,0 +1,236 @@
+#
+# Copyright INRA-URGI 2009-2010
+# 
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+# 
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+# 
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+# 
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+import re
+import sys
+from commons.core.parsing.ParserChooser import ParserChooser
+from SMART.Java.Python.mySql.MySqlTranscriptTable import MySqlTranscriptTable
+from commons.core.writer.MySqlTranscriptWriter import MySqlTranscriptWriter
+
+class TranscriptContainer(object):
+    """
+    An interface class that contains a list of transcripts and handles different formats
+    @ivar container: container of the data (input file name, or table name prefix for the "sql" format)
+    @type container: string
+    @ivar format: format of the data
+    @type format: string
+    @ivar transcriptListParser: possibly contains a parser to a list of transcripts
+    @type transcriptListParser: L{TranscriptListParser<TranscriptListParser>} or None
+    @ivar mappingListParser: possibly contains a parser to a list of mappings
+    @type mappingListParser: L{MapperParser<MapperParser>} or None
+    @ivar transcriptTables: the mySQL tables, indexed by chromosome name (empty if none)
+    @type transcriptTables: dict of L{MySqlTranscriptTable<MySqlTranscriptTable>}
+    @ivar mySqlConnection: connection to a database; initialised to None and never
+                           assigned by this class, so it has to be set by the caller
+    @type mySqlConnection: class L{MySqlConnection<MySqlConnection>} or None
+    @ivar foundData: whether findData has already been run
+    @type foundData: boolean
+    @ivar nbTranscripts: number of transcripts (or mappings), None until the data is found
+    @type nbTranscripts: int or None
+    @ivar nbNucleotides: number of nucleotides, None until the data is found
+    @type nbNucleotides: int or None
+    @ivar chromosomes: the chromosomes, None until the data is found
+    @type chromosomes: list or None
+    @ivar type: type of the data ("transcript", "mapping" or "sql"), None until the data is found
+    @type type: string or None
+    @ivar verbosity: verbosity
+    @type verbosity: int
+    """
+
+    def __init__(self, container, format, verbosity = 0):
+        """
+        Constructor; exits the program if the container or the format is None
+        @param container: container of the data
+        @type container: string
+        @param format: format of the data
+        @type format: string
+        @param verbosity: verbosity
+        @type verbosity: int
+        """
+        self.container            = container
+        self.format               = format
+        self.verbosity            = verbosity
+        self.transcriptListParser = None
+        self.mappingListParser    = None
+        self.transcriptTables     = {}
+        self.mySqlConnection      = None
+        self.foundData            = False
+        self.nbTranscripts        = None
+        self.nbNucleotides        = None
+        self.chromosomes          = None
+        self.type                 = None
+        if self.container is None:
+            sys.exit("Error! Container input file name is empty!")
+        if self.format is None:
+            sys.exit("Error! Container input format is empty!")
+
+
+    def findData(self):
+        """
+        Load data: find out the type of the container, set up the matching
+        tables or parser, and collect the number of transcripts, the number
+        of nucleotides and the chromosomes.
+        Exits the program if the format is missing or cannot be handled.
+        """
+        if self.format is None:
+            sys.exit("Error! Format is not specified!")
+        if self.format == "sql":
+            self._loadFromDatabase()
+        # The type is still unknown unless the tables were just loaded (or
+        # the data was stored into the database by a previous call)
+        if self.type is None:
+            self._chooseParser()
+        self._readParserStatistics()
+        self.foundData = True
+
+
+    def _loadFromDatabase(self):
+        """
+        Register every table named "<container>_<chromosome>_transcripts",
+        and count the transcripts and nucleotides they contain
+        """
+        if self.mySqlConnection is None:
+            sys.exit("Error! No database connection is set for SQL container '%s'!" % (self.container))
+        self.transcriptTables = {}
+        self.chromosomes      = []
+        self.nbTranscripts    = 0
+        self.nbNucleotides    = 0
+        self.type             = "sql"
+        # NOTE(review): the container name is interpolated directly into the
+        # query, and '_' is a single-character wildcard in LIKE patterns, so
+        # tables with a similar prefix may match as well -- consider a
+        # parameterized query if executeQuery supports it
+        query        = self.mySqlConnection.executeQuery("SELECT name FROM sqlite_master WHERE type LIKE 'table' AND name LIKE '%s_%%_transcripts'" % (self.container))
+        prefixLength = len(self.container) + 1
+        for line in query.getIterator():
+            tableName = line[0]
+            # What remains after the "<container>_" prefix should be "<chromosome>_transcripts"
+            m = re.search(r"^(\S*)_transcripts$", tableName[prefixLength:])
+            if m is None:
+                sys.exit("Table '%s' has a strange name" % (tableName))
+            chromosome = m.group(1)
+            table      = MySqlTranscriptTable(self.mySqlConnection, self.container, chromosome, self.verbosity)
+            self.transcriptTables[chromosome] = table
+            self.chromosomes.append(chromosome)
+            # The totals are obtained by reading every transcript of the table
+            for transcript in table.getIterator():
+                self.nbTranscripts += 1
+                self.nbNucleotides += transcript.getSize()
+
+
+    def _chooseParser(self):
+        """
+        Ask the parser chooser for the type of the format, and create the matching parser
+        """
+        parserChooser = ParserChooser(self.verbosity)
+        parserChooser.findFormat(self.format)
+        self.type = parserChooser.getType()
+        if self.type == "transcript":
+            self.transcriptListParser = parserChooser.getParser(self.container)
+        elif self.type == "mapping":
+            self.mappingListParser = parserChooser.getParser(self.container)
+        else:
+            sys.exit("Error! Cannot handle format '%s'!" % (self.format))
+
+
+    def _readParserStatistics(self):
+        """
+        Copy the counts and the chromosomes from the parser matching the current type, if any
+        """
+        if self.type == "transcript" and self.transcriptListParser is not None:
+            self.nbTranscripts = self.transcriptListParser.getNbTranscripts()
+            self.nbNucleotides = self.transcriptListParser.getNbNucleotides()
+            self.chromosomes   = self.transcriptListParser.getChromosomes()
+        elif self.type == "mapping" and self.mappingListParser is not None:
+            self.nbTranscripts = self.mappingListParser.getNbMappings()
+            self.nbNucleotides = self.mappingListParser.getNbNucleotides()
+            self.chromosomes   = self.mappingListParser.getChromosomes()
+
+
+    def getNbTranscripts(self):
+        """
+        Get the number of transcripts (loads the data if needed)
+        @return: the number of transcripts
+        """
+        if not self.foundData:
+            self.findData()
+        return self.nbTranscripts
+
+
+    def getNbItems(self):
+        """
+        Same as getNbTranscripts
+        """
+        return self.getNbTranscripts()
+
+
+    def getNbNucleotides(self):
+        """
+        Get the number of nucleotides (loads the data if needed)
+        @return: the number of nucleotides
+        """
+        if not self.foundData:
+            self.findData()
+        return self.nbNucleotides
+
+
+    def getChromosomes(self):
+        """
+        Get the chromosomes (loads the data if needed)
+        @return: the chromosomes
+        """
+        if not self.foundData:
+            self.findData()
+        return self.chromosomes
+
+
+    def getIterator(self):
+        """
+        An iterator (this is a generator: the data is only loaded when the first item is requested)
+        @return: an iterator to a list of transcripts
+        """
+        if not self.foundData:
+            self.findData()
+        if self.type == "sql":
+            for chromosome in self.transcriptTables:
+                for transcript in self.transcriptTables[chromosome].getIterator():
+                    yield transcript
+        elif self.type == "transcript":
+            for transcript in self.transcriptListParser.getIterator():
+                yield transcript
+        elif self.type == "mapping":
+            # Mappings are converted to transcripts on the fly
+            for mapping in self.mappingListParser.getIterator():
+                yield mapping.getTranscript()
+        else:
+            sys.exit("Error! No valid transcript container given!")
+
+
+    def storeIntoDatabase(self, name = None):
+        """
+        Store the current transcript / mapping list into database.
+        Does nothing if there is no parser to read from, or if tables already exist.
+        @param name: name given to the writer
+        @type name: string or None
+        """
+        if not self.foundData:
+            self.findData()
+
+        if self.transcriptListParser is None and self.mappingListParser is None:
+            return
+        if self.transcriptTables:
+            return
+
+        # Select the parser on identity, so that a parser object which happens
+        # to evaluate to False is not replaced by None
+        parser = self.transcriptListParser
+        if parser is None:
+            parser = self.mappingListParser
+        mySqlTranscriptWriter = MySqlTranscriptWriter(self.mySqlConnection, name, self.verbosity)
+        mySqlTranscriptWriter.addTranscriptList(parser)
+        mySqlTranscriptWriter.write()
+        self.transcriptTables = mySqlTranscriptWriter.getTables()
+        self.type = "sql"
+
+
+    def getTables(self):
+        """
+        Accessor to the mySQL tables
+        @return: the mySQL tables
+        """
+        return self.transcriptTables
+
+
+    def setDefaultTagValue(self, name, value):
+        """
+        Set the given tag to the value for all transcripts (loads the data if needed)
+        @param name: name of the tag
+        @type name: string
+        @param value: value of the tag
+        @type value: string
+        """
+        # The type is unknown before the data is found: without this, the
+        # request would be silently ignored
+        if not self.foundData:
+            self.findData()
+        if self.type == "sql":
+            for chromosome in self.transcriptTables:
+                self.transcriptTables[chromosome].setDefaultTagValue(name, value)
+        elif self.type == "transcript":
+            self.transcriptListParser.setDefaultTagValue(name, value)
+        elif self.type == "mapping":
+            self.mappingListParser.setDefaultTagValue(name, value)
+