6
+ − 1 #
+ − 2 # Copyright INRA-URGI 2009-2010
+ − 3 #
+ − 4 # This software is governed by the CeCILL license under French law and
+ − 5 # abiding by the rules of distribution of free software. You can use,
+ − 6 # modify and/ or redistribute the software under the terms of the CeCILL
+ − 7 # license as circulated by CEA, CNRS and INRIA at the following URL
+ − 8 # "http://www.cecill.info".
+ − 9 #
+ − 10 # As a counterpart to the access to the source code and rights to copy,
+ − 11 # modify and redistribute granted by the license, users are provided only
+ − 12 # with a limited warranty and the software's author, the holder of the
+ − 13 # economic rights, and the successive licensors have only limited
+ − 14 # liability.
+ − 15 #
+ − 16 # In this respect, the user's attention is drawn to the risks associated
+ − 17 # with loading, using, modifying and/or developing or reproducing the
+ − 18 # software by the user in light of its specific status of free software,
+ − 19 # that may mean that it is complicated to manipulate, and that also
+ − 20 # therefore means that it is reserved for developers and experienced
+ − 21 # professionals having in-depth computer knowledge. Users are therefore
+ − 22 # encouraged to load and test the software's suitability as regards their
+ − 23 # requirements in conditions enabling the security of their systems and/or
+ − 24 # data to be ensured and, more generally, to use and operate it in the
+ − 25 # same conditions as regards security.
+ − 26 #
+ − 27 # The fact that you are presently reading this means that you have had
+ − 28 # knowledge of the CeCILL license and that you accept its terms.
+ − 29 #
+ − 30 import re
+ − 31 import sys
+ − 32 from commons.core.parsing.ParserChooser import ParserChooser
+ − 33 from SMART.Java.Python.mySql.MySqlTranscriptTable import MySqlTranscriptTable
+ − 34 from commons.core.writer.MySqlTranscriptWriter import MySqlTranscriptWriter
+ − 35
class TranscriptContainer(object):
    """
    An interface class that contains a list of transcripts and handles different formats
    @ivar container:            container of the data (input file name, or prefix of the table names when the format is "sql")
    @type container:            string
    @ivar format:               format of the data
    @type format:               string
    @ivar transcriptListParser: possibly contains a parser to a list of transcripts
    @type transcriptListParser: L{TranscriptListParser<TranscriptListParser>} or None
    @ivar mappingListParser:    possibly contains a parser to a list of mappings
    @type mappingListParser:    L{MapperParser<MapperParser>} or None
    @ivar transcriptTables:     the mySQL tables, indexed by chromosome (empty if none)
    @type transcriptTables:     dict of L{MySqlTranscriptTable<MySqlTranscriptTable>}
    @ivar mySqlConnection:      connection to a MySQL database (never set by this class: must be assigned by the caller before any database access)
    @type mySqlConnection:      class L{MySqlConnection<MySqlConnection>}
    @ivar foundData:            whether findData has already been run
    @type foundData:            boolean
    @ivar nbTranscripts:        number of transcripts (or mappings), None until the data is loaded
    @type nbTranscripts:        int or None
    @ivar nbNucleotides:        number of nucleotides, None until the data is loaded
    @type nbNucleotides:        int or None
    @ivar chromosomes:          chromosomes found in the data, None until the data is loaded
    @type chromosomes:          list or None
    @ivar type:                 type of the data ("transcript", "mapping" or "sql"), None until the data is loaded
    @type type:                 string or None
    @ivar verbosity:            verbosity
    @type verbosity:            int
    """

    def __init__(self, container, format, verbosity = 0):
        """
        Constructor (does not read any data: loading is deferred to findData)
        Exits the program if the container or the format is None.
        @param container: container of the data
        @type  container: string
        @param format:    format of the data
        @type  format:    string
        @param verbosity: verbosity
        @type  verbosity: int
        """
        self.container            = container
        self.format               = format
        self.verbosity            = verbosity
        self.transcriptListParser = None
        self.mappingListParser    = None
        self.transcriptTables     = {}
        self.mySqlConnection      = None
        self.foundData            = False
        self.nbTranscripts        = None
        self.nbNucleotides        = None
        self.chromosomes          = None
        self.type                 = None
        if self.container is None:
            sys.exit("Error! Container input file name is empty!")
        if self.format is None:
            sys.exit("Error! Container input format is empty!")


    def findData(self):
        """
        Load data: open the tables or the parser matching the format, and
        collect the number of elements, the number of nucleotides and the
        chromosomes.
        Exits the program if the format is None or cannot be handled.
        """
        if self.format is None:
            sys.exit("Error! Format is not specified!")
        if self.format == "sql":
            self._loadSqlTables()
        # The type is still unset here unless the data come from the database
        # (or the type was already determined before this call).
        if self.type is None:
            self._chooseParser()
        self._readParserStatistics()
        self.foundData = True


    def _loadSqlTables(self):
        """
        Register every '<container>_<chromosome>_transcripts' table and count its content
        Uses self.mySqlConnection, which must have been set beforehand.
        """
        self.transcriptTables = {}
        self.chromosomes      = []
        self.nbTranscripts    = 0
        self.nbNucleotides    = 0
        self.type             = "sql"
        query = self.mySqlConnection.executeQuery("SELECT name FROM sqlite_master WHERE type LIKE 'table' AND name LIKE '%s_%%_transcripts'" % (self.container))
        # Skip the "<container>_" prefix: what remains should be "<chromosome>_transcripts".
        prefixSize = len(self.container) + 1
        for line in query.getIterator():
            tableName = line[0]
            m = re.search(r"^(\S*)_transcripts$", tableName[prefixSize:])
            if m is None:
                sys.exit("Table '%s' has a strange name" % (tableName))
            chromosome = m.group(1)
            table      = MySqlTranscriptTable(self.mySqlConnection, self.container, chromosome, self.verbosity)
            self.transcriptTables[chromosome] = table
            self.chromosomes.append(chromosome)
            for transcript in table.getIterator():
                self.nbTranscripts += 1
                self.nbNucleotides += transcript.getSize()


    def _chooseParser(self):
        """
        Ask the parser chooser for the type of the format and for the matching parser
        Exits the program if the type is neither "transcript" nor "mapping".
        """
        parserChooser = ParserChooser(self.verbosity)
        parserChooser.findFormat(self.format)
        self.type = parserChooser.getType()
        if self.type == "transcript":
            self.transcriptListParser = parserChooser.getParser(self.container)
        elif self.type == "mapping":
            self.mappingListParser = parserChooser.getParser(self.container)
        else:
            sys.exit("Error! Cannot handle format '%s'!" % (self.format))


    def _readParserStatistics(self):
        """
        Copy the counts and the chromosomes from the parser matching the current type, if any
        """
        if self.type == "transcript" and self.transcriptListParser is not None:
            self.nbTranscripts = self.transcriptListParser.getNbTranscripts()
            self.nbNucleotides = self.transcriptListParser.getNbNucleotides()
            self.chromosomes   = self.transcriptListParser.getChromosomes()
        if self.type == "mapping" and self.mappingListParser is not None:
            self.nbTranscripts = self.mappingListParser.getNbMappings()
            self.nbNucleotides = self.mappingListParser.getNbNucleotides()
            self.chromosomes   = self.mappingListParser.getChromosomes()


    def getNbTranscripts(self):
        """
        Get the number of transcripts (loads the data if needed)
        @return: the number of transcripts
        """
        if not self.foundData:
            self.findData()
        return self.nbTranscripts


    def getNbItems(self):
        """
        Same as getNbTranscripts
        """
        return self.getNbTranscripts()


    def getNbNucleotides(self):
        """
        Get the number of nucleotides (loads the data if needed)
        @return: the number of nucleotides
        """
        if not self.foundData:
            self.findData()
        return self.nbNucleotides


    def getChromosomes(self):
        """
        Get the chromosomes (loads the data if needed)
        @return: the chromosomes
        """
        if not self.foundData:
            self.findData()
        return self.chromosomes


    def getIterator(self):
        """
        An iterator (loads the data if needed, when the iteration starts)
        For mappings, the transcript of each mapping is given.
        Exits the program if the type of the data is not handled.
        @return: an iterator to a list of transcripts
        """
        if not self.foundData:
            self.findData()
        if self.type == "sql":
            for chromosome in self.transcriptTables:
                for transcript in self.transcriptTables[chromosome].getIterator():
                    yield transcript
        elif self.type == "transcript":
            for transcript in self.transcriptListParser.getIterator():
                yield transcript
        elif self.type == "mapping":
            for mapping in self.mappingListParser.getIterator():
                yield mapping.getTranscript()
        else:
            sys.exit("Error! No valid transcript container given!")


    def storeIntoDatabase(self, name = None):
        """
        Store the current transcript / mapping list into database
        Does nothing if there is no parser, or if tables are already set.
        After the call, the container reads its data from the written tables.
        @param name: name given to the transcript writer
        @type  name: string or None
        """
        if not self.foundData:
            self.findData()

        hasParser = self.transcriptListParser is not None or self.mappingListParser is not None
        if not hasParser or self.transcriptTables:
            return

        mySqlTranscriptWriter = MySqlTranscriptWriter(self.mySqlConnection, name, self.verbosity)
        mySqlTranscriptWriter.addTranscriptList(self.transcriptListParser or self.mappingListParser)
        mySqlTranscriptWriter.write()
        self.transcriptTables = mySqlTranscriptWriter.getTables()
        self.type             = "sql"


    def getTables(self):
        """
        Accessor to the mySQL tables (does not load the data)
        @return: the mySQL tables
        """
        return self.transcriptTables


    def setDefaultTagValue(self, name, value):
        """
        Set the given tag to the value for all transcripts
        The data is loaded first if needed: otherwise the type would still be
        unset and the value would be silently dropped.
        @param name:  name of the tag
        @type  name:  string
        @param value: value of the tag
        @type  value: string
        """
        if not self.foundData:
            self.findData()
        if self.type == "sql":
            for chromosome in self.transcriptTables:
                self.transcriptTables[chromosome].setDefaultTagValue(name, value)
        elif self.type == "transcript":
            self.transcriptListParser.setDefaultTagValue(name, value)
        elif self.type == "mapping":
            self.mappingListParser.setDefaultTagValue(name, value)
+ − 236