Mercurial > repos > yufei-luo > s_mart
view commons/core/writer/MySqlTranscriptWriter.py @ 62:8c42a6d7ffd4
Added simple test BED file.
author | m-zytnicki |
---|---|
date | Mon, 19 Oct 2015 11:25:11 +0200 |
parents | 169d364ddd91 |
children |
line wrap: on
line source
# # Copyright INRA-URGI 2009-2010 # # This software is governed by the CeCILL license under French law and # abiding by the rules of distribution of free software. You can use, # modify and/ or redistribute the software under the terms of the CeCILL # license as circulated by CEA, CNRS and INRIA at the following URL # "http://www.cecill.info". # # As a counterpart to the access to the source code and rights to copy, # modify and redistribute granted by the license, users are provided only # with a limited warranty and the software's author, the holder of the # economic rights, and the successive licensors have only limited # liability. # # In this respect, the user's attention is drawn to the risks associated # with loading, using, modifying and/or developing or reproducing the # software by the user in light of its specific status of free software, # that may mean that it is complicated to manipulate, and that also # therefore means that it is reserved for developers and experienced # professionals having in-depth computer knowledge. Users are therefore # encouraged to load and test the software's suitability as regards their # requirements in conditions enabling the security of their systems and/or # data to be ensured and, more generally, to use and operate it in the # same conditions as regards security. # # The fact that you are presently reading this means that you have had # knowledge of the CeCILL license and that you accept its terms. 
#
import os
import random

from SMART.Java.Python.mySql.MySqlTable import MySqlTable
from SMART.Java.Python.mySql.MySqlTranscriptTable import MySqlTranscriptTable
from SMART.Java.Python.misc.Progress import Progress


class MySqlTranscriptWriter(object):
    """
    A class that writes a transcript list into a mySQL table
    @ivar name:                      name of the tables
    @type name:                      string
    @ivar tables:                    the tables, one per chromosome
    @type tables:                    dict of L{MySqlTranscriptTable<MySqlTranscriptTable>}
    @ivar indices:                   indices to create on every new table (index name -> indexed values)
    @type indices:                   dict of list of strings
    @ivar mySqlConnection:           connection to a MySQL database
    @type mySqlConnection:           class L{MySqlConnection<MySqlConnection>}
    @ivar tmpTranscriptFileHandles:  files where transcripts are temporary stored, before copy into database
                                     (not read by any method of this class)
    @type tmpTranscriptFileHandles:  dict of file handles
    @ivar nbTmpFiles:                not read by any method of this class
    @type nbTmpFiles:                int
    @ivar transcriptValues:          SQL values of the transcripts waiting to be inserted, by chromosome
    @type transcriptValues:          dict of lists
    @ivar nbTranscriptValues:        number of buffered transcripts above which the buffer is flushed
    @type nbTranscriptValues:        int
    @ivar nbTranscriptsByChromosome: number of transcripts written
    @type nbTranscriptsByChromosome: dict of int (one for each chromosome)
    @ivar randomNumber:              a random number, used for having a unique name for the indices
    @type randomNumber:              int
    @ivar toBeWritten:               there exists transcripts to be copied into database
    @type toBeWritten:               bool
    @ivar transcriptListParser:      parser given to L{addTranscriptList}, read by L{getIterator}
    @type transcriptListParser:      class L{TranscriptListParser<TranscriptListParser>} or None
    @ivar verbosity:                 verbosity
    @type verbosity:                 int
    """

    def __init__(self, connection, name = None, verbosity = 0):
        """
        Constructor
        @param connection: connection to the database
        @type  connection: class L{MySqlConnection<MySqlConnection>}
        @param name:       name of the tables; if a path is given, only its last component is kept
        @type  name:       string
        @param verbosity:  verbosity
        @type  verbosity:  int
        """
        self.name                      = name
        self.verbosity                 = verbosity
        self.tables                    = {}
        self.indices                   = {}
        self.tmpTranscriptFileHandles  = {}
        self.nbTranscriptsByChromosome = {}
        self.toBeWritten               = False
        self.randomNumber              = random.randint(0, 100000)
        self.mySqlConnection           = connection
        self.nbTmpFiles                = 100
        self.transcriptValues          = {}
        self.nbTranscriptValues        = 1000
        # Set by addTranscriptList(); declared here so that the attribute always exists.
        self.transcriptListParser      = None
        if self.name is not None:
            # Keep what follows the last path separator (the whole name if there is none).
            self.name = self.name.rpartition(os.sep)[2]

    def __del__(self):
        """
        Destructor
        Possibly write into the database the last transcripts
        """
        # getattr: the flag may be missing if construction did not complete.
        if getattr(self, "toBeWritten", False):
            self.write()

    def addIndex(self, name, values):
        """
        Add an index to the tables
        Only the tables created after this call get the index (see L{createTable}).
        @param name:   name of the index
        @type  name:   string
        @param values: values to index
        @type  values: list of strings
        """
        self.indices[name] = values

    def createTable(self, chromosome):
        """
        Create a table for a chromosome, together with every registered index
        @param chromosome: a chromosome name
        @type  chromosome: string
        """
        table = MySqlTranscriptTable(self.mySqlConnection, self.name, chromosome, self.verbosity)
        self.tables[chromosome] = table
        table.createTranscriptTable()
        # items() (rather than iteritems()) works with both Python 2 and Python 3.
        for name, values in self.indices.items():
            table.createIndex("%s_%s_%d" % (name, chromosome, self.randomNumber), values)

    def addTranscript(self, transcript):
        """
        Add a transcript to the list of transcripts to be written
        The transcript is buffered; the buffer is flushed to the database once it
        holds more than C{nbTranscriptValues} transcripts.
        @param transcript: transcript to be written
        @type  transcript: class L{Transcript<Transcript>}
        """
        chromosome = transcript.getChromosome()
        if chromosome not in self.tables:
            self.createTable(chromosome)
        self.transcriptValues.setdefault(chromosome, []).append(transcript.getSqlValues())
        # Count from zero, as getIterator() does: the first transcript of a chromosome
        # counts for one, and a table created beforehand does not need a counter entry.
        self.nbTranscriptsByChromosome[chromosome] = self.nbTranscriptsByChromosome.get(chromosome, 0) + 1
        self.toBeWritten = True
        nbBuffered = sum(len(values) for values in self.transcriptValues.values())
        if nbBuffered > self.nbTranscriptValues:
            self.write()

    def addElement(self, element):
        """
        Same as "addTranscript"
        @param element: transcript to be written
        @type  element: class L{Transcript<Transcript>}
        """
        self.addTranscript(element)

    def addTranscriptList(self, transcriptListParser):
        """
        Add a list of transcripts to the transcripts to be written
        The parser is stored on the instance and this writer is handed to the
        connection (presumably the connection then calls L{getIterator} -- to be
        confirmed against MySqlConnection.executeManyQueriesIterator).
        @param transcriptListParser: transcripts to be written
        @type  transcriptListParser: class L{TranscriptListParser<TranscriptListParser>}
        """
        self.transcriptListParser = transcriptListParser
        self.mySqlConnection.executeManyQueriesIterator(self)

    def getIterator(self):
        """
        Iterator to the SQL commands to insert the list
        Tables are created on the fly for the chromosomes met for the first time.
        L{addTranscriptList} must have been called before, since the transcripts are
        read from C{self.transcriptListParser}.
        @return: one INSERT command per transcript
        """
        progress = Progress(self.transcriptListParser.getNbTranscripts(), "Storing %s into database" % (self.transcriptListParser.fileName), self.verbosity)
        for transcript in self.transcriptListParser.getIterator():
            chromosome = transcript.getChromosome()
            if chromosome not in self.tables:
                self.createTable(chromosome)
            self.nbTranscriptsByChromosome[chromosome] = self.nbTranscriptsByChromosome.get(chromosome, 0) + 1
            table  = self.tables[chromosome]
            values = transcript.getSqlValues()
            formattedValues = [MySqlTable.formatSql(values[variable], table.types[variable], table.sizes[variable]) for variable in table.variables]
            # NOTE(review): the table name is wrapped in single quotes, which a standard
            # MySQL server reads as a string literal, not as an identifier -- confirm that
            # the backend behind mySqlConnection accepts this form.
            yield "INSERT INTO '%s' (%s) VALUES (%s)" % (table.name, ", ".join(table.variables), ", ".join(formattedValues))
            progress.inc()
        progress.done()

    def write(self):
        """
        Copy the buffered transcripts into the database, and empty the buffer
        (May add transcripts to already created databases)
        """
        for chromosome, values in self.transcriptValues.items():
            self.tables[chromosome].insertMany(values)
        self.transcriptValues = {}
        self.toBeWritten      = False

    def getTables(self):
        """
        Get the tables
        The transcripts still buffered are written first.
        @return: the mySQL tables
        """
        if self.toBeWritten:
            self.write()
        return self.tables

    def removeTables(self):
        """
        Drop the tables
        """
        for table in self.tables.values():
            table.remove()