Mercurial > repos > yufei-luo > s_mart
diff commons/tools/GameXmlMaker.py @ 18:94ab73e8a190
Uploaded
author | m-zytnicki |
---|---|
date | Mon, 29 Apr 2013 03:20:15 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/commons/tools/GameXmlMaker.py Mon Apr 29 03:20:15 2013 -0400 @@ -0,0 +1,384 @@ +#!/usr/bin/env python + +##@file GameXmlMaker.py + +# Copyright INRA (Institut National de la Recherche Agronomique) +# http://www.inra.fr +# http://urgi.versailles.inra.fr +# +# This software is governed by the CeCILL license under French law and +# abiding by the rules of distribution of free software. You can use, +# modify and/ or redistribute the software under the terms of the CeCILL +# license as circulated by CEA, CNRS and INRIA at the following URL +# "http://www.cecill.info". +# +# As a counterpart to the access to the source code and rights to copy, +# modify and redistribute granted by the license, users are provided only +# with a limited warranty and the software's author, the holder of the +# economic rights, and the successive licensors have only limited +# liability. +# +# In this respect, the user's attention is drawn to the risks associated +# with loading, using, modifying and/or developing or reproducing the +# software by the user in light of its specific status of free software, +# that may mean that it is complicated to manipulate, and that also +# therefore means that it is reserved for developers and experienced +# professionals having in-depth computer knowledge. Users are therefore +# encouraged to load and test the software's suitability as regards their +# requirements in conditions enabling the security of their systems and/or +# data to be ensured and, more generally, to use and operate it in the +# same conditions as regards security. +# +# The fact that you are presently reading this means that you have had +# knowledge of the CeCILL license and that you accept its terms. 
import os
import glob
import sys
import xml.dom.minidom
from commons.core.utils.RepetOptionParser import RepetOptionParser
from commons.core.utils.FileUtils import FileUtils
from commons.core.seq.BioseqDB import BioseqDB
from commons.core.sql.DbFactory import DbFactory
from commons.core.sql.TablePathAdaptator import TablePathAdaptator
from commons.core.sql.TableSetAdaptator import TableSetAdaptator
from commons.core.sql.TableMapAdaptator import TableMapAdaptator


## GameXmlMaker exports sequences and their annotations as '.gamexml' files.
#
# Step 1 (option -f): write one '.gamexml' file per input sequence, holding
# only the sequence itself.
# Step 2 (option -t): add to existing '.gamexml' files the annotations read
# from 'path', 'set' or 'map' tables.
#
class GameXmlMaker(object):

    ## Constructor
    #
    # @param inFastaName string name of the input sequences (used by BioseqDB)
    # @param tablesFileName string name of the tabulated file listing the tables to export
    # @param configFileName string name of the configuration file given to DbFactory
    # @param verbose integer verbosity level
    #
    def __init__(self, inFastaName = "", tablesFileName = "", configFileName = "", verbose = 0):
        self._inFastaName = inFastaName
        self._tablesFileName = tablesFileName
        self._configFileName = configFileName
        self._verbose = verbose
        self._gameXMLFileName = ""

    ## Parse the command line and set the attributes accordingly
    #
    def setAttributesFromCmdLine(self):
        description = "GameXmlMaker with -f option <=> step 1 : create '.gamexml' files (write only the sequence and not the annotation. Only one sequence in each file)\n"
        description += "GameXmlMaker with -t option <=> step 2 : add annotations in each file\n"
        parser = RepetOptionParser(description = description)
        parser.add_option("-f", "--inseq", dest = "inFastaName", action = "store", type = "string", help = "'fasta' file or 'seq' table recording the input sequences (required to generate new '.gamexml' files)", default = "")
        parser.add_option("-t", "--tablesfile", dest = "tablesFileName", action = "store", type = "string", help = "tabulated file of table name to use to update the GameXML files (fields: tier name, format, table name)", default = "")
        parser.add_option("-g", "--gameXML", dest = "gameXML", action = "store", type = "string", help = "gameXML file to update (if not specified, update all gameXML files in directory)", default = "")
        parser.add_option("-C", "--config", dest = "configFileName", action = "store", type = "string", help = "configuration file for database connection", default = "")
        parser.add_option("-v", "--verbose", dest = "verbose", action = "store", type = "int", help = "verbosity level (default=0, else 1 or 2)", default = 0)
        # positional arguments are not used by this program
        options = parser.parse_args()[0]
        self._setAttributesFromOptions(options)

    ## Copy the parsed options into the attributes
    #
    # @param options object with attributes inFastaName, tablesFileName, gameXML, configFileName and verbose
    #
    def _setAttributesFromOptions(self, options):
        self.setInFastaName(options.inFastaName)
        self.setTablesFileName(options.tablesFileName)
        self.setGameXMLFileName(options.gameXML)
        self.setConfigFileName(options.configFileName)
        self.setVerbose(options.verbose)

    def setInFastaName(self, inFastaName):
        self._inFastaName = inFastaName

    def setTablesFileName(self, tablesFileName):
        self._tablesFileName = tablesFileName

    def setGameXMLFileName(self, gamexmlFileName):
        self._gameXMLFileName = gamexmlFileName

    def setConfigFileName(self, configFileName):
        self._configFileName = configFileName

    def setVerbose(self, verbose):
        self._verbose = verbose

    ## Check the consistency of the attributes
    #
    # Raise an Exception if neither the input sequences nor the tables file
    # is given, or if a configuration file is given but does not exist.
    #
    def checkOptions(self):
        if self._inFastaName == "" and self._tablesFileName == "":
            raise Exception("ERROR: options -f or -t required")

        if self._configFileName != "" and not FileUtils.isRessourceExists(self._configFileName):
            raise Exception("ERROR: configuration file does not exist!")

    ## Run the requested step(s)
    #
    # With input sequences: create one '.gamexml' file per sequence.
    # With a tables file: update the given '.gamexml' file, or every
    # '*.gamexml' file of the current directory if none was specified.
    #
    def run(self):
        self.checkOptions()
        if self._verbose > 0:
            print("START GameXmlMaker")
            sys.stdout.flush()

        if self._inFastaName != "":
            self._createGameXMLFiles()

        if self._tablesFileName != "":
            if self._gameXMLFileName == "":
                lGameXMLFiles = glob.glob("*.gamexml")
            else:
                lGameXMLFiles = [self._gameXMLFileName]

            for gameXMLFile in lGameXMLFiles:
                self._updateGameXMLFileFromlTablesFile(gameXMLFile)

        if self._verbose > 0:
            print("END GameXmlMaker")
            sys.stdout.flush()

    ## Create as many XML files as sequences given in fasta file.
    #
    def _createGameXMLFiles(self):
        if self._verbose > 0:
            print("reading file %s" % self._inFastaName)
            sys.stdout.flush()

        iBioseqDB = BioseqDB(self._inFastaName)

        if self._verbose > 0:
            print("nb of sequences = %i" % iBioseqDB.getSize())
            sys.stdout.flush()

        for iBioseq in iBioseqDB.db:
            self._writeGameXMLFileFromBioseq(iBioseq)

    ## Create an element, optionally holding a text node, and append it to its parent
    #
    # @param docXML DOM document used as node factory
    # @param parent DOM node receiving the new element
    # @param tagName string name of the new element
    # @param text string content of the new element (no text node if None)
    # @return the new element
    #
    def _appendElement(self, docXML, parent, tagName, text = None):
        element = docXML.createElement(tagName)
        if text is not None:
            element.appendChild(docXML.createTextNode(text))
        parent.appendChild(element)
        return element

    ## Return the concatenation of the text nodes directly under an element
    #
    # @param element DOM element
    # @return string
    #
    def _getElementText(self, element):
        return "".join([node.data for node in element.childNodes if node.nodeType == node.TEXT_NODE])

    ## Write a new '<header>.gamexml' file with <game>, <seq> and <map_position> tags
    #
    # @param iBioseq object providing getHeader(), getSequence() and getLength()
    #
    def _writeGameXMLFileFromBioseq(self, iBioseq):
        header = iBioseq.getHeader()
        docXML = xml.dom.minidom.getDOMImplementation().createDocument(None, 'game', None)
        root = docXML.documentElement

        seq = self._appendElement(docXML, root, 'seq')
        seq.setAttribute('id', header)
        seq.setAttribute('focus', 'true')
        self._appendElement(docXML, seq, 'name', header)
        self._appendElement(docXML, seq, 'residues', iBioseq.getSequence())

        mapPos = self._appendElement(docXML, root, 'map_position')
        self._appendElement(docXML, mapPos, 'arm', header)
        span = self._appendElement(docXML, mapPos, 'span')
        self._appendElement(docXML, span, 'start', '1')
        self._appendElement(docXML, span, 'end', str(iBioseq.getLength()))

        fileName = "%s.gamexml" % header
        # the 'with' block guarantees the file is flushed and closed
        with open(fileName, "w") as gameXMLHandler:
            docXML.writexml(gameXMLHandler)

        if self._verbose > 0:
            print("file '%s' written" % fileName)
            sys.stdout.flush()

    ## Extract from a 'path', 'set' or 'map' element the data needed to write a <result_span>
    #
    # Fields not provided by the given type keep their default value
    # (-1 for coordinates, "" for names and identifier, 0 for identity).
    #
    # @param element object read from a 'path', 'set' or 'map' table
    # @param type string "path", "set" or "map"
    # @return tuple (Qstart, Qend, Sstart, Send, query, subject, identity, id)
    #
    def _parseResultSpanInfo(self, element, type):
        Qstart = -1
        Qend = -1
        Sstart = -1
        Send = -1
        query = ""
        subject = ""
        identity = 0
        identifier = ""

        if type == "path":
            Qstart = element.getQueryStart()
            Qend = element.getQueryEnd()
            Sstart = element.getSubjectStart()
            Send = element.getSubjectEnd()
            query = element.getQueryName()
            subject = element.getSubjectName()
            identity = element.getIdentity()
            identifier = element.getIdentifier()

        elif type == "set":
            Qstart = element.getStart()
            Qend = element.getEnd()
            query = element.getName()
            subject = element.getName()
            identifier = element.getId()

        elif type == "map":
            Qstart = element.getStart()
            Qend = element.getEnd()
            query = element.getSeqname()
            subject = element.getName()
            identifier = "-1"

        return (Qstart, Qend, Sstart, Send, query, subject, identity, identifier)

    ## Append a <result_span> (query and subject relationships, then score) to a parent node
    #
    # @param docXML DOM document used as node factory
    # @param spanInfo tuple as returned by _parseResultSpanInfo()
    # @param parent DOM node receiving the <result_span>
    #
    def _addPathSpan(self, docXML, spanInfo, parent):
        Qstart, Qend, Sstart, Send, query, subject, identity, identifier = spanInfo

        resultSpan = self._appendElement(docXML, parent, 'result_span')

        lRelationships = [('query', query, Qstart, Qend),
                          ('subject', '%s::%s' % (subject, str(identifier)), Sstart, Send)]
        for relationshipType, seqName, start, end in lRelationships:
            relationship = self._appendElement(docXML, resultSpan, 'seq_relationship')
            relationship.setAttribute('type', relationshipType)
            relationship.setAttribute('seq', seqName)
            span = self._appendElement(docXML, relationship, 'span')
            self._appendElement(docXML, span, 'start', str(start))
            self._appendElement(docXML, span, 'end', str(end))

        # the score comes after both relationships
        self._appendElement(docXML, resultSpan, 'score', str(identity))

    ## Return the <computational_analysis> of a program, creating it if needed
    #
    # An existing tag is reused when the text of its first <program> child
    # equals the program name. Otherwise a new tag, with <program> and
    # <database> children, is appended to the document root.
    #
    # @param docXML DOM document
    # @param programName string
    # @return the <computational_analysis> element
    #
    def _addComputationalAnalysisTags(self, docXML, programName):
        for computationalAnalysisTag in docXML.getElementsByTagName('computational_analysis'):
            lPrograms = computationalAnalysisTag.getElementsByTagName('program')
            # the 'nodeValue' of an element is always None: the name is in its text child
            if lPrograms and self._getElementText(lPrograms[0]) == programName:
                return computationalAnalysisTag

        computationalAnalysis = self._appendElement(docXML, docXML.documentElement, 'computational_analysis')
        self._appendElement(docXML, computationalAnalysis, 'program', programName)
        self._appendElement(docXML, computationalAnalysis, 'database', 'db')
        return computationalAnalysis

    ## Append a <result_set>, named '<name>::<id>', to a parent node
    #
    # @param docXML DOM document used as node factory
    # @param parent DOM node receiving the <result_set>
    # @param name string first part of the result set name
    # @param identifier string value of the 'id' attribute and second part of the name
    # @return the <result_set> element
    #
    def _addResultSet(self, docXML, parent, name, identifier):
        resultSet = self._appendElement(docXML, parent, 'result_set')
        resultSet.setAttribute('id', identifier)
        self._appendElement(docXML, resultSet, 'name', "%s::%s" % (name, identifier))
        return resultSet

    ## Append the <result_set> of a path (subject name and path identifier)
    #
    def _addResultSetFromPath(self, docXML, iPath, parent):
        return self._addResultSet(docXML, parent, iPath.getSubjectName(), str(iPath.getIdentifier()))

    ## Append the <result_set> of a set (set name and set identifier)
    #
    def _addResultSetFromSet(self, docXML, iSet, parent):
        return self._addResultSet(docXML, parent, iSet.getName(), str(iSet.getId()))

    ## Append the <result_set> of a map (map name, identifier always "-1")
    #
    def _addResultSetFromMap(self, docXML, iMap, parent):
        return self._addResultSet(docXML, parent, iMap.getName(), "-1")

    ## Add to a '.gamexml' file the annotations of the tables listed in the tables file
    #
    # Each line of the tables file which is neither empty nor a comment
    # ('#') has three fields: tier name, format ('path', 'set' or 'map')
    # and table name. The annotations are searched with the name of the
    # '.gamexml' file without its extension. The file is rewritten in place.
    #
    # @param gameXMLFile string name of the '.gamexml' file to update
    #
    def _updateGameXMLFileFromlTablesFile(self, gameXMLFile):
        docXML = xml.dom.minidom.parse(gameXMLFile)
        seqName = os.path.splitext(gameXMLFile)[0]

        with open(self._tablesFileName, "r") as tablesFileHandler:
            for line in tablesFileHandler:
                if line.startswith("#"):
                    continue
                lFields = line.split()
                if not lFields:
                    # blank lines carry no table description
                    continue
                if len(lFields) < 3:
                    raise Exception("ERROR: 3 fields (tier name, format, table name) expected in line '%s'" % line.rstrip("\n"))
                programName, tableFormat, table = lFields[:3]

                computationalAnalysis = self._addComputationalAnalysisTags(docXML, programName)

                # NOTE(review): a new database instance is created for each line
                # and never released here -- confirm whether it should be closed.
                if tableFormat == "path":
                    iDB = DbFactory.createInstance(self._configFileName)
                    iTpa = TablePathAdaptator(iDB, table)
                    # all the spans of a path go into the same <result_set>
                    dResultSets = {}
                    for iPath in iTpa.getPathListFromQuery(seqName):
                        pathId = iPath.getIdentifier()
                        if dResultSets.get(pathId) is None:
                            dResultSets[pathId] = self._addResultSetFromPath(docXML, iPath, computationalAnalysis)
                        spanInfo = self._parseResultSpanInfo(iPath, "path")
                        self._addPathSpan(docXML, spanInfo, dResultSets[pathId])

                elif tableFormat == "set":
                    iDB = DbFactory.createInstance(self._configFileName)
                    iTsa = TableSetAdaptator(iDB, table)
                    # all the spans of a set go into the same <result_set>
                    dResultSets = {}
                    for iSet in iTsa.getSetListFromSeqName(seqName):
                        setId = iSet.getId()
                        if dResultSets.get(setId) is None:
                            dResultSets[setId] = self._addResultSetFromSet(docXML, iSet, computationalAnalysis)
                        spanInfo = self._parseResultSpanInfo(iSet, "set")
                        self._addPathSpan(docXML, spanInfo, dResultSets[setId])

                elif tableFormat == "map":
                    iDB = DbFactory.createInstance(self._configFileName)
                    iTma = TableMapAdaptator(iDB, table)
                    # each map gets its own <result_set>
                    for iMap in iTma.getMapListFromChr(seqName):
                        resultSet = self._addResultSetFromMap(docXML, iMap, computationalAnalysis)
                        spanInfo = self._parseResultSpanInfo(iMap, "map")
                        self._addPathSpan(docXML, spanInfo, resultSet)

        with open(gameXMLFile, "w") as outputGameXMLFile:
            outputGameXMLFile.write(docXML.toxml())


if __name__ == "__main__":
    iGameXmlMaker = GameXmlMaker()
    iGameXmlMaker.setAttributesFromCmdLine()
    iGameXmlMaker.run()