diff commons/tools/GameXmlMaker.py @ 18:94ab73e8a190

Uploaded
author m-zytnicki
date Mon, 29 Apr 2013 03:20:15 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/tools/GameXmlMaker.py	Mon Apr 29 03:20:15 2013 -0400
@@ -0,0 +1,384 @@
+#!/usr/bin/env python
+
+##@file GameXmlMaker.py
+
+# Copyright INRA (Institut National de la Recherche Agronomique)
+# http://www.inra.fr
+# http://urgi.versailles.inra.fr
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software.  You can  use, 
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info". 
+#
+# As a counterpart to the access to the source code and  rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty  and the software's author,  the holder of the
+# economic rights,  and the successive licensors  have only  limited
+# liability. 
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading,  using,  modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean  that it is complicated to manipulate,  and  that  also
+# therefore means  that it is reserved for developers  and  experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or 
+# data to be ensured and,  more generally, to use and operate it in the 
+# same conditions as regards security. 
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+
+import os
+import glob
+import sys
+import xml.dom.minidom
+from commons.core.utils.RepetOptionParser import RepetOptionParser
+from commons.core.utils.FileUtils import FileUtils
+from commons.core.seq.BioseqDB import BioseqDB
+from commons.core.sql.DbFactory import DbFactory
+from commons.core.sql.TablePathAdaptator import TablePathAdaptator
+from commons.core.sql.TableSetAdaptator import TableSetAdaptator
+from commons.core.sql.TableMapAdaptator import TableMapAdaptator
+
+## GameXmlMaker exports sequences, and the annotations stored in tables, as GameXML ('.gamexml') files.
+#
+class GameXmlMaker(object):
+
+    def __init__(self, inFastaName = "", tablesFileName = "", configFileName = "", verbose = 0):
+        """
+        Store the run parameters.
+
+        The name of the GameXML file to update always starts empty; it is
+        only set through setGameXMLFileName().
+        """
+        self._gameXMLFileName = ""
+        self._verbose = verbose
+        self._configFileName = configFileName
+        self._tablesFileName = tablesFileName
+        self._inFastaName = inFastaName
+    
+    def setAttributesFromCmdLine(self):
+        """
+        Declare the command-line options, parse the command line and copy
+        the parsed values into the instance attributes.
+        """
+        # Step 1 writes '.gamexml' files (see _createGameXMLFiles), not gff files.
+        description = "GameXmlMaker with -f option <=> step 1 : create gamexml files (write only the sequence and not the annotation. Only one sequence in each file)\n"
+        description += "GameXmlMaker with -t option <=> step 2 : add annotations in each file\n"
+        parser = RepetOptionParser(description = description)
+        parser.add_option("-f", "--inseq",       dest = "inFastaName",      action = "store",       type = "string", help = "'fasta' file or 'seq' table recording the input sequences (required to generate new '.gamexml' files)", default = "")
+        parser.add_option("-t", "--tablesfile",  dest = "tablesFileName",   action = "store",       type = "string", help = "tabulated file of table name to use to update the GameXML files (fields: tier name, format, table name)", default = "")
+        parser.add_option("-g", "--gameXML",     dest = "gameXML",          action = "store",       type = "string", help = "gameXML file to update (if not specified, update all gameXML files in directory)", default = "")
+        parser.add_option("-C", "--config",      dest = "configFileName",   action = "store",       type = "string", help = "configuration file for database connection", default = "")
+        parser.add_option("-v", "--verbose",     dest = "verbose",          action = "store",       type = "int",    help = "verbosity level (default=0, else 1 or 2)", default = 0)
+        # Positional arguments are not used by this tool.
+        options = parser.parse_args()[0]
+        self._setAttributesFromOptions(options)
+        
+    def _setAttributesFromOptions(self, options):
+        """
+        Copy every parsed option into the matching attribute, going through
+        the public setters.
+
+        @param options: object exposing the attributes inFastaName, tablesFileName, gameXML, configFileName and verbose
+        """
+        lOptionsAndSetters = (
+            ("inFastaName", self.setInFastaName),
+            ("tablesFileName", self.setTablesFileName),
+            ("gameXML", self.setGameXMLFileName),
+            ("configFileName", self.setConfigFileName),
+            ("verbose", self.setVerbose),
+        )
+        for optionName, setter in lOptionsAndSetters:
+            setter(getattr(options, optionName))
+        
+    def setInFastaName(self, inFastaName):
+        """
+        Set the name of the input sequences (empty string: no '.gamexml' file is created).
+        """
+        self._inFastaName = inFastaName
+        
+    def setTablesFileName(self, tablesFileName):
+        """
+        Set the name of the file listing the tables to export (empty string: no update step).
+        """
+        self._tablesFileName = tablesFileName
+        
+    def setGameXMLFileName(self, gamexmlFileName):
+        """
+        Set the single GameXML file to update (empty string: every '*.gamexml' file of the current directory).
+        """
+        self._gameXMLFileName = gamexmlFileName
+        
+    def setConfigFileName(self, configFileName):
+        """
+        Set the name of the configuration file handed to DbFactory.createInstance().
+        """
+        self._configFileName = configFileName
+        
+    def setVerbose(self, verbose):
+        """
+        Set the verbosity level (progress messages are printed when it is greater than 0).
+        """
+        self._verbose = verbose
+
+    def checkOptions(self):
+        """
+        Validate the attributes before running.
+
+        @raise Exception: when neither the input sequences nor the tables file is given,
+                          or when a configuration file is given but does not exist
+        """
+        nothingToDo = (self._inFastaName == "" and self._tablesFileName == "")
+        if nothingToDo:
+            raise Exception("ERROR: options -f or -t required")
+
+        # The configuration file is only checked when one was provided.
+        configGiven = (self._configFileName != "")
+        if configGiven and not FileUtils.isRessourceExists(self._configFileName):
+            raise Exception("ERROR: configuration file does not exist!")
+            
+    def run(self):
+        """
+        Check the options, then run the requested steps.
+
+        Step 1 (input sequences given): create one '.gamexml' file per sequence.
+        Step 2 (tables file given): add the annotations to the given GameXML
+        file or, when none is given, to every '*.gamexml' file of the current
+        directory.
+
+        @raise Exception: when checkOptions() rejects the current attributes
+        """
+        self.checkOptions()
+        if self._verbose > 0:
+            print("START GameXmlMaker")
+            sys.stdout.flush()
+
+        if self._inFastaName != "":
+            self._createGameXMLFiles()
+
+        if self._tablesFileName != "":
+            if self._gameXMLFileName == "":
+                lXMLNewFile = glob.glob("*.gamexml")
+            else:
+                lXMLNewFile = [self._gameXMLFileName]
+
+            for newGamexmlFile in lXMLNewFile:
+                self._updateGameXMLFileFromlTablesFile(newGamexmlFile)
+
+        if self._verbose > 0:
+            # The closing trace used to name another tool ("GFF3Maker").
+            print("END GameXmlMaker")
+            sys.stdout.flush()
+            
+    ## Create as many XML files as sequences given in fasta file.
+    #
+    def _createGameXMLFiles(self):
+        """
+        Load the input sequences into a BioseqDB and write one '.gamexml'
+        file for each of them.
+        """
+        beVerbose = self._verbose > 0
+
+        if beVerbose:
+            print("reading file %s" % self._inFastaName)
+            sys.stdout.flush()
+
+        iBioseqDB = BioseqDB(self._inFastaName)
+
+        if beVerbose:
+            print("nb of sequences = %i" % iBioseqDB.getSize())
+            sys.stdout.flush()
+
+        for iBioseq in iBioseqDB.db:
+            self._writeGameXMLFileFromBioseq(iBioseq)
+
+    def _writeGameXMLFileFromBioseq(self, iBioseq):
+        """
+        Write a new '<header>.gamexml' file in the current directory, holding
+        a <game> root with the <seq> and <map_position> tags of one sequence
+        (no annotation).
+
+        @param iBioseq: sequence to export; its getHeader(), getSequence() and getLength() methods are used
+        """
+        docXML = xml.dom.minidom.getDOMImplementation().createDocument(None, 'game', None)
+        root = docXML.documentElement
+
+        def addChild(parent, tagName, text = None):
+            # Append a new <tagName> element (optionally holding text) to parent.
+            child = docXML.createElement(tagName)
+            if text is not None:
+                child.appendChild(docXML.createTextNode(text))
+            parent.appendChild(child)
+            return child
+
+        header = iBioseq.getHeader()
+
+        seq = addChild(root, 'seq')
+        seq.setAttribute('id', header)
+        seq.setAttribute('focus', 'true')
+        addChild(seq, 'name', header)
+        addChild(seq, 'residues', iBioseq.getSequence())
+
+        # The map position spans the whole sequence: from 1 to its length.
+        mapPos = addChild(root, 'map_position')
+        addChild(mapPos, 'arm', header)
+        span = addChild(mapPos, 'span')
+        addChild(span, 'start', '1')
+        addChild(span, 'end', str(iBioseq.getLength()))
+
+        fileName = "%s.gamexml" % header
+        # Close the handle explicitly so the content is flushed to disk
+        # before the file is parsed again by the update step.
+        gameXMLFile = open(fileName, "w")
+        try:
+            docXML.writexml(gameXMLFile)
+        finally:
+            gameXMLFile.close()
+
+        if self._verbose > 0:
+            print("file '%s' written" % fileName)
+            sys.stdout.flush()
+
+
+
+    def _parseResultSpanInfo(self, element, type):
+        """
+        Extract from a 'path', 'set' or 'map' object the values needed to
+        build a <result_span> tag.
+
+        @param element: object to read (accessed through its getters only)
+        @param type: "path", "set" or "map"; with any other value the element is not read
+        @return: tuple (Qstart, Qend, Sstart, Send, query, subject, identity, id);
+                 values a given type cannot provide keep their defaults
+                 (-1 for coordinates, 0 for identity, "" for names and id)
+        """
+        if type == "path":
+            return (element.getQueryStart(),
+                    element.getQueryEnd(),
+                    element.getSubjectStart(),
+                    element.getSubjectEnd(),
+                    element.getQueryName(),
+                    element.getSubjectName(),
+                    element.getIdentity(),
+                    element.getIdentifier())
+
+        if type == "set":
+            # A set has no subject coordinates; its name is used as query and subject.
+            return (element.getStart(),
+                    element.getEnd(),
+                    -1,
+                    -1,
+                    element.getName(),
+                    element.getName(),
+                    0,
+                    element.getId())
+
+        if type == "map":
+            # A map has no identifier: the string "-1" is used instead.
+            return (element.getStart(),
+                    element.getEnd(),
+                    -1,
+                    -1,
+                    element.getSeqname(),
+                    element.getName(),
+                    0,
+                    "-1")
+
+        return (-1, -1, -1, -1, "", "", 0, "")
+
+    def _addPathSpan(self, docXML, spanInfo, parent):
+        """
+        Append to parent a <result_span> tag holding the query relationship,
+        the subject relationship (named '<subject>::<id>') and the score.
+
+        @param docXML: DOM document used to create the new nodes
+        @param spanInfo: tuple (Qstart, Qend, Sstart, Send, query, subject, identity, id)
+        @param parent: DOM element receiving the new <result_span>
+        """
+        (queryStart, queryEnd, subjectStart, subjectEnd,
+         queryName, subjectName, identity, identifier) = spanInfo
+
+        def makeRelationship(relType, seqName, first, last):
+            # Build <seq_relationship><span><start/><end/></span></seq_relationship>.
+            relationship = docXML.createElement('seq_relationship')
+            relationship.setAttribute('type', relType)
+            relationship.setAttribute('seq', seqName)
+            span = docXML.createElement('span')
+            for tagName, value in (('start', first), ('end', last)):
+                bound = docXML.createElement(tagName)
+                bound.appendChild(docXML.createTextNode(str(value)))
+                span.appendChild(bound)
+            relationship.appendChild(span)
+            return relationship
+
+        resultSpan = docXML.createElement('result_span')
+        parent.appendChild(resultSpan)
+
+        resultSpan.appendChild(makeRelationship('query', queryName, queryStart, queryEnd))
+        subjectLabel = '%s::%s' % (subjectName, str(identifier))
+        resultSpan.appendChild(makeRelationship('subject', subjectLabel, subjectStart, subjectEnd))
+
+        score = docXML.createElement('score')
+        score.appendChild(docXML.createTextNode(str(identity)))
+        resultSpan.appendChild(score)
+
+    def _addComputationalAnalysisTags(self, docXML, programName):
+        """
+        Return the <computational_analysis> tag of the given program,
+        creating it (with its <program> and <database> children) under the
+        document root when the document does not hold one yet.
+
+        @param docXML: DOM document to search and, if needed, to extend
+        @param programName: text expected in the <program> child
+        @return: the existing or newly created <computational_analysis> element
+        """
+        for computationalAnalysisTag in docXML.getElementsByTagName('computational_analysis'):
+            lPrograms = computationalAnalysisTag.getElementsByTagName("program")
+            if not lPrograms:
+                continue
+            # The nodeValue of an element node is always None: the program
+            # name is carried by the text node(s) below <program>.
+            lTexts = [node.data for node in lPrograms[0].childNodes if node.nodeType == node.TEXT_NODE]
+            if "".join(lTexts).strip() == programName:
+                return computationalAnalysisTag
+
+        computationalAnalysis = docXML.createElement('computational_analysis')
+        docXML.documentElement.appendChild(computationalAnalysis)
+
+        program = docXML.createElement('program')
+        program.appendChild(docXML.createTextNode(programName))
+        computationalAnalysis.appendChild(program)
+
+        db = docXML.createElement('database')
+        db.appendChild(docXML.createTextNode('db'))
+        computationalAnalysis.appendChild(db)
+
+        return computationalAnalysis
+
+        
+    def _addResultSetFromPath(self, docXML, iPath, parent):
+        """
+        Append to parent a <result_set> tag for a path: its 'id' attribute is
+        the path identifier and its <name> child is '<subject name>::<identifier>'.
+
+        @return: the new <result_set> element
+        """
+        resultSet = docXML.createElement('result_set')
+        resultSet.setAttribute('id', str(iPath.getIdentifier()))
+        parent.appendChild(resultSet)
+
+        label = "%s::%s" % (iPath.getSubjectName(), str(iPath.getIdentifier()))
+        nameTag = docXML.createElement('name')
+        nameTag.appendChild(docXML.createTextNode(label))
+        resultSet.appendChild(nameTag)
+        return resultSet
+
+    
+    def _addResultSetFromSet(self, docXML, iSet, parent):
+        """
+        Append to parent a <result_set> tag for a set: its 'id' attribute is
+        the set id and its <name> child is '<set name>::<id>'.
+
+        @return: the new <result_set> element
+        """
+        resultSet = docXML.createElement('result_set')
+        resultSet.setAttribute('id', str(iSet.getId()))
+        parent.appendChild(resultSet)
+
+        label = "%s::%s" % (iSet.getName(), str(iSet.getId()))
+        nameTag = docXML.createElement('name')
+        nameTag.appendChild(docXML.createTextNode(label))
+        resultSet.appendChild(nameTag)
+        return resultSet
+
+    
+    def _addResultSetFromMap(self, docXML, iMap, parent):
+        """
+        Append to parent a <result_set> tag for a map: a map has no
+        identifier, so the 'id' attribute is "-1" and the <name> child is
+        '<map name>::-1'.
+
+        @return: the new <result_set> element
+        """
+        noId = "-1"
+        resultSet = docXML.createElement('result_set')
+        resultSet.setAttribute('id', noId)
+        parent.appendChild(resultSet)
+
+        label = "%s::%s" % (iMap.getName(), noId)
+        nameTag = docXML.createElement('name')
+        nameTag.appendChild(docXML.createTextNode(label))
+        resultSet.appendChild(nameTag)
+        return resultSet
+
+    def _updateGameXMLFileFromlTablesFile(self, gameXMLFile):
+        """
+        Add to a GameXML file the annotations of every table listed in the
+        tables file, then overwrite the GameXML file with the result.
+
+        Each non-comment line of the tables file holds three
+        whitespace-separated fields: program (tier) name, format ("path",
+        "set" or "map") and table name. Lines starting with '#' and blank
+        lines are skipped. The tables are queried with the GameXML file name
+        stripped of its extension.
+
+        @param gameXMLFile: name of the '.gamexml' file to update in place
+        @raise IndexError: when a line holds fewer than three fields
+        """
+        docXML = xml.dom.minidom.parse(gameXMLFile)
+        # Name used to query the tables: the file name without its extension.
+        gameXMLFileName = os.path.splitext(gameXMLFile)[0]
+
+        tablesFile = open(self._tablesFileName, "r")
+        try:
+            for line in tablesFile:
+                lFields = line.split()
+                # A blank line used to raise an IndexError: skip it like a comment.
+                if line.startswith("#") or not lFields:
+                    continue
+                programName = lFields[0]
+                tableFormat = lFields[1]
+                table = lFields[2]
+
+                computationalAnalysis = self._addComputationalAnalysisTags(docXML, programName)
+
+                # NOTE(review): one database connection is created per line
+                # and never explicitly closed here -- confirm whether the
+                # object returned by DbFactory.createInstance() needs closing.
+                if tableFormat == "path":
+                    iDB = DbFactory.createInstance(self._configFileName)
+                    iTpa = TablePathAdaptator(iDB, table)
+                    # Paths sharing an identifier go under the same <result_set>.
+                    dResultSets = {}
+                    for iPath in iTpa.getPathListFromQuery(gameXMLFileName):
+                        resultSet = dResultSets.get(iPath.getIdentifier())
+                        if resultSet is None:
+                            resultSet = self._addResultSetFromPath(docXML, iPath, computationalAnalysis)
+                            dResultSets[iPath.getIdentifier()] = resultSet
+                        spanInfo = self._parseResultSpanInfo(iPath, "path")
+                        self._addPathSpan(docXML, spanInfo, resultSet)
+
+                elif tableFormat == "set":
+                    iDB = DbFactory.createInstance(self._configFileName)
+                    iTsa = TableSetAdaptator(iDB, table)
+                    # Sets sharing an id go under the same <result_set>.
+                    dResultSets = {}
+                    for iSet in iTsa.getSetListFromSeqName(gameXMLFileName):
+                        resultSet = dResultSets.get(iSet.getId())
+                        if resultSet is None:
+                            resultSet = self._addResultSetFromSet(docXML, iSet, computationalAnalysis)
+                            dResultSets[iSet.getId()] = resultSet
+                        spanInfo = self._parseResultSpanInfo(iSet, "set")
+                        self._addPathSpan(docXML, spanInfo, resultSet)
+
+                elif tableFormat == "map":
+                    iDB = DbFactory.createInstance(self._configFileName)
+                    iTma = TableMapAdaptator(iDB, table)
+                    # Maps have no identifier: each one gets its own <result_set>.
+                    for iMap in iTma.getMapListFromChr(gameXMLFileName):
+                        resultSet = self._addResultSetFromMap(docXML, iMap, computationalAnalysis)
+                        spanInfo = self._parseResultSpanInfo(iMap, "map")
+                        self._addPathSpan(docXML, spanInfo, resultSet)
+        finally:
+            tablesFile.close()
+
+        outputGameXMLFile = open(gameXMLFile, "w")
+        try:
+            outputGameXMLFile.write(docXML.toxml())
+        finally:
+            outputGameXMLFile.close()
+            
+# Script entry point: read the options from the command line, then run the export.
+if __name__ == "__main__":
+    iGameXmlMaker = GameXmlMaker()
+    iGameXmlMaker.setAttributesFromCmdLine()
+    iGameXmlMaker.run()
+    
\ No newline at end of file