18
|
1 #!/usr/bin/env python
|
|
2
|
|
3 ##@file GameXmlMaker.py
|
|
4
|
|
5 # Copyright INRA (Institut National de la Recherche Agronomique)
|
|
6 # http://www.inra.fr
|
|
7 # http://urgi.versailles.inra.fr
|
|
8 #
|
|
9 # This software is governed by the CeCILL license under French law and
|
|
10 # abiding by the rules of distribution of free software. You can use,
|
|
11 # modify and/ or redistribute the software under the terms of the CeCILL
|
|
12 # license as circulated by CEA, CNRS and INRIA at the following URL
|
|
13 # "http://www.cecill.info".
|
|
14 #
|
|
15 # As a counterpart to the access to the source code and rights to copy,
|
|
16 # modify and redistribute granted by the license, users are provided only
|
|
17 # with a limited warranty and the software's author, the holder of the
|
|
18 # economic rights, and the successive licensors have only limited
|
|
19 # liability.
|
|
20 #
|
|
21 # In this respect, the user's attention is drawn to the risks associated
|
|
22 # with loading, using, modifying and/or developing or reproducing the
|
|
23 # software by the user in light of its specific status of free software,
|
|
24 # that may mean that it is complicated to manipulate, and that also
|
|
25 # therefore means that it is reserved for developers and experienced
|
|
26 # professionals having in-depth computer knowledge. Users are therefore
|
|
27 # encouraged to load and test the software's suitability as regards their
|
|
28 # requirements in conditions enabling the security of their systems and/or
|
|
29 # data to be ensured and, more generally, to use and operate it in the
|
|
30 # same conditions as regards security.
|
|
31 #
|
|
32 # The fact that you are presently reading this means that you have had
|
|
33 # knowledge of the CeCILL license and that you accept its terms.
|
|
34
|
|
35 import os
|
|
36 import glob
|
|
37 import sys
|
|
38 import xml.dom.minidom
|
|
39 from commons.core.utils.RepetOptionParser import RepetOptionParser
|
|
40 from commons.core.utils.FileUtils import FileUtils
|
|
41 from commons.core.seq.BioseqDB import BioseqDB
|
|
42 from commons.core.sql.DbFactory import DbFactory
|
|
43 from commons.core.sql.TablePathAdaptator import TablePathAdaptator
|
|
44 from commons.core.sql.TableSetAdaptator import TableSetAdaptator
|
|
45 from commons.core.sql.TableMapAdaptator import TableMapAdaptator
|
|
46
|
|
47 ## GameXmlMaker exports sequences and their annotations (from 'path', 'set' or 'map' tables) to GameXML ('.gamexml') files.
|
|
48 #
|
|
class GameXmlMaker(object):
    """Create and update GameXML ('.gamexml') files.

    The work is split in two steps, selected by the attributes that are set:
      * step 1 (inFastaName, option -f): write one '.gamexml' file per input
        sequence, containing only the sequence and its map position;
      * step 2 (tablesFileName, option -t): add annotations read from 'path',
        'set' or 'map' tables to existing '.gamexml' files.
    """

    def __init__(self, inFastaName = "", tablesFileName = "", configFileName = "", verbose = 0):
        """Store the settings; nothing is read or written here."""
        self._inFastaName = inFastaName
        self._tablesFileName = tablesFileName
        self._configFileName = configFileName
        self._verbose = verbose
        self._gameXMLFileName = ""

    def setAttributesFromCmdLine(self):
        """Parse the command line and copy the option values into the attributes."""
        description = "GameXmlMaker with -f option <=> step 1 : create '.gamexml' files (write only the sequence and not the annotation. Only one sequence in each file)\n"
        description += "GameXmlMaker with -t option <=> step 2 : add annotations in each file\n"
        parser = RepetOptionParser(description = description)
        parser.add_option("-f", "--inseq", dest = "inFastaName", action = "store", type = "string", help = "'fasta' file or 'seq' table recording the input sequences (required to generate new '.gamexml' files)", default = "")
        parser.add_option("-t", "--tablesfile", dest = "tablesFileName", action = "store", type = "string", help = "tabulated file of table name to use to update the GameXML files (fields: tier name, format, table name)", default = "")
        parser.add_option("-g", "--gameXML", dest = "gameXML", action = "store", type = "string", help = "gameXML file to update (if not specified, update all gameXML files in directory)", default = "")
        parser.add_option("-C", "--config", dest = "configFileName", action = "store", type = "string", help = "configuration file for database connection", default = "")
        parser.add_option("-v", "--verbose", dest = "verbose", action = "store", type = "int", help = "verbosity level (default=0, else 1 or 2)", default = 0)
        (options, args) = parser.parse_args()
        self._setAttributesFromOptions(options)

    def _setAttributesFromOptions(self, options):
        """Copy every parsed option into the matching attribute."""
        self.setInFastaName(options.inFastaName)
        self.setTablesFileName(options.tablesFileName)
        self.setGameXMLFileName(options.gameXML)
        self.setConfigFileName(options.configFileName)
        self.setVerbose(options.verbose)

    def setInFastaName(self, inFastaName):
        self._inFastaName = inFastaName

    def setTablesFileName(self, tablesFileName):
        self._tablesFileName = tablesFileName

    def setGameXMLFileName(self, gamexmlFileName):
        self._gameXMLFileName = gamexmlFileName

    def setConfigFileName(self, configFileName):
        self._configFileName = configFileName

    def setVerbose(self, verbose):
        self._verbose = verbose

    def checkOptions(self):
        """Validate the attributes before running.

        Raises Exception when neither an input sequence source nor a tables
        file is given, or when a configuration file is named but missing.
        """
        if self._inFastaName == "" and self._tablesFileName == "":
            raise Exception("ERROR: options -f or -t required")

        if self._configFileName != "":
            if not FileUtils.isRessourceExists(self._configFileName):
                raise Exception("ERROR: configuration file does not exist!")

    def run(self):
        """Run step 1 and/or step 2 depending on which attributes are set."""
        self.checkOptions()
        if self._verbose > 0:
            print("START GameXmlMaker")
            sys.stdout.flush()

        if self._inFastaName != "":
            self._createGameXMLFiles()

        if self._tablesFileName != "":
            if self._gameXMLFileName == "":
                # No explicit file: update every '.gamexml' file of the current directory.
                lXMLNewFile = glob.glob("*.gamexml")
            else:
                lXMLNewFile = [self._gameXMLFileName]

            for newGamexmlFile in lXMLNewFile:
                self._updateGameXMLFileFromlTablesFile(newGamexmlFile)

        if self._verbose > 0:
            print("END GameXmlMaker")
            sys.stdout.flush()

    def _createGameXMLFiles(self):
        """Create as many XML files as sequences given in the fasta file."""
        if self._verbose > 0:
            print("reading file %s" % self._inFastaName)
            sys.stdout.flush()

        iBioseqDB = BioseqDB(self._inFastaName)

        if self._verbose > 0:
            print("nb of sequences = %i" % iBioseqDB.getSize())
            sys.stdout.flush()

        for iBioseq in iBioseqDB.db:
            self._writeGameXMLFileFromBioseq(iBioseq)

    def _appendTextElement(self, docXML, parent, tagName, text):
        """Append <tagName>text</tagName> to 'parent' and return the new element."""
        element = docXML.createElement(tagName)
        element.appendChild(docXML.createTextNode(text))
        parent.appendChild(element)
        return element

    def _getTagText(self, tag):
        """Return the concatenated text-node content of 'tag', stripped of surrounding whitespace."""
        return "".join([child.data for child in tag.childNodes if child.nodeType == child.TEXT_NODE]).strip()

    def _writeGameXMLFileFromBioseq(self, iBioseq):
        """
        write new '.gamexml' file with <game> and <seq> tags

        The file is named '<header>.gamexml' and is created in the current
        directory; it holds the sequence itself and a <map_position> spanning
        1..length.
        """
        header = iBioseq.getHeader()
        docXML = xml.dom.minidom.getDOMImplementation().createDocument(None, 'game', None)
        root = docXML.documentElement

        seq = docXML.createElement('seq')
        seq.setAttribute('id', header)
        seq.setAttribute('focus', 'true')
        root.appendChild(seq)
        self._appendTextElement(docXML, seq, 'name', header)
        self._appendTextElement(docXML, seq, 'residues', iBioseq.getSequence())

        mapPos = docXML.createElement('map_position')
        root.appendChild(mapPos)
        self._appendTextElement(docXML, mapPos, 'arm', header)

        span = docXML.createElement('span')
        mapPos.appendChild(span)
        self._appendTextElement(docXML, span, 'start', '1')
        self._appendTextElement(docXML, span, 'end', str(iBioseq.getLength()))

        fileName = "%s.gamexml" % header
        # 'with' guarantees the handle is flushed and closed, even if writexml fails.
        with open(fileName, "w") as outFile:
            docXML.writexml(outFile)

        if self._verbose > 0:
            print("file '%s' written" % fileName)
            sys.stdout.flush()

    def _parseResultSpanInfo(self, element, type):
        """Extract the span description of a 'path', 'set' or 'map' object.

        Returns the tuple (Qstart, Qend, Sstart, Send, query, subject,
        identity, id). Fields the given kind of object does not provide keep
        their defaults (-1 for coordinates, "" for names and id, 0 for
        identity); an unknown 'type' returns all defaults.
        """
        Qstart = -1
        Qend = -1
        Sstart = -1
        Send = -1
        query = ""
        subject = ""
        identity = 0
        identifier = ""

        if type == "path":
            Qstart = element.getQueryStart()
            Qend = element.getQueryEnd()
            Sstart = element.getSubjectStart()
            Send = element.getSubjectEnd()
            query = element.getQueryName()
            subject = element.getSubjectName()
            identity = element.getIdentity()
            identifier = element.getIdentifier()

        elif type == "set":
            Qstart = element.getStart()
            Qend = element.getEnd()
            query = element.getName()
            subject = element.getName()
            identifier = element.getId()

        elif type == "map":
            Qstart = element.getStart()
            Qend = element.getEnd()
            query = element.getSeqname()
            subject = element.getName()
            # Same "-1" placeholder as the one used by _addResultSetFromMap.
            identifier = "-1"

        return (Qstart, Qend, Sstart, Send, query, subject, identity, identifier)

    def _addPathSpan(self, docXML, spanInfo, parent):
        """Append a <result_span> built from 'spanInfo' to 'parent'.

        'spanInfo' is the 8-tuple returned by _parseResultSpanInfo. The
        result span holds a 'query' and a 'subject' <seq_relationship>, each
        with its own <span>, followed by a <score> carrying the identity.
        """
        Qstart, Qend, Sstart, Send, query, subject, identity, identifier = spanInfo

        resultSpan = docXML.createElement('result_span')
        parent.appendChild(resultSpan)

        queryRelship = docXML.createElement('seq_relationship')
        queryRelship.setAttribute('type', 'query')
        queryRelship.setAttribute('seq', query)
        resultSpan.appendChild(queryRelship)

        subjectRelship = docXML.createElement('seq_relationship')
        subjectRelship.setAttribute('type', 'subject')
        subjectRelship.setAttribute('seq', '%s::%s' % (subject, str(identifier)))
        resultSpan.appendChild(subjectRelship)

        self._appendTextElement(docXML, resultSpan, 'score', str(identity))

        querySpan = docXML.createElement('span')
        queryRelship.appendChild(querySpan)
        self._appendTextElement(docXML, querySpan, 'start', str(Qstart))
        self._appendTextElement(docXML, querySpan, 'end', str(Qend))

        subjectSpan = docXML.createElement('span')
        subjectRelship.appendChild(subjectSpan)
        self._appendTextElement(docXML, subjectSpan, 'start', str(Sstart))
        self._appendTextElement(docXML, subjectSpan, 'end', str(Send))

    def _addComputationalAnalysisTags(self, docXML, programName):
        """Return the <computational_analysis> of 'programName', creating it if needed.

        An existing analysis is recognised by the text of its first <program>
        tag. The text has to be read from the tag's text child: 'nodeValue' of
        an element node is always None, so comparing it to the program name
        would never match and a duplicate analysis would be appended on every
        update.
        """
        for analysisTag in docXML.getElementsByTagName('computational_analysis'):
            lProgramTags = analysisTag.getElementsByTagName("program")
            if lProgramTags and self._getTagText(lProgramTags[0]) == programName:
                return analysisTag

        computationalAnalysis = docXML.createElement('computational_analysis')
        docXML.documentElement.appendChild(computationalAnalysis)
        self._appendTextElement(docXML, computationalAnalysis, 'program', programName)
        self._appendTextElement(docXML, computationalAnalysis, 'database', 'db')
        return computationalAnalysis

    def _addResultSetFromPath(self, docXML, iPath, parent):
        """Append to 'parent' a <result_set> named '<subject name>::<path id>' and return it."""
        pathId = str(iPath.getIdentifier())
        resultSet = docXML.createElement('result_set')
        resultSet.setAttribute('id', pathId)
        parent.appendChild(resultSet)
        self._appendTextElement(docXML, resultSet, 'name', "%s::%s" % (iPath.getSubjectName(), pathId))
        return resultSet

    def _addResultSetFromSet(self, docXML, iSet, parent):
        """Append to 'parent' a <result_set> named '<set name>::<set id>' and return it."""
        setId = str(iSet.getId())
        resultSet = docXML.createElement('result_set')
        resultSet.setAttribute('id', setId)
        parent.appendChild(resultSet)
        self._appendTextElement(docXML, resultSet, 'name', "%s::%s" % (iSet.getName(), setId))
        return resultSet

    def _addResultSetFromMap(self, docXML, iMap, parent):
        """Append to 'parent' a <result_set> named '<map name>::-1' (id "-1") and return it."""
        resultSet = docXML.createElement('result_set')
        resultSet.setAttribute('id', "-1")
        parent.appendChild(resultSet)
        self._appendTextElement(docXML, resultSet, 'name', "%s::%s" % (iMap.getName(), "-1"))
        return resultSet

    def _updateGameXMLFileFromlTablesFile(self, gameXMLFile):
        """Add to 'gameXMLFile' the annotations of every table listed in the tables file.

        Each data line of the tables file holds three whitespace-separated
        fields: program (tier) name, format ('path', 'set' or 'map') and table
        name. Lines starting with '#' and blank lines are skipped. The tables
        are queried with the file name stripped of its extension, and the
        '.gamexml' file is rewritten in place once all lines are processed.
        """
        docXML = xml.dom.minidom.parse(gameXMLFile)
        # Loop-invariant: file name without extension, used as the query key for every table.
        gameXMLFileName = os.path.splitext(gameXMLFile)[0]

        with open(self._tablesFileName, "r") as tablesFile:
            for line in tablesFile:
                fields = line.split()
                if not fields or line.startswith("#"):
                    continue
                programName = fields[0]
                tableFormat = fields[1]
                table = fields[2]

                computationalAnalysis = self._addComputationalAnalysisTags(docXML, programName)

                # NOTE(review): a DB instance is created for each table line and never
                # released here -- confirm whether DbFactory instances need closing.
                if tableFormat == "path":
                    iDB = DbFactory.createInstance(self._configFileName)
                    iTpa = TablePathAdaptator(iDB, table)
                    # Spans sharing a path identifier are grouped in one <result_set>.
                    dResultSets = {}
                    for iPath in iTpa.getPathListFromQuery(gameXMLFileName):
                        pathId = iPath.getIdentifier()
                        resultSet = dResultSets.get(pathId)
                        if resultSet is None:
                            resultSet = self._addResultSetFromPath(docXML, iPath, computationalAnalysis)
                            dResultSets[pathId] = resultSet
                        spanInfo = self._parseResultSpanInfo(iPath, "path")
                        self._addPathSpan(docXML, spanInfo, resultSet)

                elif tableFormat == "set":
                    iDB = DbFactory.createInstance(self._configFileName)
                    iTsa = TableSetAdaptator(iDB, table)
                    # Spans sharing a set id are grouped in one <result_set>.
                    dResultSets = {}
                    for iSet in iTsa.getSetListFromSeqName(gameXMLFileName):
                        setId = iSet.getId()
                        resultSet = dResultSets.get(setId)
                        if resultSet is None:
                            resultSet = self._addResultSetFromSet(docXML, iSet, computationalAnalysis)
                            dResultSets[setId] = resultSet
                        spanInfo = self._parseResultSpanInfo(iSet, "set")
                        self._addPathSpan(docXML, spanInfo, resultSet)

                elif tableFormat == "map":
                    iDB = DbFactory.createInstance(self._configFileName)
                    iTma = TableMapAdaptator(iDB, table)
                    # No grouping here: every map gets its own <result_set>.
                    for iMap in iTma.getMapListFromChr(gameXMLFileName):
                        resultSet = self._addResultSetFromMap(docXML, iMap, computationalAnalysis)
                        spanInfo = self._parseResultSpanInfo(iMap, "map")
                        self._addPathSpan(docXML, spanInfo, resultSet)

        with open(gameXMLFile, "w") as outputGameXMLFile:
            outputGameXMLFile.write(docXML.toxml())
|
|
379
|
|
# Script entry point: read the settings from the command line, then run the maker.
if __name__ == "__main__":
    maker = GameXmlMaker()
    maker.setAttributesFromCmdLine()
    maker.run()
|
|
384 |