Mercurial repository: repos > yufei-luo > s_mart
File: commons/tools/GFF3Maker.py — comparison view at changeset 18:94ab73e8a190 ("Uploaded")
Author: m-zytnicki
Date: Mon, 29 Apr 2013 03:20:15 -0400
Parent changeset: 17:b0e8584489e6 (no child changesets recorded)
1 #!/usr/bin/env python | |
2 | |
3 ##@file GFF3Maker.py | |
4 | |
5 # Copyright INRA (Institut National de la Recherche Agronomique) | |
6 # http://www.inra.fr | |
7 # http://urgi.versailles.inra.fr | |
8 # | |
9 # This software is governed by the CeCILL license under French law and | |
10 # abiding by the rules of distribution of free software. You can use, | |
11 # modify and/ or redistribute the software under the terms of the CeCILL | |
12 # license as circulated by CEA, CNRS and INRIA at the following URL | |
13 # "http://www.cecill.info". | |
14 # | |
15 # As a counterpart to the access to the source code and rights to copy, | |
16 # modify and redistribute granted by the license, users are provided only | |
17 # with a limited warranty and the software's author, the holder of the | |
18 # economic rights, and the successive licensors have only limited | |
19 # liability. | |
20 # | |
21 # In this respect, the user's attention is drawn to the risks associated | |
22 # with loading, using, modifying and/or developing or reproducing the | |
23 # software by the user in light of its specific status of free software, | |
24 # that may mean that it is complicated to manipulate, and that also | |
25 # therefore means that it is reserved for developers and experienced | |
26 # professionals having in-depth computer knowledge. Users are therefore | |
27 # encouraged to load and test the software's suitability as regards their | |
28 # requirements in conditions enabling the security of their systems and/or | |
29 # data to be ensured and, more generally, to use and operate it in the | |
30 # same conditions as regards security. | |
31 # | |
32 # The fact that you are presently reading this means that you have had | |
33 # knowledge of the CeCILL license and that you accept its terms. | |
34 | |
35 from commons.core.utils.RepetOptionParser import RepetOptionParser | |
36 from commons.core.utils.FileUtils import FileUtils | |
37 from commons.core.sql.DbFactory import DbFactory | |
38 from commons.core.sql.TableSeqAdaptator import TableSeqAdaptator | |
39 from commons.core.sql.TablePathAdaptator import TablePathAdaptator | |
40 import sys | |
41 import os | |
42 | |
## GFF3Maker exports annotations from a 'path' table into a GFF3 file.
#
class GFF3Maker(object):

    ## Constructor: stores every export option; the database handle (_iDB)
    # is created later, in run().
    #
    # @param inFastaName string name of the 'seq' table recording the input sequences
    # @param tablesFileName string tabulated file listing (tier name, format, table name) per annotation table
    # @param classifTableName string name of the TE library classification table (optional)
    # @param isChado boolean True to write Chado-compliant 'Target' attributes ('+' separators)
    # @param isGFF3WithoutAnnotation boolean True to keep GFF3 files even without annotation
    # @param isWithSequence boolean True to append the FASTA sequence at the end of each GFF3 file
    # @param areMatchPartsCompulsory boolean True to always write a 'match_part' under each 'match'
    # @param configFileName string configuration file for the database connection
    # @param verbose int verbosity level (0, 1 or 2)
    # @param doMergeIdenticalMatches boolean True to merge matches sharing query start/end and score
    # @param doSplit boolean True to write one GFF3 file per annotation table
    #
    def __init__(self, inFastaName = "", tablesFileName = "", classifTableName = "", isChado = False, isGFF3WithoutAnnotation = False, isWithSequence = False, areMatchPartsCompulsory = False, configFileName = "", verbose = 0, doMergeIdenticalMatches = False, doSplit = False):
        self._inFastaName = inFastaName
        self._tablesFileName = tablesFileName
        self._classifTableName = classifTableName
        self._isChado = isChado
        self._isGFF3WithoutAnnotation = isGFF3WithoutAnnotation
        self._isWithSequence = isWithSequence
        self._areMatchPartsCompulsory = areMatchPartsCompulsory
        self._configFileName = configFileName
        self._doMergeIdenticalMatches = doMergeIdenticalMatches
        self._doSplit = doSplit
        self._iDB = None
        self._verbose = verbose
60 | |
## Build the command-line parser, parse sys.argv and copy the options onto self.
#
def setAttributesFromCmdLine(self):
    desc = "GFF3Maker exports annotations from 'path', 'set' and/or 'classif' tables into a GFF3 file\n"
    optionParser = RepetOptionParser(description = desc)
    # (short flag, long flag, add_option keyword arguments) — order defines the help output.
    lOptionSpecs = [
        ("-f", "--inseq", dict(dest = "inFastaName", action = "store", type = "string", help = "'seq' table recording the input sequences", default = "")),
        ("-t", "--tablesfile", dict(dest = "tablesFileName", action = "store", type = "string", help = "tabulated file of table name to use to create the gff3 files (fields: tier name, format, table name)", default = "")),
        ("-w", "--withSequence", dict(dest = "isWithSequence", action = "store_true", help = "write the sequence at the end of GFF3 file", default = False)),
        ("-a", "--withoutAnnotation", dict(dest = "isGFF3WithoutAnnotation", action = "store_true", help = "write GFF3 files even if no annotation", default = False)),
        ("-p", "--matchPart", dict(dest = "areMatchPartsCompulsory", action = "store_true", help = "always associate a match_part to a match", default = False)),
        ("-i", "--classifTable", dict(dest = "classifTable", action = "store", type = "string", help = "name of the TE library classification table [optional]", default = "")),
        ("-c", "--chado", dict(dest = "isChado", action = "store_true", help = "Chado compliance", default = False)),
        ("-m", "--doMergeIdenticalMatches", dict(dest = "doMergeIdenticalMatches", action = "store_true", help = "merge identical matches based on query start, query end, score", default = False)),
        ("-s", "--doSplit", dict(dest = "doSplit", action = "store_true", help = "split each GFF3 per annotation type", default = False)),
        ("-C", "--config", dict(dest = "configFileName", action = "store", type = "string", help = "configuration file for database connection", default = "")),
        ("-v", "--verbose", dict(dest = "verbose", action = "store", type = "int", help = "verbosity level (default=0, else 1 or 2)", default = 0)),
        ]
    for shortFlag, longFlag, dKwargs in lOptionSpecs:
        optionParser.add_option(shortFlag, longFlag, **dKwargs)
    self._setAttributesFromOptions(optionParser.parse_args()[0])
77 | |
78 #TODO: write a "setAttributesFromConfig" | |
79 def _setAttributesFromOptions(self, options): | |
80 self.setInFastaName(options.inFastaName) | |
81 self.setTablesFileName(options.tablesFileName) | |
82 self.setClassifTable(options.classifTable) | |
83 self.setIsChado(options.isChado) | |
84 self.setDoMergeIdenticalMatches(options.doMergeIdenticalMatches) | |
85 self.setIsWithSequence(options.isWithSequence) | |
86 self.setIsGFF3WithoutAnnotation(options.isGFF3WithoutAnnotation) | |
87 self.setAreMatchPartCompulsory(options.areMatchPartsCompulsory) | |
88 self.setDoSplit(options.doSplit) | |
89 self.setConfigFileName(options.configFileName) | |
90 self.setVerbose(options.verbose) | |
91 | |
## Setters: each stores one already-parsed option value on the instance.

def setInFastaName(self, inFastaName):
    # Name of the 'seq' table recording the input sequences.
    self._inFastaName = inFastaName

def setTablesFileName(self, tablesFileName):
    # Tabulated file listing the annotation tables to export.
    self._tablesFileName = tablesFileName

def setIsWithSequence(self, isWithSequence):
    # True to append the FASTA sequence at the end of each GFF3 file.
    self._isWithSequence = isWithSequence

def setIsGFF3WithoutAnnotation(self, isGFF3WithoutAnnotation):
    # True to keep GFF3 files even when they contain no annotation.
    self._isGFF3WithoutAnnotation = isGFF3WithoutAnnotation

def setAreMatchPartCompulsory(self, areMatchPartsCompulsory):
    # True to always write a 'match_part' feature under each 'match'.
    self._areMatchPartsCompulsory = areMatchPartsCompulsory

def setClassifTable(self, classifTable):
    # Name of the TE library classification table (optional).
    self._classifTableName = classifTable

def setIsChado(self, isChado):
    # True to write Chado-compliant 'Target' attributes ('+' separators).
    self._isChado = isChado

def setDoMergeIdenticalMatches(self, doMergeIdenticalMatches):
    # True to merge matches sharing query start, query end and score.
    self._doMergeIdenticalMatches = doMergeIdenticalMatches

def setDoSplit(self, doSplit):
    # True to write one GFF3 file per annotation table.
    self._doSplit = doSplit

def setConfigFileName(self, configFileName):
    # Configuration file for the database connection.
    self._configFileName = configFileName

def setVerbose(self, verbose):
    # Verbosity level (0, 1 or 2).
    self._verbose = verbose
124 | |
## Check the option values; raise an Exception on the first problem found.
#
def checkOptions(self):
    # The input 'seq' table (-f) is mandatory.
    if self._inFastaName == "":
        raise Exception("ERROR: options -f required")
    # When a configuration file is given, it must exist on disk.
    if self._configFileName != "" and not FileUtils.isRessourceExists(self._configFileName):
        raise Exception("ERROR: configuration file does not exist!")
    # When a classification table is given, it must exist in the database.
    # NOTE(review): assumes self._iDB is already set (run() connects before calling us).
    if self._classifTableName and not self._iDB.doesTableExist(self._classifTableName):
        raise Exception("ERROR: classification table '%s' does not exist!" % self._classifTableName)
135 | |
## Retrieve the 'path' features to write in the GFF3 file.
#
# @param pathTable string name of the table recording the annotations (the features)
# @param seqTable string name of the 'seq' table used to fetch consensus lengths (may be "")
# @param seqName string name of the source sequence whose matches are exported
# @param source string program that generated the features (e.g. REPET)
# @param feature string unused here (kept for interface symmetry with _getSetFeatures)
# @param frame string GFF3 'frame' field ("." by default, or "+", "-")
# @return string GFF3 lines for this sequence
#
def _getPathFeatures(self, pathTable, seqTable, seqName, source, feature, frame):
    iTPA = TablePathAdaptator(self._iDB, pathTable)
    lPaths = iTPA.getPathListSortedByQueryCoordAndScoreFromQuery(seqName)
    if not lPaths:
        return ""
    # Group the matches by path identifier, then format one 'match'
    # (plus optional 'match_part' lines) per identifier.
    dPathID2Data = self._gatherSamePathFeatures(lPaths)
    return "".join([self._organizeEachPathFeature(pathID, dPathID2Data[pathID], seqName, source, frame, seqTable) for pathID in dPathID2Data])
155 | |
## Gather matches sharing the same path ID.
#
# @param lPaths non-empty list of Path instances sorted by query coordinate and score
# @return dPathID2Matchs dict whose keys are path IDs and values are lists of match data
#
def _gatherSamePathFeatures(self, lPaths):
    dPathID2Matchs = {}
    # Walk the sorted paths pairwise: when merging is enabled, a path with the
    # same query start/end and score as the previous one but a different subject
    # is folded into the previous path's 'otherTargets' list instead of
    # producing its own record.
    iPreviousPath = lPaths[0]
    iPreviousPath.otherTargets = []
    for iPath in lPaths[1:]:
        if self._doMergeIdenticalMatches and iPreviousPath.getQueryStart() == iPath.getQueryStart() and iPreviousPath.getQueryEnd() == iPath.getQueryEnd() and iPreviousPath.getScore() == iPath.getScore() and iPreviousPath.getSubjectName() != iPath.getSubjectName():
            iPreviousPath.otherTargets.append("%s %d %d" % (iPath.getSubjectName(), iPath.getSubjectStart(), iPath.getSubjectEnd()))
        else:
            self._addPathToMatchDict(dPathID2Matchs, iPreviousPath)
            iPreviousPath = iPath
            iPreviousPath.otherTargets = []
    # Flush the last pending path.
    self._addPathToMatchDict(dPathID2Matchs, iPreviousPath)
    return dPathID2Matchs
174 | |
175 def _getClassifEvidenceBySeqName(self, seqName): | |
176 lseqName = seqName.split('_') | |
177 seqNameStructure = '%s_%%' % lseqName[0] | |
178 if lseqName[-1] == 'reversed': | |
179 seqNameStructure += '%s_%s' % (lseqName[-2],lseqName[-1]) | |
180 else: | |
181 seqNameStructure += lseqName[-1] | |
182 qry = "SELECT evidence FROM %s WHERE seq_name like \"%s\"" % (self._classifTableName, seqNameStructure) | |
183 self._iDB.execute(qry) | |
184 result = "" | |
185 try: | |
186 result = "".join(self._iDB.fetchall()[0]) | |
187 except: pass | |
188 return result | |
189 | |
190 def _addPathToMatchDict(self, dPathID2Matchs, iPreviousPath): | |
191 pathId = iPreviousPath.getIdentifier() | |
192 subjectStart = iPreviousPath.getSubjectStart() | |
193 subjectEnd = iPreviousPath.getSubjectEnd() | |
194 strand = iPreviousPath.getSubjectStrand() | |
195 if subjectStart > subjectEnd: | |
196 tmp = subjectStart | |
197 subjectStart = subjectEnd | |
198 subjectEnd = tmp | |
199 queryStart = iPreviousPath.getQueryStart() | |
200 queryEnd = iPreviousPath.getQueryEnd() | |
201 subjectName = iPreviousPath.getSubjectName() | |
202 eValue = iPreviousPath.getEvalue() | |
203 identity = iPreviousPath.getIdentity() | |
204 otherTargets = iPreviousPath.otherTargets | |
205 if dPathID2Matchs.has_key(pathId): | |
206 dPathID2Matchs[pathId].append([queryStart, queryEnd, strand, subjectName, subjectStart, subjectEnd, eValue, identity, otherTargets]) | |
207 else: | |
208 dPathID2Matchs[pathId] = [[queryStart, queryEnd, strand, subjectName, subjectStart, subjectEnd, eValue, identity, otherTargets]] | |
209 | |
## Return the length of the consensus sequence whose description matches targetName.
#
def _getConsensusLengthByTargetName(self, targetName, seqTableName):
    return TableSeqAdaptator(self._iDB, seqTableName).getSeqLengthFromDescription(targetName)
213 | |
## For a specific path ID, organize match data according to the GFF3 format.
#
# @param pathID string path ID
# @param lMatches list of match records [queryStart, queryEnd, strand, subjectName,
#        subjectStart, subjectEnd, eValue, identity, otherTargets]
# @param seqName string name of the source feature
# @param source string 'source' field for GFF3 format
# @param frame string 'frame' field for GFF3 format
# @param seqTable string 'seq' table used to fetch the consensus length ("" to skip)
# @return lines string to write in the GFF3 file
#
def _organizeEachPathFeature(self, pathID, lMatches, seqName, source, frame, seqTable = ""):
    lines = ""
    minStart = lMatches[0][0]
    maxEnd = lMatches[0][1]
    minStartSubject = lMatches[0][4]
    maxEndSubject = lMatches[0][5]
    strand = lMatches[0][2]

    # Compute the overall query and subject spans of the 'match' feature.
    for i in lMatches:
        if i[0] < minStart:
            minStart = i[0]
        if i[1] > maxEnd:
            maxEnd = i[1]
        if i[4] < minStartSubject:
            minStartSubject = i[4]
        if i[5] > maxEndSubject:
            maxEndSubject = i[5]

    target = lMatches[0][3]

    # Optional TargetDescription tag from the classification table;
    # characters with a GFF3 meaning ('=', ';', ',') are sanitized.
    targetDescTag = ""
    if self._classifTableName != "":
        targetDescTag = self._getClassifEvidenceBySeqName(target)
        if targetDescTag != "":
            targetDescTag = ";TargetDescription=%s" % targetDescTag.replace('=', ':').replace(';', '').replace(',', ' |')

    # Optional TargetLength tag from the consensus 'seq' table.
    targetLengthTag = ""
    if seqTable != "":
        targetLengthTag = self._getConsensusLengthByTargetName(target, seqTable)
        targetLengthTag = ";TargetLength=%i" % targetLengthTag

    # Chado mode separates Target fields with '+' instead of spaces.
    attributes = "ID=ms%s_%s_%s" % (pathID, seqName, target)
    if self._isChado:
        attributes += ";Target=%s+%s+%s" % (target, minStartSubject, maxEndSubject)
    else:
        attributes += ";Target=%s %s %s" % (target, minStartSubject, maxEndSubject)

    # Merged matches list their alternative subjects instead of length/description.
    if lMatches[0][8]:
        otherTargets = ", ".join(lMatches[0][8])
        attributes += ";OtherTargets=%s" % otherTargets
    else:
        attributes += targetLengthTag
        attributes += targetDescTag
        if len(lMatches) == 1:
            attributes += ";Identity=%s" % lMatches[0][7]

    lines += "%s\t%s\tmatch\t%s\t%s\t0.0\t%s\t%s\t%s\n" % (seqName, source, minStart, maxEnd, strand, frame, attributes)

    # Write one 'match_part' per fragment when there are several fragments,
    # or when a match_part is always required.
    if len(lMatches) > 1 or self._areMatchPartsCompulsory:
        count = 1
        for i in lMatches:
            attributes = "ID=mp%s-%i_%s_%s" % (pathID, count, seqName, target)
            attributes += ";Parent=ms%s%s%s%s%s" % (pathID, "_", seqName, "_", target)
            if self._isChado:
                attributes += ";Target=%s+%s+%s" % (target, i[4], i[5])
            else:
                attributes += ";Target=%s %s %s" % (target, i[4], i[5])

            if not i[8]:
                attributes += ";Identity=%s" % i[7]
            lines += "%s\t%s\tmatch_part\t%s\t%s\t%s\t%s\t%s\t%s\n" % (seqName, source, i[0], i[1], i[6], i[2], frame, attributes)
            count += 1

    return lines
289 | |
## Retrieve the 'set' features to write in the GFF3 file.
#
# @param table string name of the 'set' table recording the annotations
# @param key string name of the source sequence whose matches are exported
# @param source string program that generated the features
# @param feature string unused here (kept for interface symmetry with _getPathFeatures)
# @param frame string GFF3 'frame' field
# @return string GFF3 lines for this sequence
#
def _getSetFeatures(self, table, key, source, feature, frame):
    # Fetch every (path, name, start, end) record attached to this sequence.
    self._iDB.execute("SELECT DISTINCT path,name,start,end FROM %s WHERE chr=\"%s\"" % (table, key))
    # Group the records by set identifier, then format one 'match'
    # (plus optional 'match_part' lines) per identifier.
    dSetID2Data = self._gatherSameSetFeatures(self._iDB.fetchall())
    return "".join([self._organizeEachSetFeature(setID, dSetID2Data[setID], key, source, frame) for setID in dSetID2Data])
310 | |
311 ## Gather matches with the same path ID. | |
312 # | |
313 # @param data list of string lists results of a SQL request | |
314 # @return dSetID2Matchs dict whose keys are set IDs and values are matches data | |
315 # | |
316 def _gatherSameSetFeatures(self, data): | |
317 dSetID2Matchs = {} | |
318 | |
319 for i in data: | |
320 setID = i[0] | |
321 name = i[1] | |
322 start = i[2] | |
323 end = i[3] | |
324 | |
325 matchStart = min(start, end) | |
326 matchEnd = max(start, end) | |
327 strand = self._getStrand(start, end) | |
328 | |
329 if dSetID2Matchs.has_key(setID): | |
330 dSetID2Matchs[setID].append([name, matchStart, matchEnd, strand]) | |
331 | |
332 else: | |
333 dSetID2Matchs[setID] = [[name, matchStart, matchEnd, strand]] | |
334 | |
335 return dSetID2Matchs | |
336 | |
337 ## For a specific set ID, organize match data according to the GFF3 format. | |
338 # | |
339 # @param setID string path ID | |
340 # @param lMatches match list | |
341 # @param seqName string name of the source feature | |
342 # @param source string 'source' field for GFF3 format | |
343 # @param frame string 'frame' field for GFF3 format | |
344 # @return lines string to write in the GFF3 file | |
345 # | |
346 def _organizeEachSetFeature(self, setID, lMatches, seqName, source, frame): | |
347 lines = "" | |
348 minStart = min(lMatches[0][1], lMatches[0][2]) | |
349 maxEnd = max(lMatches[0][1], lMatches[0][2]) | |
350 strand = lMatches[0][3] | |
351 | |
352 # for each match | |
353 for i in lMatches: | |
354 start = min(i[1],i[2]) | |
355 if start < minStart: | |
356 minStart = start | |
357 end = max(i[1],i[2]) | |
358 if end > maxEnd: | |
359 maxEnd = end | |
360 | |
361 target = lMatches[0][0].replace("(","").replace(")","").replace("#","") | |
362 | |
363 attributes = "ID=ms%s_%s_%s" % (setID, seqName, target) | |
364 if self._isChado: | |
365 attributes += ";Target=%s+%s+%s" % (target, "1", abs(minStart-maxEnd)+1) | |
366 else: | |
367 attributes += ";Target=%s %s %s" % (target, "1", abs(minStart-maxEnd)+1) | |
368 lines += "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n" % (seqName, source, "match", minStart, maxEnd, "0.0", strand, frame, attributes) | |
369 | |
370 if len(lMatches) > 1 or self._areMatchPartsCompulsory: | |
371 count = 1 | |
372 for i in lMatches: | |
373 attributes = "ID=mp%s-%i_%s_%s" % (setID, count, seqName, target) | |
374 attributes += ";Parent=ms%s%s%s%s%s" % (setID, "_", seqName, "_", target) | |
375 if self._isChado: | |
376 attributes += ";Target=%s+%s+%s" % (target, "1", abs(i[1]-i[2])+1) | |
377 else: | |
378 attributes += ";Target=%s %s %s" % (target, "1", abs(i[1]-i[2])+1) | |
379 lines += "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n" % (seqName, source, "match_part", i[1], i[2], "0.0", i[3], frame, attributes) | |
380 count += 1 | |
381 | |
382 return lines | |
383 | |
384 ## Return the strand ('+' if start < end, '-' otherwise). | |
385 # | |
386 # @param start integer start coordinate | |
387 # @param end integer end coordinate | |
388 # @return strand string "+" or "-" | |
389 # | |
390 def _getStrand(self, start, end): | |
391 if start < end: | |
392 return "+" | |
393 else: | |
394 return "-" | |
395 | |
## Export one GFF3 file per input sequence (or per sequence and per annotation
# table when splitting is requested), then close the database connection.
#
# The split and non-split branches previously duplicated the whole table-line
# parsing / header writing / finalization logic; it is now factored into the
# private helpers below. The 'format' local (shadowing the builtin) is renamed
# 'fmt', and the Python-2-only print statements use the version-neutral call form.
#
def run(self):
    #TODO: cat all gff in one gff file in option
    if self._configFileName == "":
        self._iDB = DbFactory.createInstance()
    else:
        self._iDB = DbFactory.createInstance(self._configFileName)

    self.checkOptions()
    if self._verbose > 0:
        print("START GFF3Maker")
        sys.stdout.flush()

    with open(self._tablesFileName, "r") as tablesFile:
        linesFromAnnotationTablesFile = tablesFile.readlines()
    feature = "region"
    frame = "."

    iTSA = TableSeqAdaptator(self._iDB, self._inFastaName)
    for seqName, length in iTSA.getAccessionAndLengthList():
        if not self._doSplit:
            # One GFF3 file per sequence, gathering every annotation table.
            fileName = "%s.gff3" % seqName
            outFile = self._openGFF3File(fileName, seqName, length)
            for source, fmt, table, tableseq in self._iterAnnotationSpecs(linesFromAnnotationTablesFile):
                outFile.write(self._getAnnotations(fmt, table, tableseq, seqName, source, feature, frame))
            outFile.close()
            self._finalizeGFF3File(fileName, iTSA, seqName)
        else:
            # One GFF3 file per sequence AND per annotation table.
            count = 1
            for source, fmt, table, tableseq in self._iterAnnotationSpecs(linesFromAnnotationTablesFile):
                fileName = "%s_Annot%i.gff3" % (seqName, count)
                outFile = self._openGFF3File(fileName, seqName, length)
                outFile.write(self._getAnnotations(fmt, table, tableseq, seqName, source, feature, frame))
                outFile.close()
                self._finalizeGFF3File(fileName, iTSA, seqName)
                count += 1

    self._iDB.close()

    if self._verbose > 0:
        print("END GFF3Maker")
        sys.stdout.flush()

## Create a GFF3 file and write its two mandatory header lines; return the open handle.
#
def _openGFF3File(self, fileName, seqName, length):
    outFile = open(fileName, "w")
    outFile.write("##gff-version 3\n")
    outFile.write("##sequence-region %s 1 %s\n" % (seqName, length))
    return outFile

## Yield (source, format, table, seqTable) tuples from the annotation tables file.
# '#' lines are skipped; as in the original code, the first empty line stops
# the iteration entirely.
#
def _iterAnnotationSpecs(self, lines):
    for line in lines:
        if line[0] == "#":
            continue
        tok = line.split()
        if len(tok) == 0:
            return
        yield tok[0], tok[1], tok[2], tok[3] if len(tok) == 4 else ""

## Dispatch on the annotation table format ('path' or 'set') and return the GFF3 lines.
#
def _getAnnotations(self, fmt, table, tableseq, seqName, source, feature, frame):
    if fmt == 'path':
        return self._getPathFeatures(table, tableseq, seqName, source, feature, frame)
    elif fmt == 'set':
        return self._getSetFeatures(table, seqName, source, feature, frame)
    raise Exception("Wrong format : %s" % fmt)

## Remove a header-only GFF3 file unless explicitly kept; otherwise optionally
# append the FASTA sequence of the source feature.
#
def _finalizeGFF3File(self, fileName, iTSA, seqName):
    #TODO: check getNbLinesInSingleFile() to handle big files
    if not self._isGFF3WithoutAnnotation and FileUtils.getNbLinesInSingleFile(fileName) == 2:
        os.remove(fileName)
    elif self._isWithSequence:
        outFile = open(fileName, "a")
        outFile.write("##FASTA\n")
        iTSA.getBioseqFromHeader(seqName).write(outFile)
        outFile.close()
493 | |
if __name__ == "__main__":
    # Build the exporter from the command line and run it.
    gff3Maker = GFF3Maker()
    gff3Maker.setAttributesFromCmdLine()
    gff3Maker.run()
498 |