comparison blast2html.py @ 104:a22c909c9b57

stream output
author Jan Kanis <jan.code@jankanis.nl>
date Mon, 07 Jul 2014 11:29:10 +0200
parents 86bcf17f50ef
children b3b5ee557170
comparison
equal deleted inserted replaced
103:86bcf17f50ef 104:a22c909c9b57
9 from __future__ import unicode_literals, division 9 from __future__ import unicode_literals, division
10 10
11 import sys 11 import sys
12 import math 12 import math
13 import warnings 13 import warnings
14 import six, codecs 14 import six, codecs, io
15 from six.moves import builtins 15 from six.moves import builtins
16 from os import path 16 from os import path
17 from itertools import repeat 17 from itertools import repeat
18 from collections import defaultdict 18 from collections import defaultdict
19 import glob 19 import glob
263 ('Query length', self.blast["BlastOutput_query-len"]), 263 ('Query length', self.blast["BlastOutput_query-len"]),
264 ('Program', self.blast.BlastOutput_version), 264 ('Program', self.blast.BlastOutput_version),
265 ('Database', self.blast.BlastOutput_db), 265 ('Database', self.blast.BlastOutput_db),
266 ) 266 )
267 267
268 result = template.render(blast=self.blast, 268 result = template.stream(blast=self.blast,
269 iterations=self.blast.BlastOutput_iterations.Iteration, 269 iterations=self.blast.BlastOutput_iterations.Iteration,
270 colors=self.colors, 270 colors=self.colors,
271 params=params) 271 params=params)
272 if six.PY2: 272
273 result = result.encode('utf-8') 273 result.dump(output)
274 output.write(result)
275 274
276 @filter 275 @filter
277 def match_colors(self, result): 276 def match_colors(self, result):
278 """ 277 """
279 An iterator that yields lists of length-color pairs. 278 An iterator that yields lists of length-color pairs.
364 template = self.genelinks.get(db) 363 template = self.genelinks.get(db)
365 if template is None: 364 if template is None:
366 return text if display_nolink else '' 365 return text if display_nolink else ''
367 args = dict(id=hitid(hit).split('|'), 366 args = dict(id=hitid(hit).split('|'),
368 fullid=hitid(hit), 367 fullid=hitid(hit),
369 defline=str(hit.Hit_def).split('|'), 368 defline=str(hit.Hit_def).split(' ', 1)[0].split('|'),
370 fulldefline=str(hit.Hit_def), 369 fulldefline=str(hit.Hit_def).split(' ', 1)[0],
371 accession=str(hit.Hit_accession)) 370 accession=str(hit.Hit_accession))
372 try: 371 try:
373 link = template.format(**args) 372 link = template.format(**args)
374 except Exception as e: 373 except Exception as e:
375 warnings.warn('Error in formatting gene bank link {} with {}: {}'.format(template, args, e)) 374 warnings.warn('Error in formatting gene bank link {} with {}: {}'.format(template, args, e))
448 if args.input == None: 447 if args.input == None:
449 args.input = args.positional_arg 448 args.input = args.positional_arg
450 if args.input == None: 449 if args.input == None:
451 parser.error('no input specified') 450 parser.error('no input specified')
452 451
452 if six.PY2:
453 # The argparse.FileType wrapper doesn't support an encoding
454 # argument or such, so for python 2 we need to wrap or reopen
455 # the output. The input files are already read as utf-8 by the
456 # respective libraries.
457
458 # One option is using codecs, but the codecs writer's writelines()
459 # method doesn't stream: it collects all output and writes it
460 # at once. The io module, on the other hand, is slower
461 # (though not significantly).
462
463 # args.output = codecs.getwriter('utf-8')(args.output)
464 args.output = io.open(args.output.name, 'w')
465
453 templatedir, templatename = path.split(args.template.name) 466 templatedir, templatename = path.split(args.template.name)
454 args.template.close() 467 args.template.close()
455 if not templatedir: 468 if not templatedir:
456 templatedir = '.' 469 templatedir = '.'
457 470
462 else: 475 else:
463 genelinks = read_genelinks(args.db_config_dir) 476 genelinks = read_genelinks(args.db_config_dir)
464 477
465 b = BlastVisualize(args.input, templatedir, templatename, genelinks) 478 b = BlastVisualize(args.input, templatedir, templatename, genelinks)
466 b.render(args.output) 479 b.render(args.output)
480 args.output.close()
467 481
468 482
469 if __name__ == '__main__': 483 if __name__ == '__main__':
470 main() 484 main()
471 485