changeset 104:a22c909c9b57

stream output
author Jan Kanis <jan.code@jankanis.nl>
date Mon, 07 Jul 2014 11:29:10 +0200
parents 86bcf17f50ef
children b3b5ee557170
files blast2html.py
diffstat 1 files changed, 21 insertions(+), 7 deletions(-) [+]
line wrap: on
line diff
--- a/blast2html.py	Tue Jul 01 17:18:09 2014 +0200
+++ b/blast2html.py	Mon Jul 07 11:29:10 2014 +0200
@@ -11,7 +11,7 @@
 import sys
 import math
 import warnings
-import six, codecs
+import six, codecs, io
 from six.moves import builtins
 from os import path
 from itertools import repeat
@@ -265,13 +265,12 @@
                   ('Database', self.blast.BlastOutput_db),
         )
 
-        result = template.render(blast=self.blast,
+        result = template.stream(blast=self.blast,
                                  iterations=self.blast.BlastOutput_iterations.Iteration,
                                  colors=self.colors,
                                  params=params)
-        if six.PY2:
-            result = result.encode('utf-8')
-        output.write(result)
+
+        result.dump(output)
 
     @filter
     def match_colors(self, result):
@@ -366,8 +365,8 @@
             return text if display_nolink else ''
         args = dict(id=hitid(hit).split('|'),
                     fullid=hitid(hit),
-                    defline=str(hit.Hit_def).split('|'),
-                    fulldefline=str(hit.Hit_def),
+                    defline=str(hit.Hit_def).split(' ', 1)[0].split('|'),
+                    fulldefline=str(hit.Hit_def).split(' ', 1)[0],
                     accession=str(hit.Hit_accession))
         try:
             link = template.format(**args)
@@ -450,6 +449,20 @@
     if args.input == None:
         parser.error('no input specified')
 
+    if six.PY2:
+        # The argparse.FileType wrapper doesn't accept an encoding
+        # argument, so on Python 2 we need to wrap or reopen the
+        # output. The input files are already read as utf-8 by the
+        # respective libraries.
+
+        # One option is using codecs, but the codecs writer's
+        # writelines() method doesn't stream: it collects all output
+        # and writes it at once. The io module, on the other hand, is
+        # slower (though not significantly).
+
+        # args.output = codecs.getwriter('utf-8')(args.output)
+        args.output = io.open(args.output.name, 'w')
+    
     templatedir, templatename = path.split(args.template.name)
     args.template.close()
     if not templatedir:
@@ -464,6 +477,7 @@
 
     b = BlastVisualize(args.input, templatedir, templatename, genelinks)
     b.render(args.output)
+    args.output.close()
 
 
 if __name__ == '__main__':