changeset 116:f5066973029a

refactor
author Jan Kanis <jan.code@jankanis.nl>
date Mon, 14 Jul 2014 15:47:48 +0200
parents 0c2a03f9740b
children 8ae714069687
files blast2html.py
diffstat 1 files changed, 28 insertions(+), 32 deletions(-) [+]
line wrap: on
line diff
--- a/blast2html.py	Mon Jul 14 15:01:32 2014 +0200
+++ b/blast2html.py	Mon Jul 14 15:47:48 2014 +0200
@@ -238,10 +238,9 @@
 
     max_scale_labels = 10
 
-    def __init__(self, input, templatedir, templatename, dbname, genelinks={}):
+    def __init__(self, input, templatedir, templatename, genelinks):
         self.input = input
         self.templatename = templatename
-        self.dbname = dbname
         self.genelinks = genelinks
 
         self.blast = objectify.parse(self.input).getroot()
@@ -359,16 +358,13 @@
         
         db = hit.getroottree().getroot().BlastOutput_db
 
-        if isinstance(self.genelinks, six.string_types):
-            template = self.genelinks
-        else:
-            template = self.genelinks[db].template
+        template = self.genelinks[db].template
 
         if text is None:
             if text_from == 'hitid':
                 text = hitid(hit)
             elif text_from == 'dbname':
-                text = self.dbname or self.genelinks[db].dbname or 'Gene Bank'
+                text = self.genelinks[db].dbname
             else:
                 raise ValueError("Unknown value for text_from: '{0}'. Use 'hitid' or 'dbname'.".format(text_from))
 
@@ -391,8 +387,8 @@
 
 
 genelinks_entry = namedtuple('genelinks_entry', 'dbname template')
-def read_genelinks(dir):
-    links = defaultdict(lambda: genelinks_entry(None, None))
+def read_blastdb(dir, default):
+    links = defaultdict(lambda: default)
     # blastdb.loc, blastdb_p.loc, blastdb_d.loc, etc.
     files = sorted(glob.glob(path.join(dir, 'blastdb*.loc')))
     # reversed, so blastdb.loc will take precedence
@@ -404,7 +400,7 @@
                     continue
                 line = l.rstrip('\n').split('\t')
                 try:
-                    links[line[2]] = genelinks_entry(dbname=line[3], template=line[4])
+                    links[line[2]] = genelinks_entry(dbname=line[3] or default.dbname, template=line[4])
                 except IndexError:
                     continue
             f.close()
@@ -422,7 +418,7 @@
     default_template = path.join(path.dirname(__file__), 'blast2html.html.jinja')
 
     parser = argparse.ArgumentParser(description="Convert a BLAST XML result into a nicely readable html page",
-                                     usage="{0} [-i] INPUT [-o OUTPUT] [--genelink-template URL_TEMPLATE]".format(sys.argv[0]))
+                                     usage="{0} [-i] INPUT [-o OUTPUT] [--genelink-template URL_TEMPLATE] [--dbname DBNAME]".format(sys.argv[0]))
     input_group = parser.add_mutually_exclusive_group(required=True)
     input_group.add_argument('positional_arg', metavar='INPUT', nargs='?', type=argparse.FileType(mode='r'),
                              help='The input Blast XML file, same as -i/--input')
@@ -438,26 +434,25 @@
     parser.add_argument('--template', type=argparse.FileType(mode='r'), default=default_template,
                         help='The template file to use. Defaults to blast_html.html.jinja')
 
-    parser.add_argument('--dbname', type=str, default=None,
+    parser.add_argument('--dbname', type=str, default='Gene Bank',
                         help="The link text to use for external links to a gene bank database. Defaults to 'Gene Bank'")
-    dblink_group = parser.add_mutually_exclusive_group()
-    dblink_group.add_argument('--genelink-template', metavar='URL_TEMPLATE',
-                              default='http://www.ncbi.nlm.nih.gov/nucleotide/{accession}?report=genbank&log$=nuclalign',
-                              help="""A link template to link hits to a gene bank webpage. The template string is a 
-                              Python format string. It can contain the following replacement elements: {id[N]}, {fullid}, 
-                              {defline[N]}, {fulldefline}, {accession}, where N is a number. id[N] and defline[N] will be 
-                              replaced by the Nth element of the id or defline, where '|' is the field separator. 
+    parser.add_argument('--genelink-template', metavar='URL_TEMPLATE',
+                        default='http://www.ncbi.nlm.nih.gov/nucleotide/{accession}?report=genbank&log$=nuclalign',
+                        help="""A link template to link hits to a gene bank webpage. The template string is a 
+                        Python format string. It can contain the following replacement elements: {id[N]}, {fullid}, 
+                        {defline[N]}, {fulldefline}, {accession}, where N is a number. id[N] and defline[N] will be 
+                        replaced by the Nth element of the id or defline, where '|' is the field separator. 
+                        
+                        The default is 'http://www.ncbi.nlm.nih.gov/nucleotide/{accession}?report=genbank&log$=nuclalign',
+                        which is a link to the NCBI nucleotide database.""")
 
-                              The default is 'http://www.ncbi.nlm.nih.gov/nucleotide/{accession}?report=genbank&log$=nuclalign',
-                              which is a link to the NCBI nucleotide database.""")
-
-    dblink_group.add_argument('--db-config-dir',
-                              help="""The directory where databases are configured in blastdb*.loc files. These files
-                              are consulted for creating a gene bank link. The files should be tab-separated tables (with lines
-                              starting with '#' ignored), where the third field of a line should be a database path and the fourth
-                              a genebank link template conforming to the --genelink-template option syntax.
-
-                              This option is incompatible with --genelink-template.""")
+    parser.add_argument('--db-config-dir',
+                        help="""The directory where databases are configured in blastdb*.loc files. These files
+                        are consulted for creating a gene bank link. The files should conform to the format that
+                        Galaxy's BLAST expect, i.e. tab-separated tables (with lines starting with '#' ignored),
+                        with two extra fields. The third field of a line should be a database path and the fourth
+                        a genebank link template conforming to the --genelink-template option syntax. Entries in
+                        these config files override links specified using --genelink-template and --dbname.""")
     
     args = parser.parse_args()
     if args.input == None:
@@ -491,14 +486,15 @@
     if not templatedir:
         templatedir = '.'
 
+    defaultentry = genelinks_entry(args.dbname, args.genelink_template)
     if args.db_config_dir is None:
-        genelinks = defaultdict(lambda: genelinks_entry(template=args.genelink_template, dbname=None))
+        genelinks = defaultdict(lambda: defaultentry)
     elif not path.isdir(args.db_config_dir):
         parser.error('db-config-dir does not exist or is not a directory')
     else:
-        genelinks = read_genelinks(args.db_config_dir)
+        genelinks = read_blastdb(args.db_config_dir, default=defaultentry)
 
-    b = BlastVisualize(args.input, templatedir, templatename, dbname=args.dbname, genelinks=genelinks)
+    b = BlastVisualize(args.input, templatedir, templatename, genelinks=genelinks)
     b.render(args.output)
     args.output.close()