diff antiSMASH_wrapper.py @ 0:6a37d0a4510a default tip

initial uploaded
author bjoern-gruening
date Thu, 15 Mar 2012 05:23:03 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/antiSMASH_wrapper.py	Thu Mar 15 05:23:03 2012 -0400
@@ -0,0 +1,153 @@
+#!/usr/bin/env python
+# -*- coding: UTF-8 -*-
+
+import os, sys, subprocess, commands
+import random, shutil
+import zipfile
+
+
+# Hard-coded locations of one antiSMASH 1.1.0 installation.
+# Executable that the wrapper launches.
+antismash_path = '/home/galaxy/bin/antismash-1.1.0/antismash.py'
+# Database directories, handed to antismash.py as --blastdbpath and
+# --pfamdbpath; both point at the same directory here.
+pfamdbpath = '/home/galaxy/bin/antismash-1.1.0/db'
+blastdbpath = '/home/galaxy/bin/antismash-1.1.0/db'
+
+
+def zipper(dir, zip_file):
+    """Pack the result directories found below ``dir`` into a zip archive.
+
+    Only directories whose path contains ``dataset_`` are archived. This
+    relies on the assumption that every Galaxy file, and therefore the result
+    directory derived from it, is named ``dataset_xxx``. The check is made
+    against the whole walked path, so if ``dir`` itself contains ``dataset_``
+    everything below it is included. Entries are stored under names relative
+    to ``dir``.
+
+    dir      -- directory tree to walk (the name shadows the builtin but is
+                kept so existing callers are unaffected)
+    zip_file -- path of the archive to create; an existing file is replaced
+
+    Returns ``zip_file`` unchanged.
+    """
+    root_len = len(os.path.abspath(dir))
+    # The context manager closes the archive even when a write fails, so the
+    # file handle is never leaked.
+    with zipfile.ZipFile(zip_file, 'w', compression=zipfile.ZIP_DEFLATED) as archive:
+        for root, _dirs, files in os.walk(dir):
+            # only include the result directory
+            if 'dataset_' not in root:
+                continue
+            # Part of the path below ``dir``; the leading separator is
+            # stripped so archive member names are explicitly relative.
+            archive_root = os.path.abspath(root)[root_len:].lstrip(os.sep)
+            for name in files:
+                archive.write(os.path.join(root, name),
+                              os.path.join(archive_root, name),
+                              zipfile.ZIP_DEFLATED)
+    return zip_file
+
+
+def _genome_name(input_path):
+    """Return the result directory name antismash.py is expected to derive from the input file."""
+    # try to generate the same name as in antismash.py
+    name = ".".join((os.path.basename(input_path) + '.fasta').split(".")[:-1])
+    for char in """!"#$%&()*+,./:;=>?@[]^`{|}'""":
+        name = name.replace(char, "")
+    return name
+
+
+def _antismash_command(args, input_path, cores):
+    """Return the antismash.py invocation as an argument list (run without a shell)."""
+    def yes_no(flag):
+        return 'y' if flag else 'n'
+
+    # ./antismash.py Tue6071_genome.fasta --geneclustertypes 1 --fullhmm y
+    return [
+        antismash_path, input_path,
+        '--geneclustertypes', str(args.geneclustertypes),
+        '--taxon', 'e' if args.eukaryotic else 'p',
+        '--clusterblast', yes_no(args.clusterblast),
+        '--smcogs', yes_no(args.smcogs),
+        '--fullhmm', yes_no(args.fullhmm),
+        '--fullblast', yes_no(args.fullblast),
+        # Passed through verbatim (an unset option becomes the text "None",
+        # exactly as the previous string formatting produced).
+        '--glimmer_prediction', str(args.glimmer_prediction),
+        '--blastdbpath', blastdbpath,
+        '--pfamdbpath', pfamdbpath,
+        '--cores', str(cores),
+    ]
+
+
+def _write_annotated_proteins(result_path, out_handle):
+    """Copy clusterblast/geneclusterprots.fasta to out_handle, extending each header's cluster field."""
+    cluster_dir = os.path.join(result_path, 'clusterblast')
+
+    # Third column -> fourth column of geneclusters.txt
+    # (presumably cluster name -> cluster type; confirm against antiSMASH).
+    clustername_mapping = {}
+    with open(os.path.join(cluster_dir, 'geneclusters.txt')) as handle:
+        for line in handle:
+            token = line.rstrip('\r\n').split('\t')
+            if len(token) < 4:
+                # Blank or truncated line: nothing to map.
+                continue
+            clustername_mapping[token[2]] = token[3]
+
+    with open(os.path.join(cluster_dir, 'geneclusterprots.fasta')) as handle:
+        for line in handle:
+            if line.startswith('>'):
+                for key, value in clustername_mapping.items():
+                    marker = '|%s|' % key
+                    if marker in line:
+                        line = line.replace(marker, '|%s|%s|' % (key, value))
+                        break
+            # Every line is written exactly once: a header without a known
+            # cluster is kept unchanged rather than dropped, so no sequence
+            # is left without its header.
+            out_handle.write(line)
+
+
+def _publish_html(result_path, html_file, html_path):
+    """Copy the antiSMASH HTML report: index page to html_file, its assets into html_path."""
+    shutil.copy(os.path.join(result_path, 'display.xhtml'), html_file)
+    if not os.path.isdir(html_path):
+        os.mkdir(html_path)
+    for subdir in ('html', 'images', 'svg', 'substrspecs'):
+        shutil.copytree(os.path.join(result_path, subdir),
+                        os.path.join(html_path, subdir))
+    for asset in ('jquery.svg.js', 'jquery.svgdom.js',
+                  'jquery-1.4.2.min.js', 'style.css'):
+        shutil.copy(os.path.join(result_path, asset), html_path)
+
+
+def anitSMASH(args, cores=10):
+    """Run antismash.py on ``args.input`` and distribute its results.
+
+    The input is copied into a fresh temporary working directory, antiSMASH
+    is started there, and afterwards the results are copied to the output
+    locations named in ``args``. The working directory is removed again,
+    also when a step fails.
+
+    args  -- namespace providing: input, geneclustertypes, eukaryotic,
+             clusterblast, smcogs, fullhmm, fullblast, glimmer_prediction
+             (antiSMASH options) and embl_path, geneclusterprots (writable
+             file object), zip, html_file, html_path (outputs)
+    cores -- value for antiSMASH's --cores option (default 10)
+
+    Raises IOError when antiSMASH did not produce the expected EMBL file.
+    """
+    # Function-scope import, in the same way this file imports argparse.
+    import tempfile
+
+    # mkdtemp creates a unique, private directory atomically instead of
+    # guessing a free name under /tmp.
+    tmp_dir = os.path.abspath(tempfile.mkdtemp(prefix='galaxy_'))
+    try:
+        os.mkdir(os.path.join(tmp_dir, 'geneprediction'))
+        new_input_path = os.path.join(tmp_dir, os.path.basename(args.input) + '.fasta')
+        genomename = _genome_name(args.input)
+        result_path = os.path.join(tmp_dir, genomename)
+        shutil.copy(args.input, new_input_path)
+
+        # Only the child process runs inside tmp_dir. The wrapper keeps its
+        # own working directory, so relative input/output paths stay valid.
+        # An argument list without a shell keeps spaces and shell
+        # metacharacters in option values from being interpreted.
+        returncode = subprocess.call(
+            _antismash_command(args, new_input_path, cores), cwd=tmp_dir)
+
+        embl_file = os.path.join(result_path, '%s.final.embl' % genomename)
+        if not os.path.isfile(embl_file):
+            raise IOError('antiSMASH (exit code %s) did not produce %s'
+                          % (returncode, embl_file))
+        shutil.copy(embl_file, args.embl_path)
+
+        _write_annotated_proteins(result_path, args.geneclusterprots)
+        args.geneclusterprots.flush()
+
+        zipper(result_path, args.zip)
+
+        # html output
+        _publish_html(result_path, args.html_file, args.html_path)
+    finally:
+        # remove tmp directory
+        shutil.rmtree(tmp_dir, ignore_errors=True)
+
+
+def arg_parse(argv=None):
+    """Parse the wrapper's command line and return the argparse namespace.
+
+    argv -- list of argument strings to parse; ``None`` (the default) parses
+            ``sys.argv[1:]`` as before
+
+    The boolean switches default to False and all other options to None.
+    ``--geneclusterprots`` is opened for writing while parsing, so the
+    returned attribute is a file object rather than a path.
+    """
+    import argparse
+    parser = argparse.ArgumentParser(prog='antiSMASH-Wrapper')
+    parser.add_argument('--version', action='version', version='%(prog)s 0.01')
+
+    # antiSMASH options
+    parser.add_argument('--geneclustertypes',
+                        help='Gene cluster types, passed on to antismash.py --geneclustertypes (e.g. 1)')
+    parser.add_argument('--clusterblast', action='store_true')
+    parser.add_argument('--eukaryotic', action='store_true')
+    parser.add_argument('--fullhmm', action='store_true')
+    parser.add_argument('--smcogs', action='store_true')
+    parser.add_argument('--fullblast', action='store_true')
+
+    # inputs
+    parser.add_argument('--input', '-i', help='FASTA Sequence File')
+    parser.add_argument('--glimmer_prediction', help='Glimmer Prediction File')
+
+    # outputs
+    parser.add_argument('--zip', help='output: all files as zip file')
+    parser.add_argument('--html_file', help='output: the path to the index html file')
+    parser.add_argument('--html_path', help='output: the path to the output html dir')
+    parser.add_argument('--embl_path', help='output: the path to the embl output file')
+    parser.add_argument('--geneclusterprots', help='output: Genecluster Fasta File',
+                        type=argparse.FileType('w'))
+
+    return parser.parse_args(argv)
+
+
+if __name__ == '__main__':
+    # Script entry point: parse the command line, then run the wrapper.
+    anitSMASH(arg_parse())
+