diff subsearch.py @ 13:1400d1977e7b draft

"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/openbabel commit 1fe240ef0064a1a4a66d9be1ccace53824280b75"
author bgruening
date Mon, 19 Oct 2020 14:48:13 +0000
parents 1c66bf08f687
children 4242b4d68e9c
line wrap: on
line diff
--- a/subsearch.py	Tue Jul 28 08:41:13 2020 -0400
+++ b/subsearch.py	Mon Oct 19 14:48:13 2020 +0000
@@ -4,36 +4,41 @@
     Output: Molecules filtered with specified substructures.
     Copyright 2013, Bjoern Gruening and Xavier Lucas
 """
-import sys, os
 import argparse
 import multiprocessing
-import tempfile
+import os
+import shutil
 import subprocess
-import shutil
+import sys
+import tempfile
 
 from openbabel import openbabel, pybel
 openbabel.obErrorLog.StopLogging()
 
+
 def parse_command_line():
     parser = argparse.ArgumentParser()
     parser.add_argument('-i', '--infile', required=True, help='Molecule file.')
     parser.add_argument('--iformat', help='Input format.')
-    parser.add_argument('--fastsearch-index', dest="fastsearch_index", 
-        required=True, help='Path to the openbabel fastsearch index.')
+    parser.add_argument('--fastsearch-index', dest="fastsearch_index", required=True,
+                        help='Path to the openbabel fastsearch index.')
     parser.add_argument('-o', '--outfile', required=True, help='Path to the output file.')
-    parser.add_argument('--oformat', 
-        default='smi', help='Output file format')
-    parser.add_argument("--max-candidates", dest="max_candidates", type=int,
-                    default=4000, help="The maximum number of candidates.")
-    parser.add_argument('-p', '--processors', type=int, 
-        default=multiprocessing.cpu_count())
+    parser.add_argument('--oformat', default='smi', help='Output file format')
+    parser.add_argument("--max-candidates", dest="max_candidates", type=int, default=4000,
+                        help="The maximum number of candidates.")
+    parser.add_argument('-p', '--processors', type=int,
+                        default=multiprocessing.cpu_count())
     return parser.parse_args()
 
+
 results = list()
+
+
 def mp_callback(res):
     results.append(res)
 
-def mp_helper( query, args ):
+
+def mp_helper(query, args):
     """
         Helper function for multiprocessing.
         That function is a wrapper around the following command:
@@ -48,8 +53,7 @@
     tmp = tempfile.NamedTemporaryFile(delete=False)
     cmd = 'obabel -ifs %s -O %s %s -s%s -al %s' % (args.fastsearch_index, tmp.name, opts, query, args.max_candidates)
 
-    child = subprocess.Popen(cmd.split(),
-        stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+    child = subprocess.Popen(cmd.split(), stdout=subprocess.PIPE, stderr=subprocess.PIPE)
 
     stdout, stderr = child.communicate()
     return_code = child.returncode
@@ -65,43 +69,43 @@
     return (tmp.name, query)
 
 
-def get_smiles_or_smarts( args ):
+def get_smiles_or_smarts(args):
     """
     Wrapper to retrieve a stripped SMILES or SMARTS string from different input formats.
     """
     if args.iformat in ['smi', 'text', 'tabular']:
-        with open( args.infile ) as text_file:
+        with open(args.infile) as text_file:
             for line in text_file:
                 yield line.split('\t')[0].strip()
     else:
         # inchi or sdf files
-        for mol in pybel.readfile( args.iformat, args.infile ):
+        for mol in pybel.readfile(args.iformat, args.infile):
             yield mol.write('smiles').split('\t')[0]
 
-def substructure_search( args ):
 
-    pool = multiprocessing.Pool( args.processors )
-    for query in get_smiles_or_smarts( args ):
+def substructure_search(args):
+    pool = multiprocessing.Pool(args.processors)
+    for query in get_smiles_or_smarts(args):
         pool.apply_async(mp_helper, args=(query, args), callback=mp_callback)
-        #mp_callback( mp_helper(query, args) )
+        # mp_callback(mp_helper(query, args))
     pool.close()
     pool.join()
 
     if args.oformat == 'names':
-        out_handle = open( args.outfile, 'w' )
+        out_handle = open(args.outfile, 'w')
         for result_file, query in results:
             with open(result_file) as res_handle:
                 for line in res_handle:
-                    out_handle.write('%s\t%s\n' % ( line.strip(), query ))
-            os.remove( result_file )
+                    out_handle.write('%s\t%s\n' % (line.strip(), query))
+            os.remove(result_file)
         out_handle.close()
     else:
-        out_handle = open( args.outfile, 'wb' )
+        out_handle = open(args.outfile, 'wb')
         for result_file, query in results:
-            res_handle = open(result_file,'rb')
-            shutil.copyfileobj( res_handle, out_handle )
+            res_handle = open(result_file, 'rb')
+            shutil.copyfileobj(res_handle, out_handle)
             res_handle.close()
-            os.remove( result_file )
+            os.remove(result_file)
         out_handle.close()
 
 
@@ -110,7 +114,8 @@
         Multiprocessing Open Babel Substructure Search.
     """
     args = parse_command_line()
-    substructure_search( args )
+    substructure_search(args)
+
 
-if __name__ == "__main__" :
+if __name__ == "__main__":
     __main__()