Mercurial > repos > bgruening > openbabel_subsearch
diff subsearch.py @ 13:bd678d7db2ae draft
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/openbabel commit 1fe240ef0064a1a4a66d9be1ccace53824280b75"
author | bgruening |
---|---|
date | Mon, 19 Oct 2020 14:39:41 +0000 |
parents | 171c94786a56 |
children | 9adf3fae2771 |
line wrap: on
line diff
--- a/subsearch.py Tue Jul 28 08:32:24 2020 -0400 +++ b/subsearch.py Mon Oct 19 14:39:41 2020 +0000 @@ -4,36 +4,41 @@ Output: Moleculs filtered with specified substructures. Copyright 2013, Bjoern Gruening and Xavier Lucas """ -import sys, os import argparse import multiprocessing -import tempfile +import os +import shutil import subprocess -import shutil +import sys +import tempfile from openbabel import openbabel, pybel openbabel.obErrorLog.StopLogging() + def parse_command_line(): parser = argparse.ArgumentParser() parser.add_argument('-i', '--infile', required=True, help='Molecule file.') parser.add_argument('--iformat', help='Input format.') - parser.add_argument('--fastsearch-index', dest="fastsearch_index", - required=True, help='Path to the openbabel fastsearch index.') + parser.add_argument('--fastsearch-index', dest="fastsearch_index", required=True, + help='Path to the openbabel fastsearch index.') parser.add_argument('-o', '--outfile', required=True, help='Path to the output file.') - parser.add_argument('--oformat', - default='smi', help='Output file format') - parser.add_argument("--max-candidates", dest="max_candidates", type=int, - default=4000, help="The maximum number of candidates.") - parser.add_argument('-p', '--processors', type=int, - default=multiprocessing.cpu_count()) + parser.add_argument('--oformat', default='smi', help='Output file format') + parser.add_argument("--max-candidates", dest="max_candidates", type=int, default=4000, + help="The maximum number of candidates.") + parser.add_argument('-p', '--processors', type=int, + default=multiprocessing.cpu_count()) return parser.parse_args() + results = list() + + def mp_callback(res): results.append(res) -def mp_helper( query, args ): + +def mp_helper(query, args): """ Helper function for multiprocessing. That function is a wrapper around the following command: @@ -48,8 +53,7 @@ tmp = tempfile.NamedTemporaryFile(delete=False) cmd = 'obabel -ifs %s -O %s %s -s%s -al %s' % (args.fastsearch_index, tmp.name, opts, query, args.max_candidates) - child = subprocess.Popen(cmd.split(), - stdout=subprocess.PIPE, stderr=subprocess.PIPE) + child = subprocess.Popen(cmd.split(), stdout=subprocess.PIPE, stderr=subprocess.PIPE) stdout, stderr = child.communicate() return_code = child.returncode @@ -65,43 +69,43 @@ return (tmp.name, query) -def get_smiles_or_smarts( args ): +def get_smiles_or_smarts(args): """ Wrapper to retrieve a striped SMILES or SMARTS string from different input formats. """ if args.iformat in ['smi', 'text', 'tabular']: - with open( args.infile ) as text_file: + with open(args.infile) as text_file: for line in text_file: yield line.split('\t')[0].strip() else: # inchi or sdf files - for mol in pybel.readfile( args.iformat, args.infile ): + for mol in pybel.readfile(args.iformat, args.infile): yield mol.write('smiles').split('\t')[0] -def substructure_search( args ): - pool = multiprocessing.Pool( args.processors ) - for query in get_smiles_or_smarts( args ): +def substructure_search(args): + pool = multiprocessing.Pool(args.processors) + for query in get_smiles_or_smarts(args): pool.apply_async(mp_helper, args=(query, args), callback=mp_callback) - #mp_callback( mp_helper(query, args) ) + # mp_callback(mp_helper(query, args)) pool.close() pool.join() if args.oformat == 'names': - out_handle = open( args.outfile, 'w' ) + out_handle = open(args.outfile, 'w') for result_file, query in results: with open(result_file) as res_handle: for line in res_handle: - out_handle.write('%s\t%s\n' % ( line.strip(), query )) - os.remove( result_file ) + out_handle.write('%s\t%s\n' % (line.strip(), query)) + os.remove(result_file) out_handle.close() else: - out_handle = open( args.outfile, 'wb' ) + out_handle = open(args.outfile, 'wb') for result_file, query in results: - res_handle = open(result_file,'rb') - shutil.copyfileobj( res_handle, out_handle ) + res_handle = open(result_file, 'rb') + shutil.copyfileobj(res_handle, out_handle) res_handle.close() - os.remove( result_file ) + os.remove(result_file) out_handle.close() @@ -110,7 +114,8 @@ Multiprocessing Open Babel Substructure Search. """ args = parse_command_line() - substructure_search( args ) + substructure_search(args) + -if __name__ == "__main__" : +if __name__ == "__main__": __main__()