Mercurial > repos > bgruening > openbabel_structure_distance_finder
comparison multi_obgrep.py @ 3:49242402887b draft
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/openbabel commit 1fe240ef0064a1a4a66d9be1ccace53824280b75"
| author | bgruening |
|---|---|
| date | Mon, 19 Oct 2020 14:40:22 +0000 |
| parents | 4c9d6b47045c |
| children | 8302ab092300 |
Comparison legend: equal, deleted, inserted, replaced
| 2:4c9d6b47045c | 3:49242402887b |
|---|---|
| 2 """ | 2 """ |
| 3 Input: Molecules in SDF, SMILES ... | 3 Input: Molecules in SDF, SMILES ... |
| 4 Output: Molecule file filtered with obgrep. | 4 Output: Molecule file filtered with obgrep. |
| 5 Copyright 2013, Bjoern Gruening and Xavier Lucas | 5 Copyright 2013, Bjoern Gruening and Xavier Lucas |
| 6 """ | 6 """ |
| 7 import sys, os | |
| 8 import argparse | 7 import argparse |
| 9 import multiprocessing | 8 import multiprocessing |
| 9 import os | |
| 10 import shlex | |
| 11 import shutil | |
| 12 import subprocess | |
| 10 import tempfile | 13 import tempfile |
| 11 import subprocess | |
| 12 import shutil | |
| 13 import shlex | |
| 14 | 14 |
| 15 from openbabel import openbabel, pybel | 15 |
| 16 openbabel.obErrorLog.StopLogging() | |
| 17 def parse_command_line(): | 16 def parse_command_line(): |
| 18 parser = argparse.ArgumentParser() | 17 parser = argparse.ArgumentParser() |
| 19 parser.add_argument('-i', '--infile', required=True, help='Molecule file.') | 18 parser.add_argument('-i', '--infile', required=True, help='Molecule file.') |
| 20 parser.add_argument('-q', '--query', required=True, help='Query file, containing different SMARTS in each line.') | 19 parser.add_argument('-q', '--query', required=True, help='Query file, containing different SMARTS in each line.') |
| 21 parser.add_argument('-o', '--outfile', required=True, help='Path to the output file.') | 20 parser.add_argument('-o', '--outfile', required=True, help='Path to the output file.') |
| 22 parser.add_argument("--iformat", help="Input format, like smi, sdf, inchi") | 21 parser.add_argument("--iformat", help="Input format, like smi, sdf, inchi") |
| 23 parser.add_argument("--n-times", dest="n_times", type=int, | 22 parser.add_argument("--n-times", dest="n_times", type=int, |
| 24 default=0, help="Print a molecule only if the pattern occurs # times inside the molecule.") | 23 default=0, help="Print a molecule only if the pattern occurs # times inside the molecule.") |
| 25 parser.add_argument('-p', '--processors', type=int, default=multiprocessing.cpu_count()) | 24 parser.add_argument('-p', '--processors', type=int, default=multiprocessing.cpu_count()) |
| 26 parser.add_argument("--invert-matches", dest="invert_matches", action="store_true", | 25 parser.add_argument("--invert-matches", dest="invert_matches", action="store_true", |
| 27 default=False, help="Invert the matching, print non-matching molecules.") | 26 default=False, help="Invert the matching, print non-matching molecules.") |
| 28 parser.add_argument("--only-name", dest="only_name", action="store_true", | 27 parser.add_argument("--only-name", dest="only_name", action="store_true", |
| 29 default=False, help="Only print the name of the molecules.") | 28 default=False, help="Only print the name of the molecules.") |
| 30 parser.add_argument("--full-match", dest="full_match", action="store_true", | 29 parser.add_argument("--full-match", dest="full_match", action="store_true", |
| 31 default=False, help="Full match, print matching-molecules only when the number of heavy atoms is also equal to the number of atoms in the SMARTS pattern.") | 30 default=False, help="Full match, print matching-molecules only when the number of heavy atoms is also equal to the number of atoms in the SMARTS pattern.") |
| 32 parser.add_argument("--number-of-matches", dest="number_of_matches", action="store_true", | 31 parser.add_argument("--number-of-matches", dest="number_of_matches", action="store_true", |
| 33 default=False, help="Print the number of matches.") | 32 default=False, help="Print the number of matches.") |
| 34 return parser.parse_args() | 33 return parser.parse_args() |
| 35 | 34 |
| 35 | |
| 36 results = list() | 36 results = list() |
| 37 | |
| 38 | |
| 37 def mp_callback(res): | 39 def mp_callback(res): |
| 38 results.append(res) | 40 results.append(res) |
| 39 | 41 |
| 40 def mp_helper( query, args ): | 42 |
| 43 def mp_helper(query, args): | |
| 41 """ | 44 """ |
| 42 Helper function for multiprocessing. | 45 Helper function for multiprocessing. |
| 43 That function is a wrapper around obgrep. | 46 That function is a wrapper around obgrep. |
| 44 """ | 47 """ |
| 45 | 48 |
| 55 if args.n_times: | 58 if args.n_times: |
| 56 cmd_list.append('-t %s' % str(args.n_times)) | 59 cmd_list.append('-t %s' % str(args.n_times)) |
| 57 | 60 |
| 58 tmp = tempfile.NamedTemporaryFile(delete=False) | 61 tmp = tempfile.NamedTemporaryFile(delete=False) |
| 59 cmd = 'obgrep %s "%s" %s' % (' '.join(cmd_list), query, args.infile) | 62 cmd = 'obgrep %s "%s" %s' % (' '.join(cmd_list), query, args.infile) |
| 60 child = subprocess.Popen(shlex.split(cmd), | 63 child = subprocess.Popen(shlex.split(cmd), stdout=open(tmp.name, 'w+'), stderr=subprocess.PIPE) |
| 61 stdout=open(tmp.name, 'w+'), stderr=subprocess.PIPE) | |
| 62 | 64 |
| 63 stdout, stderr = child.communicate() | 65 stdout, stderr = child.communicate() |
| 64 return (tmp.name, query) | 66 return (tmp.name, query) |
| 65 | 67 |
| 66 | 68 |
| 67 def obgrep( args ): | 69 def obgrep(args): |
| 68 | |
| 69 temp_file = tempfile.NamedTemporaryFile() | 70 temp_file = tempfile.NamedTemporaryFile() |
| 70 temp_link = "%s.%s" % (temp_file.name, args.iformat) | 71 temp_link = "%s.%s" % (temp_file.name, args.iformat) |
| 71 temp_file.close() | 72 temp_file.close() |
| 72 os.symlink(args.infile, temp_link) | 73 os.symlink(args.infile, temp_link) |
| 73 args.infile = temp_link | 74 args.infile = temp_link |
| 74 | 75 |
| 75 pool = multiprocessing.Pool( args.processors ) | 76 pool = multiprocessing.Pool(args.processors) |
| 76 for query in open( args.query ): | 77 for query in open(args.query): |
| 77 pool.apply_async(mp_helper, args=(query.strip(), args), callback=mp_callback) | 78 pool.apply_async(mp_helper, args=(query.strip(), args), callback=mp_callback) |
| 78 #mp_callback( mp_helper(query.strip(), args) ) | 79 # mp_callback(mp_helper(query.strip(), args)) |
| 79 pool.close() | 80 pool.close() |
| 80 pool.join() | 81 pool.join() |
| 81 | 82 |
| 82 out_handle = open( args.outfile, 'wb' ) | 83 out_handle = open(args.outfile, 'wb') |
| 83 for result_file, query in results: | 84 for result_file, query in results: |
| 84 res_handle = open(result_file,'rb') | 85 res_handle = open(result_file, 'rb') |
| 85 shutil.copyfileobj( res_handle, out_handle ) | 86 shutil.copyfileobj(res_handle, out_handle) |
| 86 res_handle.close() | 87 res_handle.close() |
| 87 os.remove( result_file ) | 88 os.remove(result_file) |
| 88 out_handle.close() | 89 out_handle.close() |
| 89 | 90 |
| 90 os.remove( temp_link ) | 91 os.remove(temp_link) |
| 92 | |
| 91 | 93 |
| 92 def __main__(): | 94 def __main__(): |
| 93 """ | 95 """ |
| 94 Multiprocessing obgrep search. | 96 Multiprocessing obgrep search. |
| 95 """ | 97 """ |
| 96 args = parse_command_line() | 98 args = parse_command_line() |
| 97 obgrep( args ) | 99 obgrep(args) |
| 98 | 100 |
| 99 if __name__ == "__main__" : | 101 |
| 102 if __name__ == "__main__": | |
| 100 __main__() | 103 __main__() |
