Mercurial > repos > bgruening > openbabel_structure_distance_finder
comparison multi_obgrep.py @ 0:c066b5accacf draft
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/openbabel commit 6c84abdd07f292048bf2194073e2e938e94158c4"
| author | bgruening |
|---|---|
| date | Wed, 25 Mar 2020 16:47:13 -0400 |
| parents | |
| children | 4c9d6b47045c |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:c066b5accacf |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 """ | |
| 3 Input: Molecules in SDF, SMILES ... | |
| 4 Output: Molecule file filtered with obgrep. | |
| 5 Copyright 2013, Bjoern Gruening and Xavier Lucas | |
| 6 """ | |
| 7 import sys, os | |
| 8 import argparse | |
| 9 import openbabel | |
| 10 openbabel.obErrorLog.StopLogging() | |
| 11 import pybel | |
| 12 import multiprocessing | |
| 13 import tempfile | |
| 14 import subprocess | |
| 15 import shutil | |
| 16 import shlex | |
| 17 | |
def parse_command_line(argv=None):
    """
    Build the option parser of the multi-query obgrep wrapper and parse argv.

    argv -- optional list of argument strings (without the program name).
            When None (the default) argparse reads sys.argv[1:], which is
            what the script entry point relies on.

    Returns the argparse.Namespace with the attributes infile, query,
    outfile, iformat, n_times, processors, invert_matches, only_name,
    full_match and number_of_matches.
    argparse exits the process (SystemExit) when a required option is
    missing or a value cannot be converted.
    """
    parser = argparse.ArgumentParser()

    # Files involved in the search.
    parser.add_argument('-i', '--infile', required=True, help='Molecule file.')
    parser.add_argument('-q', '--query', required=True,
                        help='Query file, containing different SMARTS in each line.')
    parser.add_argument('-o', '--outfile', required=True, help='Path to the output file.')
    parser.add_argument("--iformat", help="Input format, like smi, sdf, inchi")

    # Matching options; 0 for --n-times means "do not pass the option on".
    parser.add_argument("--n-times", dest="n_times", type=int,
                        default=0, help="Print a molecule only if the pattern occurs # times inside the molecule.")
    # One worker per CPU unless the caller asks for something else.
    parser.add_argument('-p', '--processors', type=int, default=multiprocessing.cpu_count())
    parser.add_argument("--invert-matches", dest="invert_matches", action="store_true",
                        default=False, help="Invert the matching, print non-matching molecules.")
    parser.add_argument("--only-name", dest="only_name", action="store_true",
                        default=False, help="Only print the name of the molecules.")
    parser.add_argument("--full-match", dest="full_match", action="store_true",
                        default=False, help="Full match, print matching-molecules only when the number of heavy atoms is also equal to the number of atoms in the SMARTS pattern.")
    parser.add_argument("--number-of-matches", dest="number_of_matches", action="store_true",
                        default=False, help="Print the number of matches.")
    return parser.parse_args(argv)
| 36 | |
# Module-level accumulator: one entry per finished worker call.
results = []


def mp_callback(res):
    """
    Pool callback: store the value returned by a worker in ``results``.
    """
    results.append(res)
| 40 | |
def mp_helper(query, args):
    """
    Helper function for multiprocessing.
    Runs obgrep for a single query and captures its standard output.

    query -- the pattern handed to obgrep as its first positional argument.
    args  -- namespace providing invert_matches, only_name, full_match,
             number_of_matches, n_times and infile.

    Returns a tuple (path_of_result_file, query). The result file is a
    temporary file that is NOT deleted here; the caller has to remove it.
    Raises OSError when the obgrep executable cannot be started.
    """
    flags = []
    if args.invert_matches:
        flags.append('-v')
    if args.only_name:
        flags.append('-n')
    if args.full_match:
        flags.append('-f')
    if args.number_of_matches:
        flags.append('-c')
    if args.n_times:
        flags.extend(['-t', str(args.n_times)])

    # Hand Popen an argument list instead of formatting a string and
    # re-splitting it with shlex: that round trip split input paths that
    # contain whitespace and rewrote quotes / doubled backslashes inside the
    # query.
    cmd = ['obgrep'] + flags + [query, args.infile]

    # delete=False keeps the file on disk after the handle is closed so the
    # caller can read it; the with-block guarantees the handle itself is
    # closed (the previous code leaked two open handles per query).
    with tempfile.NamedTemporaryFile(delete=False) as tmp:
        child = subprocess.Popen(cmd, stdout=tmp, stderr=subprocess.PIPE)
        # communicate() drains the stderr pipe and waits for the child to
        # exit. The stderr text and the exit status are intentionally
        # ignored, exactly as before.
        child.communicate()
    return (tmp.name, query)
| 66 | |
| 67 | |
def obgrep(args):
    """
    Run obgrep once per line of the query file and concatenate the outputs.

    args -- namespace providing infile, iformat, query, outfile and
            processors, plus the options read by mp_helper.

    Each non-empty, stripped line of args.query is submitted to a
    multiprocessing pool of args.processors workers running mp_helper. The
    per-query result files are appended to args.outfile in the order of the
    query file and removed afterwards.

    While the workers run, args.infile points to a temporary symlink whose
    name ends in ".<iformat>"; it is reset to its original value before the
    function returns. Without an iformat the input file is used directly.

    An exception raised inside a worker is re-raised here.
    """
    original_infile = args.infile
    link_dir = None
    if args.iformat:
        # Expose the input under a name that carries the format as extension
        # (presumably so obgrep can pick the format from it -- TODO confirm).
        # A private directory avoids re-using a just-released temp file name.
        link_dir = tempfile.mkdtemp()
        temp_link = os.path.join(link_dir, 'input.%s' % args.iformat)
        # The target has to be absolute: a relative target would be resolved
        # relative to link_dir and leave a dangling link.
        os.symlink(os.path.abspath(original_infile), temp_link)
        args.infile = temp_link

    try:
        pool = multiprocessing.Pool(args.processors)
        try:
            with open(args.query) as query_handle:
                # Keep the AsyncResult objects in submission order so the
                # output order is deterministic. The callback still records
                # every result in the module-level ``results`` list, as
                # before. Blank lines would start obgrep with an empty
                # pattern, so they are skipped.
                jobs = [
                    pool.apply_async(mp_helper, args=(smarts, args),
                                     callback=mp_callback)
                    for smarts in (line.strip() for line in query_handle)
                    if smarts
                ]
            pool.close()
        except BaseException:
            pool.terminate()
            raise
        finally:
            pool.join()

        # get() re-raises a worker exception instead of silently dropping
        # that query; do it before the output file is created/truncated.
        finished = [job.get() for job in jobs]

        with open(args.outfile, 'wb') as out_handle:
            for result_file, _query in finished:
                try:
                    with open(result_file, 'rb') as res_handle:
                        shutil.copyfileobj(res_handle, out_handle)
                finally:
                    os.remove(result_file)
    finally:
        # Do not leave the caller with a path to a link that no longer exists.
        args.infile = original_infile
        if link_dir is not None:
            shutil.rmtree(link_dir, ignore_errors=True)
| 92 | |
def __main__():
    """
    Script entry point: parse the command line, then run the
    multiprocessing obgrep search with the parsed options.
    """
    obgrep(parse_command_line())


if __name__ == "__main__":
    __main__()
