Mercurial > repos > bgruening > openbabel_remduplicates
comparison multi_obgrep.py @ 13:12aca74f07d7 draft
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/openbabel commit 1fe240ef0064a1a4a66d9be1ccace53824280b75"
author | bgruening |
---|---|
date | Mon, 19 Oct 2020 14:47:33 +0000 |
parents | 50ca8845e7f5 |
children | c5de6c19eb06 |
comparison legend: equal | deleted | inserted | replaced
12:50ca8845e7f5 | 13:12aca74f07d7 |
---|---|
2 """ | 2 """ |
3 Input: Molecules in SDF, SMILES ... | 3 Input: Molecules in SDF, SMILES ... |
4 Output: Molecule file filtered with obgrep. | 4 Output: Molecule file filtered with obgrep. |
5 Copyright 2013, Bjoern Gruening and Xavier Lucas | 5 Copyright 2013, Bjoern Gruening and Xavier Lucas |
6 """ | 6 """ |
7 import sys, os | |
8 import argparse | 7 import argparse |
9 import multiprocessing | 8 import multiprocessing |
9 import os | |
10 import shlex | |
11 import shutil | |
12 import subprocess | |
10 import tempfile | 13 import tempfile |
11 import subprocess | |
12 import shutil | |
13 import shlex | |
14 | 14 |
15 from openbabel import openbabel, pybel | 15 |
16 openbabel.obErrorLog.StopLogging() | |
17 def parse_command_line(): | 16 def parse_command_line(): |
18 parser = argparse.ArgumentParser() | 17 parser = argparse.ArgumentParser() |
19 parser.add_argument('-i', '--infile', required=True, help='Molecule file.') | 18 parser.add_argument('-i', '--infile', required=True, help='Molecule file.') |
20 parser.add_argument('-q', '--query', required=True, help='Query file, containing different SMARTS in each line.') | 19 parser.add_argument('-q', '--query', required=True, help='Query file, containing different SMARTS in each line.') |
21 parser.add_argument('-o', '--outfile', required=True, help='Path to the output file.') | 20 parser.add_argument('-o', '--outfile', required=True, help='Path to the output file.') |
22 parser.add_argument("--iformat", help="Input format, like smi, sdf, inchi") | 21 parser.add_argument("--iformat", help="Input format, like smi, sdf, inchi") |
23 parser.add_argument("--n-times", dest="n_times", type=int, | 22 parser.add_argument("--n-times", dest="n_times", type=int, |
24 default=0, help="Print a molecule only if the pattern occurs # times inside the molecule.") | 23 default=0, help="Print a molecule only if the pattern occurs # times inside the molecule.") |
25 parser.add_argument('-p', '--processors', type=int, default=multiprocessing.cpu_count()) | 24 parser.add_argument('-p', '--processors', type=int, default=multiprocessing.cpu_count()) |
26 parser.add_argument("--invert-matches", dest="invert_matches", action="store_true", | 25 parser.add_argument("--invert-matches", dest="invert_matches", action="store_true", |
27 default=False, help="Invert the matching, print non-matching molecules.") | 26 default=False, help="Invert the matching, print non-matching molecules.") |
28 parser.add_argument("--only-name", dest="only_name", action="store_true", | 27 parser.add_argument("--only-name", dest="only_name", action="store_true", |
29 default=False, help="Only print the name of the molecules.") | 28 default=False, help="Only print the name of the molecules.") |
30 parser.add_argument("--full-match", dest="full_match", action="store_true", | 29 parser.add_argument("--full-match", dest="full_match", action="store_true", |
31 default=False, help="Full match, print matching-molecules only when the number of heavy atoms is also equal to the number of atoms in the SMARTS pattern.") | 30 default=False, help="Full match, print matching-molecules only when the number of heavy atoms is also equal to the number of atoms in the SMARTS pattern.") |
32 parser.add_argument("--number-of-matches", dest="number_of_matches", action="store_true", | 31 parser.add_argument("--number-of-matches", dest="number_of_matches", action="store_true", |
33 default=False, help="Print the number of matches.") | 32 default=False, help="Print the number of matches.") |
34 return parser.parse_args() | 33 return parser.parse_args() |
35 | 34 |
35 | |
36 results = list() | 36 results = list() |
37 | |
38 | |
37 def mp_callback(res): | 39 def mp_callback(res): |
38 results.append(res) | 40 results.append(res) |
39 | 41 |
40 def mp_helper( query, args ): | 42 |
43 def mp_helper(query, args): | |
41 """ | 44 """ |
42 Helper function for multiprocessing. | 45 Helper function for multiprocessing. |
43 That function is a wrapper around obgrep. | 46 That function is a wrapper around obgrep. |
44 """ | 47 """ |
45 | 48 |
55 if args.n_times: | 58 if args.n_times: |
56 cmd_list.append('-t %s' % str(args.n_times)) | 59 cmd_list.append('-t %s' % str(args.n_times)) |
57 | 60 |
58 tmp = tempfile.NamedTemporaryFile(delete=False) | 61 tmp = tempfile.NamedTemporaryFile(delete=False) |
59 cmd = 'obgrep %s "%s" %s' % (' '.join(cmd_list), query, args.infile) | 62 cmd = 'obgrep %s "%s" %s' % (' '.join(cmd_list), query, args.infile) |
60 child = subprocess.Popen(shlex.split(cmd), | 63 child = subprocess.Popen(shlex.split(cmd), stdout=open(tmp.name, 'w+'), stderr=subprocess.PIPE) |
61 stdout=open(tmp.name, 'w+'), stderr=subprocess.PIPE) | |
62 | 64 |
63 stdout, stderr = child.communicate() | 65 stdout, stderr = child.communicate() |
64 return (tmp.name, query) | 66 return (tmp.name, query) |
65 | 67 |
66 | 68 |
67 def obgrep( args ): | 69 def obgrep(args): |
68 | |
69 temp_file = tempfile.NamedTemporaryFile() | 70 temp_file = tempfile.NamedTemporaryFile() |
70 temp_link = "%s.%s" % (temp_file.name, args.iformat) | 71 temp_link = "%s.%s" % (temp_file.name, args.iformat) |
71 temp_file.close() | 72 temp_file.close() |
72 os.symlink(args.infile, temp_link) | 73 os.symlink(args.infile, temp_link) |
73 args.infile = temp_link | 74 args.infile = temp_link |
74 | 75 |
75 pool = multiprocessing.Pool( args.processors ) | 76 pool = multiprocessing.Pool(args.processors) |
76 for query in open( args.query ): | 77 for query in open(args.query): |
77 pool.apply_async(mp_helper, args=(query.strip(), args), callback=mp_callback) | 78 pool.apply_async(mp_helper, args=(query.strip(), args), callback=mp_callback) |
78 #mp_callback( mp_helper(query.strip(), args) ) | 79 # mp_callback(mp_helper(query.strip(), args)) |
79 pool.close() | 80 pool.close() |
80 pool.join() | 81 pool.join() |
81 | 82 |
82 out_handle = open( args.outfile, 'wb' ) | 83 out_handle = open(args.outfile, 'wb') |
83 for result_file, query in results: | 84 for result_file, query in results: |
84 res_handle = open(result_file,'rb') | 85 res_handle = open(result_file, 'rb') |
85 shutil.copyfileobj( res_handle, out_handle ) | 86 shutil.copyfileobj(res_handle, out_handle) |
86 res_handle.close() | 87 res_handle.close() |
87 os.remove( result_file ) | 88 os.remove(result_file) |
88 out_handle.close() | 89 out_handle.close() |
89 | 90 |
90 os.remove( temp_link ) | 91 os.remove(temp_link) |
92 | |
91 | 93 |
92 def __main__(): | 94 def __main__(): |
93 """ | 95 """ |
94 Multiprocessing obgrep search. | 96 Multiprocessing obgrep search. |
95 """ | 97 """ |
96 args = parse_command_line() | 98 args = parse_command_line() |
97 obgrep( args ) | 99 obgrep(args) |
98 | 100 |
99 if __name__ == "__main__" : | 101 |
102 if __name__ == "__main__": | |
100 __main__() | 103 __main__() |