Mercurial > repos > bgruening > openbabel_filter
comparison ob_filter.py @ 16:988085c7a0ea draft default tip
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/openbabel commit d9c51279c061a1da948a2582d5b502ca7573adbf
author | bgruening |
---|---|
date | Thu, 15 Aug 2024 11:06:01 +0000 |
parents | 8ee975c49a3d |
children |
comparison
equal
deleted
inserted
replaced
15:df4dc55197dd | 16:988085c7a0ea |
---|---|
12 import subprocess | 12 import subprocess |
13 import sys | 13 import sys |
14 | 14 |
15 import cheminfolib | 15 import cheminfolib |
16 from openbabel import pybel | 16 from openbabel import pybel |
17 | |
17 cheminfolib.pybel_stop_logging() | 18 cheminfolib.pybel_stop_logging() |
18 | 19 |
19 | 20 |
20 def parse_command_line(): | 21 def parse_command_line(): |
21 parser = argparse.ArgumentParser() | 22 parser = argparse.ArgumentParser() |
22 parser.add_argument('-i', '--input', help='Input file name') | 23 parser.add_argument("-i", "--input", help="Input file name") |
23 parser.add_argument('-iformat', help='Input file format') | 24 parser.add_argument("-iformat", help="Input file format") |
24 parser.add_argument('-oformat', default='smi', | 25 parser.add_argument("-oformat", default="smi", help="Output file format") |
25 help='Output file format') | 26 parser.add_argument("-o", "--output", help="Output file name", required=True) |
26 parser.add_argument('-o', '--output', help='Output file name', | 27 parser.add_argument("--filters", help="Specify the filters to apply", required=True) |
27 required=True) | 28 parser.add_argument( |
28 parser.add_argument('--filters', help="Specify the filters to apply", | 29 "--list_of_names", |
29 required=True) | 30 required=False, |
30 parser.add_argument('--list_of_names', required=False, | 31 help="A file with list of molecule names to extract. Every name is in one line.", |
31 help="A file with list of molecule names to extract. Every name is in one line.") | 32 ) |
32 return parser.parse_args() | 33 return parser.parse_args() |
33 | 34 |
34 | 35 |
35 def filter_precalculated_compounds(args, filters): | 36 def filter_precalculated_compounds(args, filters): |
36 outfile = pybel.Outputfile(args.oformat, args.output, overwrite=True) | 37 outfile = pybel.Outputfile(args.oformat, args.output, overwrite=True) |
37 for mol in pybel.readfile('sdf', args.input): | 38 for mol in pybel.readfile("sdf", args.input): |
38 for key, elem in filters.items(): | 39 for key, elem in filters.items(): |
39 # map the short description to the larger metadata names stored in the sdf file | 40 # map the short description to the larger metadata names stored in the sdf file |
40 property = cheminfolib.ColumnNames.get(key, key) | 41 property = cheminfolib.ColumnNames.get(key, key) |
41 min = elem[0] | 42 min = elem[0] |
42 max = elem[1] | 43 max = elem[1] |
43 if float(mol.data[property]) >= float(min) and float(mol.data[property]) <= float(max): | 44 if float(mol.data[property]) >= float(min) and float( |
45 mol.data[property] | |
46 ) <= float(max): | |
44 pass | 47 pass |
45 else: | 48 else: |
46 # leave the filter loop, because one of the filter constraints is not satisfied | 49 # leave the filter loop, because one of the filter constraints is not satisfied |
47 break | 50 break |
48 else: | 51 else: |
54 def filter_new_compounds(args, filters): | 57 def filter_new_compounds(args, filters): |
55 | 58 |
56 if args.iformat == args.oformat: | 59 if args.iformat == args.oformat: |
57 # use the -ocopy option from openbabel to speed up the filtering, additionally no conversion is carried out | 60 # use the -ocopy option from openbabel to speed up the filtering, additionally no conversion is carried out |
58 # http://openbabel.org/docs/dev/FileFormats/Copy_raw_text.html#copy-raw-text | 61 # http://openbabel.org/docs/dev/FileFormats/Copy_raw_text.html#copy-raw-text |
59 cmd = 'obabel -i%s %s -ocopy -O %s --filter' % (args.iformat, args.input, args.output) | 62 cmd = "obabel -i%s %s -ocopy -O %s --filter" % ( |
63 args.iformat, | |
64 args.input, | |
65 args.output, | |
66 ) | |
60 else: | 67 else: |
61 cmd = 'obabel -i%s %s -o%s -O %s --filter' % (args.iformat, args.input, args.oformat, args.output) | 68 cmd = "obabel -i%s %s -o%s -O %s --filter" % ( |
62 filter_cmd = '' | 69 args.iformat, |
70 args.input, | |
71 args.oformat, | |
72 args.output, | |
73 ) | |
74 filter_cmd = "" | |
63 # OBDescriptor stores a mapping from our desc shortcut to the OB name [0] and a long description [1] | 75 # OBDescriptor stores a mapping from our desc shortcut to the OB name [0] and a long description [1] |
64 for key, elem in filters.items(): | 76 for key, elem in filters.items(): |
65 ob_descriptor_name = cheminfolib.OBDescriptor[key][0] | 77 ob_descriptor_name = cheminfolib.OBDescriptor[key][0] |
66 min = elem[0] | 78 min = elem[0] |
67 max = elem[1] | 79 max = elem[1] |
68 filter_cmd += ' %s>=%s %s<=%s ' % (ob_descriptor_name, min, ob_descriptor_name, max) | 80 filter_cmd += " %s>=%s %s<=%s " % ( |
81 ob_descriptor_name, | |
82 min, | |
83 ob_descriptor_name, | |
84 max, | |
85 ) | |
69 | 86 |
70 args = shlex.split('%s "%s"' % (cmd, filter_cmd)) | 87 args = shlex.split('%s "%s"' % (cmd, filter_cmd)) |
71 # print '%s "%s"' % (cmd, filter_cmd) | 88 # print '%s "%s"' % (cmd, filter_cmd) |
72 # calling openbabel with subprocess and pipe potential errors occurring in openbabel to stdout | 89 # calling openbabel with subprocess and pipe potential errors occurring in openbabel to stdout |
73 child = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE) | 90 child = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE) |
74 | 91 |
75 stdout, stderr = child.communicate() | 92 stdout, stderr = child.communicate() |
76 return_code = child.returncode | 93 return_code = child.returncode |
77 | 94 |
78 if return_code: | 95 if return_code: |
79 sys.stdout.write(stdout.decode('utf-8')) | 96 sys.stdout.write(stdout.decode("utf-8")) |
80 sys.stderr.write(stderr.decode('utf-8')) | 97 sys.stderr.write(stderr.decode("utf-8")) |
81 sys.stderr.write("Return error code %i from command:\n" % return_code) | 98 sys.stderr.write("Return error code %i from command:\n" % return_code) |
82 sys.stderr.write("%s\n" % cmd) | 99 sys.stderr.write("%s\n" % cmd) |
83 else: | 100 else: |
84 sys.stdout.write(stdout.decode('utf-8')) | 101 sys.stdout.write(stdout.decode("utf-8")) |
85 sys.stdout.write(stderr.decode('utf-8')) | 102 sys.stdout.write(stderr.decode("utf-8")) |
86 | 103 |
87 | 104 |
88 def filter_by_name(args): | 105 def filter_by_name(args): |
89 outfile = pybel.Outputfile(args.oformat, args.output, overwrite=True) | 106 outfile = pybel.Outputfile(args.oformat, args.output, overwrite=True) |
90 for mol in pybel.readfile('sdf', args.input): | 107 for mol in pybel.readfile("sdf", args.input): |
91 for name in open(args.list_of_names): | 108 for name in open(args.list_of_names): |
92 if mol.title.strip() == name.strip(): | 109 if mol.title.strip() == name.strip(): |
93 outfile.write(mol) | 110 outfile.write(mol) |
94 outfile.close() | 111 outfile.close() |
95 | 112 |
96 | 113 |
97 def __main__(): | 114 def __main__(): |
98 """ | 115 """ |
99 Select compounds with certain properties from a small library | 116 Select compounds with certain properties from a small library |
100 """ | 117 """ |
101 args = parse_command_line() | 118 args = parse_command_line() |
102 | 119 |
103 if args.filters == '__filter_by_name__': | 120 if args.filters == "__filter_by_name__": |
104 filter_by_name(args) | 121 filter_by_name(args) |
105 return | 122 return |
106 | 123 |
107 # It's a small trick to get the parameters in an easy way from the xml file. | 124 # It's a small trick to get the parameters in an easy way from the xml file. |
108 # To keep it readable in the xml file, many white-spaces are included in that string; they need to be removed. | 125 # To keep it readable in the xml file, many white-spaces are included in that string; they need to be removed. |
109 # Also the last loop creates a ',}' that is not a valid JSON expression. | 126 # Also the last loop creates a ',}' that is not a valid JSON expression. |
110 filters = json.loads((args.filters).replace(' ', '').replace(',}', '}')) | 127 filters = json.loads((args.filters).replace(" ", "").replace(",}", "}")) |
111 if args.iformat == 'sdf': | 128 if args.iformat == "sdf": |
112 # Check if the sdf file contains all of the required metadata to invoke the precalculation filtering | 129 # Check if the sdf file contains all of the required metadata to invoke the precalculation filtering |
113 mol = next(pybel.readfile('sdf', args.input)) | 130 mol = next(pybel.readfile("sdf", args.input)) |
114 for key, elem in filters.items(): | 131 for key, elem in filters.items(): |
115 property = cheminfolib.ColumnNames.get(key, key) | 132 property = cheminfolib.ColumnNames.get(key, key) |
116 if property not in mol.data: | 133 if property not in mol.data: |
117 break | 134 break |
118 else: | 135 else: |