comparison ob_filter.py @ 15:7b6fd1c273cd draft default tip

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/openbabel commit d9c51279c061a1da948a2582d5b502ca7573adbf
author bgruening
date Thu, 15 Aug 2024 11:03:49 +0000
parents f697d9601273
children
comparison
equal deleted inserted replaced
14:6e4b7e0c61a6 15:7b6fd1c273cd
12 import subprocess 12 import subprocess
13 import sys 13 import sys
14 14
15 import cheminfolib 15 import cheminfolib
16 from openbabel import pybel 16 from openbabel import pybel
17
17 cheminfolib.pybel_stop_logging() 18 cheminfolib.pybel_stop_logging()
18 19
19 20
20 def parse_command_line(): 21 def parse_command_line():
21 parser = argparse.ArgumentParser() 22 parser = argparse.ArgumentParser()
22 parser.add_argument('-i', '--input', help='Input file name') 23 parser.add_argument("-i", "--input", help="Input file name")
23 parser.add_argument('-iformat', help='Input file format') 24 parser.add_argument("-iformat", help="Input file format")
24 parser.add_argument('-oformat', default='smi', 25 parser.add_argument("-oformat", default="smi", help="Output file format")
25 help='Output file format') 26 parser.add_argument("-o", "--output", help="Output file name", required=True)
26 parser.add_argument('-o', '--output', help='Output file name', 27 parser.add_argument("--filters", help="Specify the filters to apply", required=True)
27 required=True) 28 parser.add_argument(
28 parser.add_argument('--filters', help="Specify the filters to apply", 29 "--list_of_names",
29 required=True) 30 required=False,
30 parser.add_argument('--list_of_names', required=False, 31 help="A file with list of molecule names to extract. Every name is in one line.",
31 help="A file with list of molecule names to extract. Every name is in one line.") 32 )
32 return parser.parse_args() 33 return parser.parse_args()
33 34
34 35
35 def filter_precalculated_compounds(args, filters): 36 def filter_precalculated_compounds(args, filters):
36 outfile = pybel.Outputfile(args.oformat, args.output, overwrite=True) 37 outfile = pybel.Outputfile(args.oformat, args.output, overwrite=True)
37 for mol in pybel.readfile('sdf', args.input): 38 for mol in pybel.readfile("sdf", args.input):
38 for key, elem in filters.items(): 39 for key, elem in filters.items():
39 # map the short description to the larger metadata names stored in the sdf file 40 # map the short description to the larger metadata names stored in the sdf file
40 property = cheminfolib.ColumnNames.get(key, key) 41 property = cheminfolib.ColumnNames.get(key, key)
41 min = elem[0] 42 min = elem[0]
42 max = elem[1] 43 max = elem[1]
43 if float(mol.data[property]) >= float(min) and float(mol.data[property]) <= float(max): 44 if float(mol.data[property]) >= float(min) and float(
45 mol.data[property]
46 ) <= float(max):
44 pass 47 pass
45 else: 48 else:
46 # leave the filter loop, because one filter constraint is not satisfied 49 # leave the filter loop, because one filter constraint is not satisfied
47 break 50 break
48 else: 51 else:
54 def filter_new_compounds(args, filters): 57 def filter_new_compounds(args, filters):
55 58
56 if args.iformat == args.oformat: 59 if args.iformat == args.oformat:
57 # use the -ocopy option from openbabel to speed up the filtering, additionally no conversion is carried out 60 # use the -ocopy option from openbabel to speed up the filtering, additionally no conversion is carried out
58 # http://openbabel.org/docs/dev/FileFormats/Copy_raw_text.html#copy-raw-text 61 # http://openbabel.org/docs/dev/FileFormats/Copy_raw_text.html#copy-raw-text
59 cmd = 'obabel -i%s %s -ocopy -O %s --filter' % (args.iformat, args.input, args.output) 62 cmd = "obabel -i%s %s -ocopy -O %s --filter" % (
63 args.iformat,
64 args.input,
65 args.output,
66 )
60 else: 67 else:
61 cmd = 'obabel -i%s %s -o%s -O %s --filter' % (args.iformat, args.input, args.oformat, args.output) 68 cmd = "obabel -i%s %s -o%s -O %s --filter" % (
62 filter_cmd = '' 69 args.iformat,
70 args.input,
71 args.oformat,
72 args.output,
73 )
74 filter_cmd = ""
63 # OBDescriptor stores a mapping from our desc shortcut to the OB name [0] and a long description [1] 75 # OBDescriptor stores a mapping from our desc shortcut to the OB name [0] and a long description [1]
64 for key, elem in filters.items(): 76 for key, elem in filters.items():
65 ob_descriptor_name = cheminfolib.OBDescriptor[key][0] 77 ob_descriptor_name = cheminfolib.OBDescriptor[key][0]
66 min = elem[0] 78 min = elem[0]
67 max = elem[1] 79 max = elem[1]
68 filter_cmd += ' %s>=%s %s<=%s ' % (ob_descriptor_name, min, ob_descriptor_name, max) 80 filter_cmd += " %s>=%s %s<=%s " % (
81 ob_descriptor_name,
82 min,
83 ob_descriptor_name,
84 max,
85 )
69 86
70 args = shlex.split('%s "%s"' % (cmd, filter_cmd)) 87 args = shlex.split('%s "%s"' % (cmd, filter_cmd))
71 # print '%s "%s"' % (cmd, filter_cmd) 88 # print '%s "%s"' % (cmd, filter_cmd)
72 # calling openbabel with subprocess and pipe potential errors occurring in openbabel to stdout 89 # calling openbabel with subprocess and pipe potential errors occurring in openbabel to stdout
73 child = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE) 90 child = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
74 91
75 stdout, stderr = child.communicate() 92 stdout, stderr = child.communicate()
76 return_code = child.returncode 93 return_code = child.returncode
77 94
78 if return_code: 95 if return_code:
79 sys.stdout.write(stdout.decode('utf-8')) 96 sys.stdout.write(stdout.decode("utf-8"))
80 sys.stderr.write(stderr.decode('utf-8')) 97 sys.stderr.write(stderr.decode("utf-8"))
81 sys.stderr.write("Return error code %i from command:\n" % return_code) 98 sys.stderr.write("Return error code %i from command:\n" % return_code)
82 sys.stderr.write("%s\n" % cmd) 99 sys.stderr.write("%s\n" % cmd)
83 else: 100 else:
84 sys.stdout.write(stdout.decode('utf-8')) 101 sys.stdout.write(stdout.decode("utf-8"))
85 sys.stdout.write(stderr.decode('utf-8')) 102 sys.stdout.write(stderr.decode("utf-8"))
86 103
87 104
88 def filter_by_name(args): 105 def filter_by_name(args):
89 outfile = pybel.Outputfile(args.oformat, args.output, overwrite=True) 106 outfile = pybel.Outputfile(args.oformat, args.output, overwrite=True)
90 for mol in pybel.readfile('sdf', args.input): 107 for mol in pybel.readfile("sdf", args.input):
91 for name in open(args.list_of_names): 108 for name in open(args.list_of_names):
92 if mol.title.strip() == name.strip(): 109 if mol.title.strip() == name.strip():
93 outfile.write(mol) 110 outfile.write(mol)
94 outfile.close() 111 outfile.close()
95 112
96 113
97 def __main__(): 114 def __main__():
98 """ 115 """
99 Select compounds with certain properties from a small library 116 Select compounds with certain properties from a small library
100 """ 117 """
101 args = parse_command_line() 118 args = parse_command_line()
102 119
103 if args.filters == '__filter_by_name__': 120 if args.filters == "__filter_by_name__":
104 filter_by_name(args) 121 filter_by_name(args)
105 return 122 return
106 123
107 # It's a small trick to get the parameters in an easy way from the xml file. 124 # It's a small trick to get the parameters in an easy way from the xml file.
108 # To keep it readable in the xml file, many white-spaces are included in that string; they need to be removed. 125 # To keep it readable in the xml file, many white-spaces are included in that string; they need to be removed.
109 # Also the last loop creates a ',}' that is not a valid JSON expression. 126 # Also the last loop creates a ',}' that is not a valid JSON expression.
110 filters = json.loads((args.filters).replace(' ', '').replace(',}', '}')) 127 filters = json.loads((args.filters).replace(" ", "").replace(",}", "}"))
111 if args.iformat == 'sdf': 128 if args.iformat == "sdf":
112 # Check if the sdf file contains all of the required metadata to invoke the precalculation filtering 129 # Check if the sdf file contains all of the required metadata to invoke the precalculation filtering
113 mol = next(pybel.readfile('sdf', args.input)) 130 mol = next(pybel.readfile("sdf", args.input))
114 for key, elem in filters.items(): 131 for key, elem in filters.items():
115 property = cheminfolib.ColumnNames.get(key, key) 132 property = cheminfolib.ColumnNames.get(key, key)
116 if property not in mol.data: 133 if property not in mol.data:
117 break 134 break
118 else: 135 else: