comparison ob_filter.py @ 13:3153b6f3087c draft

"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/openbabel commit 1fe240ef0064a1a4a66d9be1ccace53824280b75"
author bgruening
date Mon, 19 Oct 2020 14:43:01 +0000
parents 354c048550f7
children f70b83b730ac
comparison
equal deleted inserted replaced
12:354c048550f7 13:3153b6f3087c
4 Output: set of molecules that pass all the filters 4 Output: set of molecules that pass all the filters
5 Copyright 2012, Bjoern Gruening and Xavier Lucas 5 Copyright 2012, Bjoern Gruening and Xavier Lucas
6 6
7 TODO: AND/OR conditions? 7 TODO: AND/OR conditions?
8 """ 8 """
9 import sys, os
10 import argparse 9 import argparse
10 import json
11 import shlex
12 import subprocess
13 import sys
14
11 import cheminfolib 15 import cheminfolib
12 import json
13 import shlex, subprocess
14
15 from openbabel import pybel 16 from openbabel import pybel
16 cheminfolib.pybel_stop_logging() 17 cheminfolib.pybel_stop_logging()
18
17 19
def parse_command_line(argv=None):
    """Parse the command-line options of the compound filter.

    Args:
        argv: Optional list of argument strings to parse. ``None`` (the
            default) makes argparse read ``sys.argv[1:]``, which is what
            the script entry point relies on; passing a list allows the
            parser to be driven programmatically.

    Returns:
        argparse.Namespace with the attributes ``input``, ``iformat``,
        ``oformat`` (defaults to ``'smi'``), ``output``, ``filters`` and
        ``list_of_names``.

    Raises:
        SystemExit: raised by argparse when the required ``--output`` or
            ``--filters`` option is missing or an unknown option is given.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--input', help='Input file name')
    parser.add_argument('-iformat', help='Input file format')
    parser.add_argument('-oformat', default='smi',
                        help='Output file format')
    parser.add_argument('-o', '--output', help='Output file name',
                        required=True)
    parser.add_argument('--filters', help="Specify the filters to apply",
                        required=True)
    parser.add_argument('--list_of_names', required=False,
                        help="A file with list of molecule names to extract. Every name is in one line.")
    return parser.parse_args(argv)
33
37 34
38 def filter_precalculated_compounds(args, filters): 35 def filter_precalculated_compounds(args, filters):
39 outfile = pybel.Outputfile(args.oformat, args.output, overwrite=True) 36 outfile = pybel.Outputfile(args.oformat, args.output, overwrite=True)
40 for mol in pybel.readfile('sdf', args.input): 37 for mol in pybel.readfile('sdf', args.input):
41 for key, elem in filters.items(): 38 for key, elem in filters.items():
51 else: 48 else:
52 # if the filter loop terminates in a normal way (no break) all filter rules are satisfied, so save the compound 49 # if the filter loop terminates in a normal way (no break) all filter rules are satisfied, so save the compound
53 outfile.write(mol) 50 outfile.write(mol)
54 outfile.close() 51 outfile.close()
55 52
53
56 def filter_new_compounds(args, filters): 54 def filter_new_compounds(args, filters):
57 55
58 if args.iformat == args.oformat: 56 if args.iformat == args.oformat:
59 # use the -ocopy option from openbabel to speed up the filtering, additionally no conversion is carried out 57 # use the -ocopy option from openbabel to speed up the filtering, additionally no conversion is carried out
60 # http://openbabel.org/docs/dev/FileFormats/Copy_raw_text.html#copy-raw-text 58 # http://openbabel.org/docs/dev/FileFormats/Copy_raw_text.html#copy-raw-text
68 min = elem[0] 66 min = elem[0]
69 max = elem[1] 67 max = elem[1]
70 filter_cmd += ' %s>=%s %s<=%s ' % (ob_descriptor_name, min, ob_descriptor_name, max) 68 filter_cmd += ' %s>=%s %s<=%s ' % (ob_descriptor_name, min, ob_descriptor_name, max)
71 69
72 args = shlex.split('%s "%s"' % (cmd, filter_cmd)) 70 args = shlex.split('%s "%s"' % (cmd, filter_cmd))
73 #print '%s "%s"' % (cmd, filter_cmd) 71 # print '%s "%s"' % (cmd, filter_cmd)
74 # calling openbabel with subprocess and pipe potential errors occurring in openbabel to stdout 72 # calling openbabel with subprocess and pipe potential errors occurring in openbabel to stdout
75 child = subprocess.Popen(args, 73 child = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
76 stdout=subprocess.PIPE, stderr=subprocess.PIPE)
77 74
78 stdout, stderr = child.communicate() 75 stdout, stderr = child.communicate()
79 return_code = child.returncode 76 return_code = child.returncode
80 77
81 if return_code: 78 if return_code:
85 sys.stderr.write("%s\n" % cmd) 82 sys.stderr.write("%s\n" % cmd)
86 else: 83 else:
87 sys.stdout.write(stdout.decode('utf-8')) 84 sys.stdout.write(stdout.decode('utf-8'))
88 sys.stdout.write(stderr.decode('utf-8')) 85 sys.stdout.write(stderr.decode('utf-8'))
89 86
87
def filter_by_name(args):
    """Write every input molecule whose title appears in the names file.

    Args:
        args: Parsed command-line namespace. Reads ``list_of_names`` (text
            file with one molecule name per line), ``input`` (always read
            as SDF, independent of ``iformat``), ``oformat`` and ``output``.

    Titles and names are compared after stripping surrounding whitespace.
    Each matching molecule is written exactly once, even if its name is
    listed several times in the names file.
    """
    # Load the names a single time: re-opening the file for every molecule
    # leaked a file handle per molecule and made the scan quadratic.
    with open(args.list_of_names) as names_file:
        wanted_names = {line.strip() for line in names_file}

    outfile = pybel.Outputfile(args.oformat, args.output, overwrite=True)
    try:
        for mol in pybel.readfile('sdf', args.input):
            if mol.title.strip() in wanted_names:
                outfile.write(mol)
    finally:
        # Close the output even if reading or writing a molecule fails.
        outfile.close()
97 95
96
def __main__():
    """
    Select compounds with certain properties from a small library

    Dispatches to one of three strategies:
    - ``--filters __filter_by_name__``: extract molecules by title.
    - SDF input whose first molecule already carries every requested
      property in its data fields: filter on those stored values.
    - anything else (including an SDF file without molecules): let
      Open Babel compute the descriptors and filter on them.

    Returns True when the precalculated-property path was taken, None
    otherwise.
    """
    args = parse_command_line()

    if args.filters == '__filter_by_name__':
        filter_by_name(args)
        return

    # Its a small trick to get the parameters in an easy way from the xml file.
    # To keep it readable in the xml file, many white-spaces are included in
    # that string; they need to be removed. Also the last loop iteration
    # leaves a trailing ',}' that is not a valid JSON expression.
    filters = json.loads((args.filters).replace(' ', '').replace(',}', '}'))
    if args.iformat == 'sdf':
        # Check if the sdf file contains all of the required metadata to invoke
        # the precalculation filtering. A default of None avoids an uncaught
        # StopIteration when the file holds no molecule at all.
        first_mol = next(pybel.readfile('sdf', args.input), None)
        if first_mol is not None and all(
                cheminfolib.ColumnNames.get(key, key) in first_mol.data
                for key in filters):
            # All properties are present at least in the first molecule;
            # assume it is the same for all other molecules and start the
            # precalculated filtering.
            filter_precalculated_compounds(args, filters)
            return True
    filter_new_compounds(args, filters)
125 124
126 125
127 if __name__ == "__main__" : 126 if __name__ == "__main__":
128 __main__() 127 __main__()