comparison ob_filter.py @ 0:2704d4017b13 draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/openbabel commit 01da22e4184a5a6f6a3dd4631a7b9c31d1b6d502
author bgruening
date Sat, 20 May 2017 08:39:53 -0400
parents
children ab4318b816bd
comparison
equal deleted inserted replaced
-1:000000000000 0:2704d4017b13
1 #!/usr/bin/env python
2 """
3 Input: set of molecules with pre-calculated physico-chemical properties
4 Output: set of molecules that pass all the filters
5 Copyright 2012, Bjoern Gruening and Xavier Lucas
6
7 TODO: AND/OR conditions?
8 """
9 import sys, os
10 import argparse
11 import cheminfolib
12 import json
13 import pybel
14 import shlex, subprocess
15
16 cheminfolib.pybel_stop_logging()
17
18 def parse_command_line():
19 parser = argparse.ArgumentParser()
20 parser.add_argument('-i', '--input', help='Input file name')
21 parser.add_argument('-iformat', help='Input file format')
22 parser.add_argument('-oformat',
23 default='smi',
24 help='Output file format')
25 parser.add_argument('-o', '--output',
26 help='Output file name',
27 required=True)
28 parser.add_argument('--filters',
29 help="Specify the filters to apply",
30 required=True,
31 )
32 return parser.parse_args()
33
34 def filter_precalculated_compounds(args, filters):
35 outfile = pybel.Outputfile(args.oformat, args.output, overwrite=True)
36 for mol in pybel.readfile('sdf', args.input):
37 for key, elem in filters.items():
38 # map the short description to the larger metadata names stored in the sdf file
39 property = cheminfolib.ColumnNames[key]
40 min = elem[0]
41 max = elem[1]
42 if float(mol.data[property]) >= float(min) and float(mol.data[property]) <= float(max):
43 pass
44 else:
45 # leave the filter loop, because one filter constrained are not satisfied
46 break
47 else:
48 # if the filter loop terminates in a normal way (no break) all filter rules are satisfied, so save the compound
49 outfile.write(mol)
50 outfile.close()
51
52 def filter_new_compounds(args, filters):
53
54 if args.iformat == args.oformat:
55 # use the -ocopy option from openbabel to speed up the filtering, additionally no conversion is carried out
56 # http://openbabel.org/docs/dev/FileFormats/Copy_raw_text.html#copy-raw-text
57 cmd = 'obabel -i%s %s -ocopy -O %s --filter' % (args.iformat, args.input, args.output)
58 else:
59 cmd = 'obabel -i%s %s -o%s -O %s --filter' % (args.iformat, args.input, args.oformat, args.output)
60 filter_cmd = ''
61 # OBDescriptor stores a mapping from our desc shortcut to the OB name [0] and a long description [1]
62 for key, elem in filters.items():
63 ob_descriptor_name = cheminfolib.OBDescriptor[key][0]
64 min = elem[0]
65 max = elem[1]
66 filter_cmd += ' %s>=%s %s<=%s ' % (ob_descriptor_name, min, ob_descriptor_name, max)
67
68 args = shlex.split('%s "%s"' % (cmd, filter_cmd))
69 #print '%s "%s"' % (cmd, filter_cmd)
70 # calling openbabel with subprocess and pipe potential errors occuring in openbabel to stdout
71 child = subprocess.Popen(args,
72 stdout=subprocess.PIPE, stderr=subprocess.PIPE)
73
74 stdout, stderr = child.communicate()
75 return_code = child.returncode
76
77 if return_code:
78 sys.stdout.write(stdout.decode('utf-8'))
79 sys.stderr.write(stderr.decode('utf-8'))
80 sys.stderr.write("Return error code %i from command:\n" % return_code)
81 sys.stderr.write("%s\n" % cmd)
82 else:
83 sys.stdout.write(stdout.decode('utf-8'))
84 sys.stdout.write(stderr.decode('utf-8'))
85
86
87 def __main__():
88 """
89 Select compounds with certain properties from a small library
90 """
91 args = parse_command_line()
92 # Its a small trick to get the parameters in an easy way from the xml file.
93 # To keep it readable in the xml file, many white-spaces are included in that string it needs to be removed.
94 # Also the last loop creates a ',{' that is not an valid jason expression.
95 filters = json.loads((args.filters).replace(' ', '').replace(',}', '}'))
96 if args.iformat == 'sdf':
97 # Check if the sdf file contains all of the required metadata to invoke the precalculation filtering
98 mol = pybel.readfile('sdf', args.input).next()
99 for key, elem in filters.items():
100 property = cheminfolib.ColumnNames[key]
101 if not property in mol.data:
102 break
103 else:
104 # if the for loop finishes in a normal way, we should habe all properties at least in the first molecule
105 # assume it is the same for all other molecules and start the precalculated filtering
106 filter_precalculated_compounds(args, filters)
107 return True
108 filter_new_compounds(args, filters)
109
110
111 if __name__ == "__main__" :
112 __main__()