Mercurial > repos > bgruening > openbabel_remduplicates
comparison ob_filter.py @ 0:75d6c2b7907a draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/openbabel commit 01da22e4184a5a6f6a3dd4631a7b9c31d1b6d502
author | bgruening |
---|---|
date | Sat, 20 May 2017 08:39:17 -0400 |
parents | |
children | 877a57d8600c |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:75d6c2b7907a |
---|---|
1 #!/usr/bin/env python | |
2 """ | |
3 Input: set of molecules with pre-calculated physico-chemical properties | |
4 Output: set of molecules that pass all the filters | |
5 Copyright 2012, Bjoern Gruening and Xavier Lucas | |
6 | |
7 TODO: AND/OR conditions? | |
8 """ | |
9 import sys, os | |
10 import argparse | |
11 import cheminfolib | |
12 import json | |
13 import pybel | |
14 import shlex, subprocess | |
15 | |
16 cheminfolib.pybel_stop_logging() | |
17 | |
def parse_command_line(argv=None):
    """Parse the command-line options of the filter script.

    Args:
        argv: Optional list of argument strings. When None (the default)
            argparse reads ``sys.argv[1:]``, so callers that pass nothing
            behave exactly as before.

    Returns:
        argparse.Namespace with the attributes ``input``, ``iformat``,
        ``oformat`` (default ``'smi'``), ``output`` and ``filters``.

    Raises:
        SystemExit: raised by argparse when a required option is missing
            or an unknown option is given.
    """
    parser = argparse.ArgumentParser()
    # The input file and its format are interpolated unconditionally into the
    # obabel command line, so a missing value can never produce a useful run;
    # reject it here with a proper usage message instead.
    parser.add_argument('-i', '--input',
                        help='Input file name',
                        required=True)
    parser.add_argument('-iformat',
                        help='Input file format',
                        required=True)
    parser.add_argument('-oformat',
                        default='smi',
                        help='Output file format')
    parser.add_argument('-o', '--output',
                        help='Output file name',
                        required=True)
    parser.add_argument('--filters',
                        help="Specify the filters to apply",
                        required=True)
    return parser.parse_args(argv)
33 | |
def _satisfies_bounds(data, bounds):
    """Return True when every (column, low, high) triple in *bounds* holds for *data*."""
    for column, low, high in bounds:
        try:
            value = float(data[column])
        except (KeyError, ValueError):
            # Only the first molecule of the file is checked for metadata up
            # front; a later molecule that lacks the property, or stores a
            # non-numeric value, cannot be shown to pass and is rejected.
            return False
        if not low <= value <= high:
            return False
    return True


def filter_precalculated_compounds(args, filters):
    """Write every SDF molecule whose stored properties satisfy all filters.

    Args:
        args: Parsed options. ``args.input`` is read as SDF; the result is
            written to ``args.output`` in format ``args.oformat``, overwriting
            an existing file.
        filters: Mapping of descriptor short name to a sequence whose first
            two items are the inclusive lower and upper bound (anything
            ``float()`` accepts).

    A molecule is written only if, for every filter, lower <= value <= upper.
    Molecules with a missing or non-numeric property are skipped.

    Raises:
        KeyError: if a filter key is not present in ``cheminfolib.ColumnNames``.
        ValueError: if a bound cannot be converted to float.
    """
    # Map the short description to the larger metadata names stored in the
    # sdf file, and convert the bounds, once instead of once per molecule.
    bounds = [
        (cheminfolib.ColumnNames[key], float(limits[0]), float(limits[1]))
        for key, limits in filters.items()
    ]

    outfile = pybel.Outputfile(args.oformat, args.output, overwrite=True)
    try:
        for mol in pybel.readfile('sdf', args.input):
            if _satisfies_bounds(mol.data, bounds):
                outfile.write(mol)
    finally:
        # Close the output even when reading or writing fails part-way.
        outfile.close()
51 | |
def filter_new_compounds(args, filters):
    """Filter molecules by letting the ``obabel`` executable evaluate the descriptors.

    Args:
        args: Parsed options; uses ``iformat``, ``input``, ``oformat`` and
            ``output``.
        filters: Mapping of descriptor short name to a sequence whose first
            two items are the lower and upper bound.

    The command's stdout/stderr are forwarded: on a non-zero return code
    stderr goes to ``sys.stderr`` together with the failing command, otherwise
    both streams are written to ``sys.stdout``.
    """
    # Build the argument vector directly instead of formatting one string and
    # re-splitting it with shlex: file names containing spaces or quotes then
    # reach obabel as a single, unmodified argument.
    command = ['obabel', '-i%s' % args.iformat, args.input]
    if args.iformat == args.oformat:
        # use the -ocopy option from openbabel to speed up the filtering, additionally no conversion is carried out
        # http://openbabel.org/docs/dev/FileFormats/Copy_raw_text.html#copy-raw-text
        command.append('-ocopy')
    else:
        command.append('-o%s' % args.oformat)
    command.extend(['-O', args.output, '--filter'])

    # OBDescriptor stores a mapping from our desc shortcut to the OB name [0] and a long description [1]
    conditions = []
    for key, elem in filters.items():
        ob_descriptor_name = cheminfolib.OBDescriptor[key][0]
        lower = elem[0]
        upper = elem[1]
        conditions.append(' %s>=%s %s<=%s ' % (ob_descriptor_name, lower, ob_descriptor_name, upper))
    # All conditions travel as ONE argument following --filter.
    command.append(''.join(conditions))

    # calling openbabel with subprocess and capture both output streams
    child = subprocess.Popen(command,
                             stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    stdout, stderr = child.communicate()
    return_code = child.returncode

    out_text = stdout.decode('utf-8')
    err_text = stderr.decode('utf-8')
    if return_code:
        sys.stdout.write(out_text)
        sys.stderr.write(err_text)
        sys.stderr.write("Return error code %i from command:\n" % return_code)
        # Report the complete command, including the filter expression.
        sys.stderr.write("%s\n" % ' '.join(command))
    else:
        # On success obabel's stderr output is forwarded to stdout rather
        # than to stderr.
        sys.stdout.write(out_text)
        sys.stdout.write(err_text)
85 | |
86 | |
def __main__():
    """
    Select compounds with certain properties from a small library

    Returns True when the pre-calculated (SDF metadata) filtering was used,
    otherwise None after delegating to the obabel based filtering.
    """
    args = parse_command_line()
    # Its a small trick to get the parameters in an easy way from the xml file.
    # To keep it readable in the xml file, many white-spaces are included in that string; they need to be removed.
    # Also the last loop iteration leaves a ',}' that is not a valid JSON expression.
    filters = json.loads(args.filters.replace(' ', '').replace(',}', '}'))
    if args.iformat == 'sdf':
        # Check if the sdf file contains all of the required metadata to invoke the precalculation filtering.
        # next() with a default works on Python 2 and 3 and yields None for an
        # empty file instead of raising StopIteration.
        first_mol = next(iter(pybel.readfile('sdf', args.input)), None)
        if first_mol is not None:
            for key in filters:
                column = cheminfolib.ColumnNames[key]
                if column not in first_mol.data:
                    break
            else:
                # if the for loop finishes in a normal way, we should have all properties at least in the first molecule
                # assume it is the same for all other molecules and start the precalculated filtering
                filter_precalculated_compounds(args, filters)
                return True
    # Not SDF, an empty file, or metadata missing: let obabel compute the descriptors.
    filter_new_compounds(args, filters)
109 | |
110 | |
# Run the filter only when this file is executed as a script, not on import.
if __name__ == "__main__":
    __main__()