Mercurial > repos > bgruening > openbabel_remions
comparison ob_filter.py @ 13:3153b6f3087c draft
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/openbabel commit 1fe240ef0064a1a4a66d9be1ccace53824280b75"
author | bgruening |
---|---|
date | Mon, 19 Oct 2020 14:43:01 +0000 |
parents | 354c048550f7 |
children | f70b83b730ac |
comparison
equal
deleted
inserted
replaced
12:354c048550f7 | 13:3153b6f3087c |
---|---|
4 Output: set of molecules that pass all the filters | 4 Output: set of molecules that pass all the filters |
5 Copyright 2012, Bjoern Gruening and Xavier Lucas | 5 Copyright 2012, Bjoern Gruening and Xavier Lucas |
6 | 6 |
7 TODO: AND/OR conditions? | 7 TODO: AND/OR conditions? |
8 """ | 8 """ |
9 import sys, os | |
10 import argparse | 9 import argparse |
10 import json | |
11 import shlex | |
12 import subprocess | |
13 import sys | |
14 | |
11 import cheminfolib | 15 import cheminfolib |
12 import json | |
13 import shlex, subprocess | |
14 | |
15 from openbabel import pybel | 16 from openbabel import pybel |
16 cheminfolib.pybel_stop_logging() | 17 cheminfolib.pybel_stop_logging() |
18 | |
17 | 19 |
18 def parse_command_line(): | 20 def parse_command_line(): |
19 parser = argparse.ArgumentParser() | 21 parser = argparse.ArgumentParser() |
20 parser.add_argument('-i', '--input', help='Input file name') | 22 parser.add_argument('-i', '--input', help='Input file name') |
21 parser.add_argument('-iformat', help='Input file format') | 23 parser.add_argument('-iformat', help='Input file format') |
22 parser.add_argument('-oformat', | 24 parser.add_argument('-oformat', default='smi', |
23 default='smi', | 25 help='Output file format') |
24 help='Output file format') | 26 parser.add_argument('-o', '--output', help='Output file name', |
25 parser.add_argument('-o', '--output', | 27 required=True) |
26 help='Output file name', | 28 parser.add_argument('--filters', help="Specify the filters to apply", |
27 required=True) | 29 required=True) |
28 parser.add_argument('--filters', | 30 parser.add_argument('--list_of_names', required=False, |
29 help="Specify the filters to apply", | 31 help="A file with list of molecule names to extract. Every name is in one line.") |
30 required=True, | |
31 ) | |
32 parser.add_argument('--list_of_names', | |
33 help="A file with list of molecule names to extract. Every name is in one line.", | |
34 required=False, | |
35 ) | |
36 return parser.parse_args() | 32 return parser.parse_args() |
33 | |
37 | 34 |
38 def filter_precalculated_compounds(args, filters): | 35 def filter_precalculated_compounds(args, filters): |
39 outfile = pybel.Outputfile(args.oformat, args.output, overwrite=True) | 36 outfile = pybel.Outputfile(args.oformat, args.output, overwrite=True) |
40 for mol in pybel.readfile('sdf', args.input): | 37 for mol in pybel.readfile('sdf', args.input): |
41 for key, elem in filters.items(): | 38 for key, elem in filters.items(): |
51 else: | 48 else: |
52 # if the filter loop terminates in a normal way (no break) all filter rules are satisfied, so save the compound | 49 # if the filter loop terminates in a normal way (no break) all filter rules are satisfied, so save the compound |
53 outfile.write(mol) | 50 outfile.write(mol) |
54 outfile.close() | 51 outfile.close() |
55 | 52 |
53 | |
56 def filter_new_compounds(args, filters): | 54 def filter_new_compounds(args, filters): |
57 | 55 |
58 if args.iformat == args.oformat: | 56 if args.iformat == args.oformat: |
59 # use the -ocopy option from openbabel to speed up the filtering, additionally no conversion is carried out | 57 # use the -ocopy option from openbabel to speed up the filtering, additionally no conversion is carried out |
60 # http://openbabel.org/docs/dev/FileFormats/Copy_raw_text.html#copy-raw-text | 58 # http://openbabel.org/docs/dev/FileFormats/Copy_raw_text.html#copy-raw-text |
68 min = elem[0] | 66 min = elem[0] |
69 max = elem[1] | 67 max = elem[1] |
70 filter_cmd += ' %s>=%s %s<=%s ' % (ob_descriptor_name, min, ob_descriptor_name, max) | 68 filter_cmd += ' %s>=%s %s<=%s ' % (ob_descriptor_name, min, ob_descriptor_name, max) |
71 | 69 |
72 args = shlex.split('%s "%s"' % (cmd, filter_cmd)) | 70 args = shlex.split('%s "%s"' % (cmd, filter_cmd)) |
73 #print '%s "%s"' % (cmd, filter_cmd) | 71 # print '%s "%s"' % (cmd, filter_cmd) |
74 # calling openbabel with subprocess and pipe potential errors occurring in openbabel to stdout | 72 # calling openbabel with subprocess and pipe potential errors occurring in openbabel to stdout |
75 child = subprocess.Popen(args, | 73 child = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE) |
76 stdout=subprocess.PIPE, stderr=subprocess.PIPE) | |
77 | 74 |
78 stdout, stderr = child.communicate() | 75 stdout, stderr = child.communicate() |
79 return_code = child.returncode | 76 return_code = child.returncode |
80 | 77 |
81 if return_code: | 78 if return_code: |
85 sys.stderr.write("%s\n" % cmd) | 82 sys.stderr.write("%s\n" % cmd) |
86 else: | 83 else: |
87 sys.stdout.write(stdout.decode('utf-8')) | 84 sys.stdout.write(stdout.decode('utf-8')) |
88 sys.stdout.write(stderr.decode('utf-8')) | 85 sys.stdout.write(stderr.decode('utf-8')) |
89 | 86 |
87 | |
90 def filter_by_name(args): | 88 def filter_by_name(args): |
91 outfile = pybel.Outputfile(args.oformat, args.output, overwrite=True) | 89 outfile = pybel.Outputfile(args.oformat, args.output, overwrite=True) |
92 for mol in pybel.readfile('sdf', args.input): | 90 for mol in pybel.readfile('sdf', args.input): |
93 for name in open(args.list_of_names): | 91 for name in open(args.list_of_names): |
94 if mol.title.strip() == name.strip(): | 92 if mol.title.strip() == name.strip(): |
95 outfile.write(mol) | 93 outfile.write(mol) |
96 outfile.close() | 94 outfile.close() |
97 | 95 |
96 | |
98 def __main__(): | 97 def __main__(): |
99 """ | 98 """ |
100 Select compounds with certain properties from a small library | 99 Select compounds with certain properties from a small library |
101 """ | 100 """ |
102 args = parse_command_line() | 101 args = parse_command_line() |
103 | 102 |
104 if args.filters == '__filter_by_name__': | 103 if args.filters == '__filter_by_name__': |
105 filter_by_name(args) | 104 filter_by_name(args) |
106 return | 105 return |
107 | 106 |
108 # It's a small trick to get the parameters in an easy way from the xml file. | 107 # It's a small trick to get the parameters in an easy way from the xml file. |
109 # To keep it readable in the xml file, the string contains many white-spaces, which need to be removed. | 108 # To keep it readable in the xml file, the string contains many white-spaces, which need to be removed. |
110 # Also the last loop creates a trailing ',}' that is not a valid JSON expression. | 109 # Also the last loop creates a trailing ',}' that is not a valid JSON expression. |
111 filters = json.loads((args.filters).replace(' ', '').replace(',}', '}')) | 110 filters = json.loads((args.filters).replace(' ', '').replace(',}', '}')) |
112 if args.iformat == 'sdf': | 111 if args.iformat == 'sdf': |
113 # Check if the sdf file contains all of the required metadata to invoke the precalculation filtering | 112 # Check if the sdf file contains all of the required metadata to invoke the precalculation filtering |
114 mol = next(pybel.readfile('sdf', args.input)) | 113 mol = next(pybel.readfile('sdf', args.input)) |
115 for key, elem in filters.items(): | 114 for key, elem in filters.items(): |
116 property = cheminfolib.ColumnNames.get(key, key) | 115 property = cheminfolib.ColumnNames.get(key, key) |
117 if not property in mol.data: | 116 if property not in mol.data: |
118 break | 117 break |
119 else: | 118 else: |
120 # if the for loop finishes in a normal way, we should have all properties at least in the first molecule | 119 # if the for loop finishes in a normal way, we should have all properties at least in the first molecule |
121 # assume it is the same for all other molecules and start the precalculated filtering | 120 # assume it is the same for all other molecules and start the precalculated filtering |
122 filter_precalculated_compounds(args, filters) | 121 filter_precalculated_compounds(args, filters) |
123 return True | 122 return True |
124 filter_new_compounds(args, filters) | 123 filter_new_compounds(args, filters) |
125 | 124 |
126 | 125 |
127 if __name__ == "__main__" : | 126 if __name__ == "__main__": |
128 __main__() | 127 __main__() |