Mercurial > repos > bgruening > openbabel_filter
comparison ob_filter.py @ 16:988085c7a0ea draft default tip
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/openbabel commit d9c51279c061a1da948a2582d5b502ca7573adbf
| author | bgruening |
|---|---|
| date | Thu, 15 Aug 2024 11:06:01 +0000 |
| parents | 8ee975c49a3d |
| children |
comparison
equal
deleted
inserted
replaced
| 15:df4dc55197dd | 16:988085c7a0ea |
|---|---|
| 12 import subprocess | 12 import subprocess |
| 13 import sys | 13 import sys |
| 14 | 14 |
| 15 import cheminfolib | 15 import cheminfolib |
| 16 from openbabel import pybel | 16 from openbabel import pybel |
| 17 | |
| 17 cheminfolib.pybel_stop_logging() | 18 cheminfolib.pybel_stop_logging() |
| 18 | 19 |
| 19 | 20 |
| 20 def parse_command_line(): | 21 def parse_command_line(): |
| 21 parser = argparse.ArgumentParser() | 22 parser = argparse.ArgumentParser() |
| 22 parser.add_argument('-i', '--input', help='Input file name') | 23 parser.add_argument("-i", "--input", help="Input file name") |
| 23 parser.add_argument('-iformat', help='Input file format') | 24 parser.add_argument("-iformat", help="Input file format") |
| 24 parser.add_argument('-oformat', default='smi', | 25 parser.add_argument("-oformat", default="smi", help="Output file format") |
| 25 help='Output file format') | 26 parser.add_argument("-o", "--output", help="Output file name", required=True) |
| 26 parser.add_argument('-o', '--output', help='Output file name', | 27 parser.add_argument("--filters", help="Specify the filters to apply", required=True) |
| 27 required=True) | 28 parser.add_argument( |
| 28 parser.add_argument('--filters', help="Specify the filters to apply", | 29 "--list_of_names", |
| 29 required=True) | 30 required=False, |
| 30 parser.add_argument('--list_of_names', required=False, | 31 help="A file with list of molecule names to extract. Every name is in one line.", |
| 31 help="A file with list of molecule names to extract. Every name is in one line.") | 32 ) |
| 32 return parser.parse_args() | 33 return parser.parse_args() |
| 33 | 34 |
| 34 | 35 |
| 35 def filter_precalculated_compounds(args, filters): | 36 def filter_precalculated_compounds(args, filters): |
| 36 outfile = pybel.Outputfile(args.oformat, args.output, overwrite=True) | 37 outfile = pybel.Outputfile(args.oformat, args.output, overwrite=True) |
| 37 for mol in pybel.readfile('sdf', args.input): | 38 for mol in pybel.readfile("sdf", args.input): |
| 38 for key, elem in filters.items(): | 39 for key, elem in filters.items(): |
| 39 # map the short description to the larger metadata names stored in the sdf file | 40 # map the short description to the larger metadata names stored in the sdf file |
| 40 property = cheminfolib.ColumnNames.get(key, key) | 41 property = cheminfolib.ColumnNames.get(key, key) |
| 41 min = elem[0] | 42 min = elem[0] |
| 42 max = elem[1] | 43 max = elem[1] |
| 43 if float(mol.data[property]) >= float(min) and float(mol.data[property]) <= float(max): | 44 if float(mol.data[property]) >= float(min) and float( |
| 45 mol.data[property] | |
| 46 ) <= float(max): | |
| 44 pass | 47 pass |
| 45 else: | 48 else: |
| 46 # leave the filter loop, because one filter constraint is not satisfied | 49 # leave the filter loop, because one filter constraint is not satisfied |
| 47 break | 50 break |
| 48 else: | 51 else: |
| 54 def filter_new_compounds(args, filters): | 57 def filter_new_compounds(args, filters): |
| 55 | 58 |
| 56 if args.iformat == args.oformat: | 59 if args.iformat == args.oformat: |
| 57 # use the -ocopy option from openbabel to speed up the filtering, additionally no conversion is carried out | 60 # use the -ocopy option from openbabel to speed up the filtering, additionally no conversion is carried out |
| 58 # http://openbabel.org/docs/dev/FileFormats/Copy_raw_text.html#copy-raw-text | 61 # http://openbabel.org/docs/dev/FileFormats/Copy_raw_text.html#copy-raw-text |
| 59 cmd = 'obabel -i%s %s -ocopy -O %s --filter' % (args.iformat, args.input, args.output) | 62 cmd = "obabel -i%s %s -ocopy -O %s --filter" % ( |
| 63 args.iformat, | |
| 64 args.input, | |
| 65 args.output, | |
| 66 ) | |
| 60 else: | 67 else: |
| 61 cmd = 'obabel -i%s %s -o%s -O %s --filter' % (args.iformat, args.input, args.oformat, args.output) | 68 cmd = "obabel -i%s %s -o%s -O %s --filter" % ( |
| 62 filter_cmd = '' | 69 args.iformat, |
| 70 args.input, | |
| 71 args.oformat, | |
| 72 args.output, | |
| 73 ) | |
| 74 filter_cmd = "" | |
| 63 # OBDescriptor stores a mapping from our desc shortcut to the OB name [0] and a long description [1] | 75 # OBDescriptor stores a mapping from our desc shortcut to the OB name [0] and a long description [1] |
| 64 for key, elem in filters.items(): | 76 for key, elem in filters.items(): |
| 65 ob_descriptor_name = cheminfolib.OBDescriptor[key][0] | 77 ob_descriptor_name = cheminfolib.OBDescriptor[key][0] |
| 66 min = elem[0] | 78 min = elem[0] |
| 67 max = elem[1] | 79 max = elem[1] |
| 68 filter_cmd += ' %s>=%s %s<=%s ' % (ob_descriptor_name, min, ob_descriptor_name, max) | 80 filter_cmd += " %s>=%s %s<=%s " % ( |
| 81 ob_descriptor_name, | |
| 82 min, | |
| 83 ob_descriptor_name, | |
| 84 max, | |
| 85 ) | |
| 69 | 86 |
| 70 args = shlex.split('%s "%s"' % (cmd, filter_cmd)) | 87 args = shlex.split('%s "%s"' % (cmd, filter_cmd)) |
| 71 # print '%s "%s"' % (cmd, filter_cmd) | 88 # print '%s "%s"' % (cmd, filter_cmd) |
| 72 # calling openbabel with subprocess and pipe potential errors occurring in openbabel to stdout | 89 # calling openbabel with subprocess and pipe potential errors occurring in openbabel to stdout |
| 73 child = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE) | 90 child = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE) |
| 74 | 91 |
| 75 stdout, stderr = child.communicate() | 92 stdout, stderr = child.communicate() |
| 76 return_code = child.returncode | 93 return_code = child.returncode |
| 77 | 94 |
| 78 if return_code: | 95 if return_code: |
| 79 sys.stdout.write(stdout.decode('utf-8')) | 96 sys.stdout.write(stdout.decode("utf-8")) |
| 80 sys.stderr.write(stderr.decode('utf-8')) | 97 sys.stderr.write(stderr.decode("utf-8")) |
| 81 sys.stderr.write("Return error code %i from command:\n" % return_code) | 98 sys.stderr.write("Return error code %i from command:\n" % return_code) |
| 82 sys.stderr.write("%s\n" % cmd) | 99 sys.stderr.write("%s\n" % cmd) |
| 83 else: | 100 else: |
| 84 sys.stdout.write(stdout.decode('utf-8')) | 101 sys.stdout.write(stdout.decode("utf-8")) |
| 85 sys.stdout.write(stderr.decode('utf-8')) | 102 sys.stdout.write(stderr.decode("utf-8")) |
| 86 | 103 |
| 87 | 104 |
| 88 def filter_by_name(args): | 105 def filter_by_name(args): |
| 89 outfile = pybel.Outputfile(args.oformat, args.output, overwrite=True) | 106 outfile = pybel.Outputfile(args.oformat, args.output, overwrite=True) |
| 90 for mol in pybel.readfile('sdf', args.input): | 107 for mol in pybel.readfile("sdf", args.input): |
| 91 for name in open(args.list_of_names): | 108 for name in open(args.list_of_names): |
| 92 if mol.title.strip() == name.strip(): | 109 if mol.title.strip() == name.strip(): |
| 93 outfile.write(mol) | 110 outfile.write(mol) |
| 94 outfile.close() | 111 outfile.close() |
| 95 | 112 |
| 96 | 113 |
| 97 def __main__(): | 114 def __main__(): |
| 98 """ | 115 """ |
| 99 Select compounds with certain properties from a small library | 116 Select compounds with certain properties from a small library |
| 100 """ | 117 """ |
| 101 args = parse_command_line() | 118 args = parse_command_line() |
| 102 | 119 |
| 103 if args.filters == '__filter_by_name__': | 120 if args.filters == "__filter_by_name__": |
| 104 filter_by_name(args) | 121 filter_by_name(args) |
| 105 return | 122 return |
| 106 | 123 |
| 107 # It's a small trick to get the parameters in an easy way from the xml file. | 124 # It's a small trick to get the parameters in an easy way from the xml file. |
| 108 # To keep it readable in the xml file, many white-spaces are included in that string; they need to be removed. | 125 # To keep it readable in the xml file, many white-spaces are included in that string; they need to be removed. |
| 109 # Also the last loop creates a ',}' that is not a valid JSON expression. | 126 # Also the last loop creates a ',}' that is not a valid JSON expression. |
| 110 filters = json.loads((args.filters).replace(' ', '').replace(',}', '}')) | 127 filters = json.loads((args.filters).replace(" ", "").replace(",}", "}")) |
| 111 if args.iformat == 'sdf': | 128 if args.iformat == "sdf": |
| 112 # Check if the sdf file contains all of the required metadata to invoke the precalculation filtering | 129 # Check if the sdf file contains all of the required metadata to invoke the precalculation filtering |
| 113 mol = next(pybel.readfile('sdf', args.input)) | 130 mol = next(pybel.readfile("sdf", args.input)) |
| 114 for key, elem in filters.items(): | 131 for key, elem in filters.items(): |
| 115 property = cheminfolib.ColumnNames.get(key, key) | 132 property = cheminfolib.ColumnNames.get(key, key) |
| 116 if property not in mol.data: | 133 if property not in mol.data: |
| 117 break | 134 break |
| 118 else: | 135 else: |
