Mercurial > repos > bgruening > openbabel_remsmall
diff ob_filter.py @ 15:81836d586797 draft default tip
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/openbabel commit d9c51279c061a1da948a2582d5b502ca7573adbf
author | bgruening |
---|---|
date | Thu, 15 Aug 2024 11:01:36 +0000 |
parents | e94b2920d4e4 |
children |
line wrap: on
line diff
--- a/ob_filter.py Tue Nov 10 20:31:26 2020 +0000 +++ b/ob_filter.py Thu Aug 15 11:01:36 2024 +0000 @@ -14,33 +14,36 @@ import cheminfolib from openbabel import pybel + cheminfolib.pybel_stop_logging() def parse_command_line(): parser = argparse.ArgumentParser() - parser.add_argument('-i', '--input', help='Input file name') - parser.add_argument('-iformat', help='Input file format') - parser.add_argument('-oformat', default='smi', - help='Output file format') - parser.add_argument('-o', '--output', help='Output file name', - required=True) - parser.add_argument('--filters', help="Specify the filters to apply", - required=True) - parser.add_argument('--list_of_names', required=False, - help="A file with list of molecule names to extract. Every name is in one line.") + parser.add_argument("-i", "--input", help="Input file name") + parser.add_argument("-iformat", help="Input file format") + parser.add_argument("-oformat", default="smi", help="Output file format") + parser.add_argument("-o", "--output", help="Output file name", required=True) + parser.add_argument("--filters", help="Specify the filters to apply", required=True) + parser.add_argument( + "--list_of_names", + required=False, + help="A file with list of molecule names to extract. Every name is in one line.", + ) return parser.parse_args() def filter_precalculated_compounds(args, filters): outfile = pybel.Outputfile(args.oformat, args.output, overwrite=True) - for mol in pybel.readfile('sdf', args.input): + for mol in pybel.readfile("sdf", args.input): for key, elem in filters.items(): # map the short description to the larger metadata names stored in the sdf file property = cheminfolib.ColumnNames.get(key, key) min = elem[0] max = elem[1] - if float(mol.data[property]) >= float(min) and float(mol.data[property]) <= float(max): + if float(mol.data[property]) >= float(min) and float( + mol.data[property] + ) <= float(max): pass else: # leave the filter loop, because one filter constrained are not satisfied @@ -56,16 +59,30 @@ if args.iformat == args.oformat: # use the -ocopy option from openbabel to speed up the filtering, additionally no conversion is carried out # http://openbabel.org/docs/dev/FileFormats/Copy_raw_text.html#copy-raw-text - cmd = 'obabel -i%s %s -ocopy -O %s --filter' % (args.iformat, args.input, args.output) + cmd = "obabel -i%s %s -ocopy -O %s --filter" % ( + args.iformat, + args.input, + args.output, + ) else: - cmd = 'obabel -i%s %s -o%s -O %s --filter' % (args.iformat, args.input, args.oformat, args.output) - filter_cmd = '' + cmd = "obabel -i%s %s -o%s -O %s --filter" % ( + args.iformat, + args.input, + args.oformat, + args.output, + ) + filter_cmd = "" # OBDescriptor stores a mapping from our desc shortcut to the OB name [0] and a long description [1] for key, elem in filters.items(): ob_descriptor_name = cheminfolib.OBDescriptor[key][0] min = elem[0] max = elem[1] - filter_cmd += ' %s>=%s %s<=%s ' % (ob_descriptor_name, min, ob_descriptor_name, max) + filter_cmd += " %s>=%s %s<=%s " % ( + ob_descriptor_name, + min, + ob_descriptor_name, + max, + ) args = shlex.split('%s "%s"' % (cmd, filter_cmd)) # print '%s "%s"' % (cmd, filter_cmd) @@ -76,18 +93,18 @@ return_code = child.returncode if return_code: - sys.stdout.write(stdout.decode('utf-8')) - sys.stderr.write(stderr.decode('utf-8')) + sys.stdout.write(stdout.decode("utf-8")) + sys.stderr.write(stderr.decode("utf-8")) sys.stderr.write("Return error code %i from command:\n" % return_code) sys.stderr.write("%s\n" % cmd) else: - sys.stdout.write(stdout.decode('utf-8')) - sys.stdout.write(stderr.decode('utf-8')) + sys.stdout.write(stdout.decode("utf-8")) + sys.stdout.write(stderr.decode("utf-8")) def filter_by_name(args): outfile = pybel.Outputfile(args.oformat, args.output, overwrite=True) - for mol in pybel.readfile('sdf', args.input): + for mol in pybel.readfile("sdf", args.input): for name in open(args.list_of_names): if mol.title.strip() == name.strip(): outfile.write(mol) @@ -96,21 +113,21 @@ def __main__(): """ - Select compounds with certain properties from a small library + Select compounds with certain properties from a small library """ args = parse_command_line() - if args.filters == '__filter_by_name__': + if args.filters == "__filter_by_name__": filter_by_name(args) return # Its a small trick to get the parameters in an easy way from the xml file. # To keep it readable in the xml file, many white-spaces are included in that string it needs to be removed. # Also the last loop creates a ',{' that is not an valid jason expression. - filters = json.loads((args.filters).replace(' ', '').replace(',}', '}')) - if args.iformat == 'sdf': + filters = json.loads((args.filters).replace(" ", "").replace(",}", "}")) + if args.iformat == "sdf": # Check if the sdf file contains all of the required metadata to invoke the precalculation filtering - mol = next(pybel.readfile('sdf', args.input)) + mol = next(pybel.readfile("sdf", args.input)) for key, elem in filters.items(): property = cheminfolib.ColumnNames.get(key, key) if property not in mol.data: