diff ob_filter.py @ 15:7b6fd1c273cd draft default tip

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/openbabel commit d9c51279c061a1da948a2582d5b502ca7573adbf
author bgruening
date Thu, 15 Aug 2024 11:03:49 +0000
parents f697d9601273
children
line wrap: on
line diff
--- a/ob_filter.py	Tue Nov 10 20:36:42 2020 +0000
+++ b/ob_filter.py	Thu Aug 15 11:03:49 2024 +0000
@@ -14,33 +14,36 @@
 
 import cheminfolib
 from openbabel import pybel
+
 cheminfolib.pybel_stop_logging()
 
 
 def parse_command_line():
     parser = argparse.ArgumentParser()
-    parser.add_argument('-i', '--input', help='Input file name')
-    parser.add_argument('-iformat', help='Input file format')
-    parser.add_argument('-oformat', default='smi',
-                        help='Output file format')
-    parser.add_argument('-o', '--output', help='Output file name',
-                        required=True)
-    parser.add_argument('--filters', help="Specify the filters to apply",
-                        required=True)
-    parser.add_argument('--list_of_names', required=False,
-                        help="A file with list of molecule names to extract. Every name is in one line.")
+    parser.add_argument("-i", "--input", help="Input file name")
+    parser.add_argument("-iformat", help="Input file format")
+    parser.add_argument("-oformat", default="smi", help="Output file format")
+    parser.add_argument("-o", "--output", help="Output file name", required=True)
+    parser.add_argument("--filters", help="Specify the filters to apply", required=True)
+    parser.add_argument(
+        "--list_of_names",
+        required=False,
+        help="A file with list of molecule names to extract. Every name is in one line.",
+    )
     return parser.parse_args()
 
 
 def filter_precalculated_compounds(args, filters):
     outfile = pybel.Outputfile(args.oformat, args.output, overwrite=True)
-    for mol in pybel.readfile('sdf', args.input):
+    for mol in pybel.readfile("sdf", args.input):
         for key, elem in filters.items():
             # map the short description to the larger metadata names stored in the sdf file
             property = cheminfolib.ColumnNames.get(key, key)
             min = elem[0]
             max = elem[1]
-            if float(mol.data[property]) >= float(min) and float(mol.data[property]) <= float(max):
+            if float(mol.data[property]) >= float(min) and float(
+                mol.data[property]
+            ) <= float(max):
                 pass
             else:
                 # leave the filter loop, because one filter constrained are not satisfied
@@ -56,16 +59,30 @@
     if args.iformat == args.oformat:
         # use the -ocopy option from openbabel to speed up the filtering, additionally no conversion is carried out
         # http://openbabel.org/docs/dev/FileFormats/Copy_raw_text.html#copy-raw-text
-        cmd = 'obabel -i%s %s -ocopy -O %s --filter' % (args.iformat, args.input, args.output)
+        cmd = "obabel -i%s %s -ocopy -O %s --filter" % (
+            args.iformat,
+            args.input,
+            args.output,
+        )
     else:
-        cmd = 'obabel -i%s %s -o%s -O %s --filter' % (args.iformat, args.input, args.oformat, args.output)
-    filter_cmd = ''
+        cmd = "obabel -i%s %s -o%s -O %s --filter" % (
+            args.iformat,
+            args.input,
+            args.oformat,
+            args.output,
+        )
+    filter_cmd = ""
     # OBDescriptor stores a mapping from our desc shortcut to the OB name [0] and a long description [1]
     for key, elem in filters.items():
         ob_descriptor_name = cheminfolib.OBDescriptor[key][0]
         min = elem[0]
         max = elem[1]
-        filter_cmd += ' %s>=%s %s<=%s ' % (ob_descriptor_name, min, ob_descriptor_name, max)
+        filter_cmd += " %s>=%s %s<=%s " % (
+            ob_descriptor_name,
+            min,
+            ob_descriptor_name,
+            max,
+        )
 
     args = shlex.split('%s "%s"' % (cmd, filter_cmd))
     # print '%s "%s"' % (cmd, filter_cmd)
@@ -76,18 +93,18 @@
     return_code = child.returncode
 
     if return_code:
-        sys.stdout.write(stdout.decode('utf-8'))
-        sys.stderr.write(stderr.decode('utf-8'))
+        sys.stdout.write(stdout.decode("utf-8"))
+        sys.stderr.write(stderr.decode("utf-8"))
         sys.stderr.write("Return error code %i from command:\n" % return_code)
         sys.stderr.write("%s\n" % cmd)
     else:
-        sys.stdout.write(stdout.decode('utf-8'))
-        sys.stdout.write(stderr.decode('utf-8'))
+        sys.stdout.write(stdout.decode("utf-8"))
+        sys.stdout.write(stderr.decode("utf-8"))
 
 
 def filter_by_name(args):
     outfile = pybel.Outputfile(args.oformat, args.output, overwrite=True)
-    for mol in pybel.readfile('sdf', args.input):
+    for mol in pybel.readfile("sdf", args.input):
         for name in open(args.list_of_names):
             if mol.title.strip() == name.strip():
                 outfile.write(mol)
@@ -96,21 +113,21 @@
 
 def __main__():
     """
-        Select compounds with certain properties from a small library
+    Select compounds with certain properties from a small library
     """
     args = parse_command_line()
 
-    if args.filters == '__filter_by_name__':
+    if args.filters == "__filter_by_name__":
         filter_by_name(args)
         return
 
     # Its a small trick to get the parameters in an easy way from the xml file.
     # To keep it readable in the xml file, many white-spaces are included in that string it needs to be removed.
     # Also the last loop creates a ',{' that is not an valid jason expression.
-    filters = json.loads((args.filters).replace(' ', '').replace(',}', '}'))
-    if args.iformat == 'sdf':
+    filters = json.loads((args.filters).replace(" ", "").replace(",}", "}"))
+    if args.iformat == "sdf":
         # Check if the sdf file contains all of the required metadata to invoke the precalculation filtering
-        mol = next(pybel.readfile('sdf', args.input))
+        mol = next(pybel.readfile("sdf", args.input))
         for key, elem in filters.items():
             property = cheminfolib.ColumnNames.get(key, key)
             if property not in mol.data: