view ob_filter.py @ 2:30a6c37e95ac draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/openbabel commit 78ac0521d7df684e96c1b9c1ba2a17b02e681608
author bgruening
date Sat, 20 May 2017 20:04:59 -0400
parents 98e12cc1f3a8
children fc199b60875d
line wrap: on
line source

#!/usr/bin/env python
"""
    Input: set of molecules with pre-calculated physico-chemical properties
    Output: set of molecules that pass all the filters
    Copyright 2012, Bjoern Gruening and Xavier Lucas

    TODO: AND/OR conditions?
"""
import sys, os
import argparse
import cheminfolib
import json
import pybel
import shlex, subprocess

cheminfolib.pybel_stop_logging()

def parse_command_line():
    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--input', help='Input file name')
    parser.add_argument('-iformat', help='Input file format')
    parser.add_argument('-oformat', 
        default='smi',
        help='Output file format')
    parser.add_argument('-o', '--output', 
        help='Output file name',
        required=True)
    parser.add_argument('--filters', 
        help="Specify the filters to apply",
        required=True,
        )
    return parser.parse_args()

def filter_precalculated_compounds(args, filters):
    outfile = pybel.Outputfile(args.oformat, args.output, overwrite=True)
    for mol in pybel.readfile('sdf', args.input):
        for key, elem in filters.items():
            # map the short description to the larger metadata names stored in the sdf file
            property = cheminfolib.ColumnNames[key]
            min = elem[0]
            max = elem[1]
            if float(mol.data[property]) >= float(min) and float(mol.data[property]) <= float(max):
                pass
            else:
                # leave the filter loop, because one filter constrained are not satisfied
                break
        else:
            # if the filter loop terminates in a normal way (no break) all filter rules are satisfied, so save the compound
            outfile.write(mol)
    outfile.close()

def filter_new_compounds(args, filters):

    if args.iformat == args.oformat:
        # use the -ocopy option from openbabel to speed up the filtering, additionally no conversion is carried out
        # http://openbabel.org/docs/dev/FileFormats/Copy_raw_text.html#copy-raw-text
        cmd = 'obabel -i%s %s -ocopy -O %s --filter' % (args.iformat, args.input, args.output)
    else:
        cmd = 'obabel -i%s %s -o%s -O %s --filter' % (args.iformat, args.input, args.oformat, args.output)
    filter_cmd = ''
    # OBDescriptor stores a mapping from our desc shortcut to the OB name [0] and a long description [1]
    for key, elem in filters.items():
        ob_descriptor_name = cheminfolib.OBDescriptor[key][0]
        min = elem[0]
        max = elem[1]
        filter_cmd += ' %s>=%s %s<=%s ' % (ob_descriptor_name, min, ob_descriptor_name, max)

    args = shlex.split('%s "%s"' % (cmd, filter_cmd))
    #print '%s "%s"' % (cmd, filter_cmd)
    # calling openbabel with subprocess and pipe potential errors occuring in openbabel to stdout
    child = subprocess.Popen(args,
        stdout=subprocess.PIPE, stderr=subprocess.PIPE)

    stdout, stderr = child.communicate()
    return_code = child.returncode

    if return_code:
        sys.stdout.write(stdout.decode('utf-8'))
        sys.stderr.write(stderr.decode('utf-8'))
        sys.stderr.write("Return error code %i from command:\n" % return_code)
        sys.stderr.write("%s\n" % cmd)
    else:
        sys.stdout.write(stdout.decode('utf-8'))
        sys.stdout.write(stderr.decode('utf-8'))


def __main__():
    """
        Select compounds with certain properties from a small library
    """
    args = parse_command_line()
    # Its a small trick to get the parameters in an easy way from the xml file.
    # To keep it readable in the xml file, many white-spaces are included in that string it needs to be removed.
    # Also the last loop creates a ',{' that is not an valid jason expression.
    filters = json.loads((args.filters).replace(' ', '').replace(',}', '}'))
    if args.iformat == 'sdf':
        # Check if the sdf file contains all of the required metadata to invoke the precalculation filtering
        mol = next(pybel.readfile('sdf', args.input))
        for key, elem in filters.items():
            property = cheminfolib.ColumnNames[key]
            if not property in mol.data:
                break
        else:
            # if the for loop finishes in a normal way, we should habe all properties at least in the first molecule
            # assume it is the same for all other molecules and start the precalculated filtering
            filter_precalculated_compounds(args, filters)
            return True
    filter_new_compounds(args, filters)


if __name__ == "__main__" :
    __main__()
oz۶~;$,Keʖ,Hv5g~*<ǭ%Md ")Rv؆!P\YD!\{}1y"jR2X)}s) Ek9dK^Zi8 c`NvCz,U2UgMƻ6}W$4sK8 5`ypb剷LP|#ߝYT5wIU3X&wwZ%EB4,鮀q_x֍e~Ƥ,`Hb]x@a=._pZx'|̖9ʢTG_=3XM?C.|>Bݼ̼Cb>8$~ eo?}zNb$s:_@D~L&@txTqχ<v;m{}Qi,{4ATpˋ\.a˅QYXggpAx]>iYA!YSk$ED&4xWUnꯧ߯iClchR`{(L܄<2ʖ~ūe2'F)6xM _? W|1Lu5,t~q S_ 5 \rظ&>N?JS]]eW4R(̛-4F~ d`FPAUHACzT4dGT1tQ 鱩٫6) cSWc* n^ >.c?6ֱtSpHpc(p7&Ѣ,c tdOxteUuC:b<Q/S9:92o Nt{ut4l?A:zБg:c.3=!:6Hg3O!wojR{LϝsmhCd{{BnϷ=`uƃm:/ytjI9G}" \"NL#*n.W>"b}G۫2nKi'(T-I|DR$\yQFjYOÍY-%'Aweϯ㰼>޹YOSKUj!Zt( [JFR K iш^Ym nN !O1w(*#jNNX%$m:b xE3& -̼e8Np?}H|YyJ4JIMDd7ue97U4g#nXL2c{@]vU*ǭaR{2[b[t.=^}U_UtQx+;ĎTvwwtowεE\ڸqFD1<YGi6Ldha~+v~ h: o=;w9{[>?'x黷Ħ/lIyUT{X=[rGA^a)EUN }k"-y@_H|EU[n_VIk*DL@?2*fz`=i0sM,@;=?#2hA_i1Љ$af0w;.16?y[*("+c7٦*@FU?$9a_F?Rj6ڍCMѬ_ЎMl mֲΈM A9/aM3DwW􈿡iǗ2氵-6 mhzŽ[aKݒ#vy\NHW`Fp4:_%+Ƙ-V:\D}4fƀ4 Z-0Vj5 8&$Z6 ָXB J2U&R/rC*>Î'=h(V;DT-4*>"Py^zNLL2\Q< ~ bsF^)EQq(yrp