view get_data/kegg_glycan/findKEGG.py @ 1:0a5e0df17054 draft default tip

Uploaded
author chrisb
date Fri, 06 May 2016 08:05:48 -0400
parents 89592faa2875
children
line wrap: on
line source

# Module metadata.
__author__ = 'cbarnett'
__license__ = "MIT"
__version__ = "0.4"
# Reference documentation for the KEGG REST API queried by this module:
# http://www.kegg.jp/kegg/rest/keggapi.html


def find_entries_in_KEGG(db, query):
    """
    Search a KEGG database through the KEGG REST "find" operation.

    The query is normalised before it is sent: line breaks are treated as
    "AND" and become '+', "ec: " is collapsed to "ec:", spaces around '+' are
    removed, a query that still contains spaces is wrapped in double quotes,
    and the remaining spaces are sent as '%20'.

    :param db: name of the KEGG database to search, e.g. "glycan"
    :param query: search text, e.g. "glucose"
    :return: the response body as returned by KEGG, or "" when the response
             contains nothing but whitespace
    :raises IOError: if db or query is empty or None
    :raises urllib2.HTTPError: if KEGG answers with an HTTP error status
    :raises urllib2.URLError: if the service cannot be reached
    """
    # Validate first so bad arguments are reported before anything else happens.
    if not db:
        raise IOError("no db given")
    if not query:
        raise IOError("no query given")

    import re
    import urllib2

    # In case of new lines, assume the user wants to "AND" the terms. '\r\n'
    # is handled first as a single line break; replacing '\n' and '\r'
    # independently would turn it into '++'.
    for newline in ('\r\n', '\n', '\r'):
        query = query.replace(newline, '+')
    # "ec: 2.4.99.1" must become "ec:2.4.99.1"; other spaces are kept because
    # they allow combination searches such as "2.4.99.1 2.4.99.6".
    query = query.replace('ec: ', 'ec:')
    # No unnecessary spaces in an AND query, otherwise KEGG behaves incorrectly.
    query = re.sub(r' *\+ *', '+', query)

    if ' ' in query:
        # If spaces remain, the query must be placed in quotes for KEGG.
        query = '"' + query + '"'
    # Pass spaces on as '%20' rather than letting text after a space be ignored.
    query = query.replace(' ', '%20')
    fulluri = 'http://rest.kegg.jp/find/' + db + "/" + query

    # Errors from urlopen propagate unchanged: HTTPError stays HTTPError, and
    # connection problems stay URLError instead of being masked by an
    # AttributeError raised while re-wrapping them.
    handle = urllib2.urlopen(fulluri)
    try:
        response = handle.read()
    finally:
        handle.close()

    if not response.strip():
        return ""  # "" rather than None so the result can be written straight to a file
    return response


# Command-line entry point: search a KEGG database and write the raw result
# to a text file.
if __name__ == "__main__":
    from optparse import OptionParser

    usage = "usage: python %prog [options]\n"
    parser = OptionParser(usage=usage)
    parser.add_option("-d", action="store", type="string", dest="d", default="glycan",
                      help="db name, options are: pathway | brite | module | ko | genome | <org> | compound | glycan | reaction | rpair | rclass | enzyme | disease | drug | dgroup | environ")
    parser.add_option("-q", action="store", type="string", dest="q", default="glucose",
                      help="query e.g. glucose")
    parser.add_option("-o", action="store", type="string", dest="o", default="found_entries.txt",
                      help="entries from search output in text format")
    (options, args) = parser.parse_args()

    # The output file is opened before the query is run, so an unwritable
    # path is reported before the KEGG request is made.
    try:
        outstream = open(options.o, 'w')
    except Exception as e:
        raise IOError(e, "the output file cannot be opened. Use -h flag for help")

    # The with-block covers the query as well, so the file is closed even
    # when the KEGG request fails.
    with outstream:
        results = find_entries_in_KEGG(db=options.d, query=options.q)
        try:
            outstream.write(results)
        except Exception as e:
            raise IOError(e, "cannot write to the output file. Use -h flag for help")