diff uniprot.py @ 0:48522382b6a4 draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/uniprot_rest_interface commit 2b8ad1bbfe098129ae32cd8311a755dff58ae97b-dirty
author bgruening
date Fri, 09 Oct 2015 16:42:22 -0400
parents
children cd2a41c65447
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/uniprot.py	Fri Oct 09 16:42:22 2015 -0400
@@ -0,0 +1,117 @@
+#!/usr/bin/env python
+"""
+uniprot python interface
+to access the uniprot database
+
+Based on work from Jan Rudolph: https://github.com/jdrudolph/uniprot
+available services:
+    map
+    retrieve
+"""
+
+import requests
+import sys, argparse
+
+url = 'http://www.uniprot.org/'  # base URL; the service path ('batch/' or 'mapping/') is appended per request
+
+def _retrieve(query, format='txt'):
+    """_retrieve is not meant for use with the python interface, use `retrieve`
+    instead"""
+    tool = 'batch/'
+
+    query = list(set(query.split('\n')))
+    queries = [query[i:i+100] for i in range(0, len(query), 100)]
+
+    data = {'format':format}
+
+    responses = [requests.post(url + tool, data=data, files={'file':' '.join(query)}) for query in queries]
+    page = ''.join([response.text for response in responses])
+    return page
+
+def retrieve(ids, format='txt'):
+    """ request entries by uniprot acc using batch retrieval
+
+    Args:
+        query: list of ids to retrieve
+        format: txt by default
+
+    Help:
+        possible formats:
+        txt, xml, rdf, fasta, gff"""
+    if type(ids) is not list:
+        ids = [ids]
+    return _retrieve(' '.join(ids), format)
+
+def _map(query, f, t, format='tab'):
+    """ _map is not meant for use with the python interface, use `map` instead
+    """
+    tool = 'mapping/'
+
+    data = {
+            'from':f,
+            'to':t,
+            'format':format,
+            'query': query
+            }
+    response = requests.post(url + tool, data=data)
+    page = response.text
+    return page
+
+def map(ids, f, t, format='tab'):
+    """ map a list of ids from one format onto another using uniprots mapping api
+    
+    Args:
+        query: id or list of ids to be mapped
+        f: from ACC | P_ENTREZGENEID | ...
+        t: to ...
+        format: tab by default
+
+    Help:
+        for a list of all possible mappings visit
+        'http://www.uniprot.org/faq/28'
+    """
+    if type(ids) is not list:
+        ids = [ids]
+    page = _map(' '.join(ids), f, t, format)
+    result = dict()
+    for row in page.splitlines()[1:]:
+        key, value = row.split('\t')
+        if key in result:
+            result[key].add(value)
+        else:
+            result[key] = set([value])
+    return result
+
+if __name__ == '__main__':
+    import argparse
+    import sys
+
+    parser = argparse.ArgumentParser(description='retrieve uniprot mapping')
+    subparsers = parser.add_subparsers(dest='tool')
+
+    mapping = subparsers.add_parser('map')
+    mapping.add_argument('f', help='from')
+    mapping.add_argument('t', help='to')
+    mapping.add_argument('inp', nargs='?', type=argparse.FileType('r'),
+            default=sys.stdin, help='input file (default: stdin)')
+    mapping.add_argument('out', nargs='?', type=argparse.FileType('w'),
+            default=sys.stdout, help='output file (default: stdout)')
+    mapping.add_argument('--format', default='tab', help='output format')
+
+    retrieve = subparsers.add_parser('retrieve')
+    retrieve.add_argument('inp', metavar = 'in', nargs='?', type=argparse.FileType('r'),
+            default=sys.stdin, help='input file (default: stdin)')
+    retrieve.add_argument('out', nargs='?', type=argparse.FileType('w'),
+            default=sys.stdout, help='output file (default: stdout)')
+    retrieve.add_argument('-f', '--format', help='specify output format', default='txt')
+
+    args = parser.parse_args()
+    query = args.inp.read()
+
+    if args.tool == 'map':
+        args.out.write(_map(query, args.f, args.t, args.format))
+
+    elif args.tool == 'retrieve':
+        args.out.write(_retrieve(query, format=args.format))
+
+