comparison uniprot.py @ 1:cd2a41c65447 draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/uniprot_rest_interface commit fb201f32a92466c2bd8086e91bbf777401f1b489
author bgruening
date Fri, 14 Oct 2016 17:59:02 -0400
parents 48522382b6a4
children 144e9558d3fd
comparison
equal deleted inserted replaced
0:48522382b6a4 1:cd2a41c65447
6 Based on work from Jan Rudolph: https://github.com/jdrudolph/uniprot 6 Based on work from Jan Rudolph: https://github.com/jdrudolph/uniprot
7 available services: 7 available services:
8 map 8 map
9 retrieve 9 retrieve
10 """ 10 """
11 import argparse
12 import sys
11 13
12 import requests 14 import requests
13 import sys, argparse
14 15
15 url = 'http://www.uniprot.org/' 16 url = 'http://www.uniprot.org/'
17
16 18
17 def _retrieve(query, format='txt'): 19 def _retrieve(query, format='txt'):
18 """_retrieve is not meant for use with the python interface, use `retrieve` 20 """_retrieve is not meant for use with the python interface, use `retrieve`
19 instead""" 21 instead"""
20 tool = 'batch/' 22 tool = 'batch/'
21 23
22 query = list(set(query.split('\n'))) 24 query = list(set(query.split('\n')))
23 queries = [query[i:i+100] for i in range(0, len(query), 100)] 25 queries = [query[i:i+100] for i in range(0, len(query), 100)]
24 26
25 data = {'format':format} 27 data = {'format': format}
26 28
27 responses = [requests.post(url + tool, data=data, files={'file':' '.join(query)}) for query in queries] 29 responses = [requests.post(url + tool, data=data, files={'file': ' '.join(_)}) for _ in queries]
28 page = ''.join([response.text for response in responses]) 30 page = ''.join(response.text for response in responses)
29 return page 31 return page
30 32
31 def retrieve(ids, format='txt'):
32 """ request entries by uniprot acc using batch retrieval
33
34 Args:
35 query: list of ids to retrieve
36 format: txt by default
37
38 Help:
39 possible formats:
40 txt, xml, rdf, fasta, gff"""
41 if type(ids) is not list:
42 ids = [ids]
43 return _retrieve(' '.join(ids), format)
44 33
45 def _map(query, f, t, format='tab'): 34 def _map(query, f, t, format='tab'):
46 """ _map is not meant for use with the python interface, use `map` instead 35 """ _map is not meant for use with the python interface, use `map` instead
47 """ 36 """
48 tool = 'mapping/' 37 tool = 'mapping/'
49 38
50 data = { 39 data = {
51 'from':f, 40 'from': f,
52 'to':t, 41 'to': t,
53 'format':format, 42 'format': format,
54 'query': query 43 'query': query
55 } 44 }
56 response = requests.post(url + tool, data=data) 45 response = requests.post(url + tool, data=data)
57 page = response.text 46 page = response.text
58 return page 47 return page
59 48
60 def map(ids, f, t, format='tab'):
61 """ map a list of ids from one format onto another using uniprots mapping api
62
63 Args:
64 query: id or list of ids to be mapped
65 f: from ACC | P_ENTREZGENEID | ...
66 t: to ...
67 format: tab by default
68
69 Help:
70 for a list of all possible mappings visit
71 'http://www.uniprot.org/faq/28'
72 """
73 if type(ids) is not list:
74 ids = [ids]
75 page = _map(' '.join(ids), f, t, format)
76 result = dict()
77 for row in page.splitlines()[1:]:
78 key, value = row.split('\t')
79 if key in result:
80 result[key].add(value)
81 else:
82 result[key] = set([value])
83 return result
84 49
85 if __name__ == '__main__': 50 if __name__ == '__main__':
86 import argparse
87 import sys
88
89 parser = argparse.ArgumentParser(description='retrieve uniprot mapping') 51 parser = argparse.ArgumentParser(description='retrieve uniprot mapping')
90 subparsers = parser.add_subparsers(dest='tool') 52 subparsers = parser.add_subparsers(dest='tool')
91 53
92 mapping = subparsers.add_parser('map') 54 mapping = subparsers.add_parser('map')
93 mapping.add_argument('f', help='from') 55 mapping.add_argument('f', help='from')
94 mapping.add_argument('t', help='to') 56 mapping.add_argument('t', help='to')
95 mapping.add_argument('inp', nargs='?', type=argparse.FileType('r'), 57 mapping.add_argument('inp', nargs='?', type=argparse.FileType('r'),
96 default=sys.stdin, help='input file (default: stdin)') 58 default=sys.stdin, help='input file (default: stdin)')
97 mapping.add_argument('out', nargs='?', type=argparse.FileType('w'), 59 mapping.add_argument('out', nargs='?', type=argparse.FileType('w'),
98 default=sys.stdout, help='output file (default: stdout)') 60 default=sys.stdout, help='output file (default: stdout)')
99 mapping.add_argument('--format', default='tab', help='output format') 61 mapping.add_argument('--format', default='tab', help='output format')
100 62
101 retrieve = subparsers.add_parser('retrieve') 63 retrieve = subparsers.add_parser('retrieve')
102 retrieve.add_argument('inp', metavar = 'in', nargs='?', type=argparse.FileType('r'), 64 retrieve.add_argument('inp', metavar='in', nargs='?', type=argparse.FileType('r'),
103 default=sys.stdin, help='input file (default: stdin)') 65 default=sys.stdin, help='input file (default: stdin)')
104 retrieve.add_argument('out', nargs='?', type=argparse.FileType('w'), 66 retrieve.add_argument('out', nargs='?', type=argparse.FileType('w'),
105 default=sys.stdout, help='output file (default: stdout)') 67 default=sys.stdout, help='output file (default: stdout)')
106 retrieve.add_argument('-f', '--format', help='specify output format', default='txt') 68 retrieve.add_argument('-f', '--format', help='specify output format', default='txt')
107 69
108 args = parser.parse_args() 70 args = parser.parse_args()
109 query = args.inp.read() 71 query = args.inp.read()
110 72
111 if args.tool == 'map': 73 if args.tool == 'map':
112 args.out.write(_map(query, args.f, args.t, args.format)) 74 args.out.write(_map(query, args.f, args.t, args.format))
113 75
114 elif args.tool == 'retrieve': 76 elif args.tool == 'retrieve':
115 args.out.write(_retrieve(query, format=args.format)) 77 args.out.write(_retrieve(query, format=args.format))
116
117