Mercurial > repos > bgruening > uniprot_rest_interface
comparison uniprot.py @ 7:bfdc6a7ffd3a draft
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/uniprot_rest_interface commit ddbed5f0b0879b4a001d2da6a521b0c9a39c1e7b"
author | bgruening |
---|---|
date | Thu, 22 Apr 2021 17:32:16 +0000 |
parents | f7ebd1b4783b |
children | af5eccf83605 |
comparison
equal
deleted
inserted
replaced
6:054483e27a35 | 7:bfdc6a7ffd3a |
---|---|
5 | 5 |
6 Based on work from Jan Rudolph: https://github.com/jdrudolph/uniprot | 6 Based on work from Jan Rudolph: https://github.com/jdrudolph/uniprot |
7 available services: | 7 available services: |
8 map | 8 map |
9 retrieve | 9 retrieve |
10 | |
11 rewritten using inspiration from: https://findwork.dev/blog/advanced-usage-python-requests-timeouts-retries-hooks/ | |
10 """ | 12 """ |
11 import argparse | 13 import argparse |
12 import sys | 14 import sys |
13 | 15 |
14 import requests | 16 import requests |
15 | 17 from requests.adapters import HTTPAdapter |
16 url = 'https://www.uniprot.org/' | 18 from requests.packages.urllib3.util.retry import Retry |
17 | 19 |
18 | 20 |
19 def _retrieve(query, format='txt'): | 21 DEFAULT_TIMEOUT = 5 # seconds |
20 """_retrieve is not meant for use with the python interface, use `retrieve` | 22 URL = 'https://www.uniprot.org/' |
21 instead""" | |
22 tool = 'uploadlists/' | |
23 | 23 |
24 query = list(set(query.split('\n'))) | 24 retry_strategy = Retry( |
25 queries = [query[i:i+100] for i in range(0, len(query), 100)] | 25 total=5, |
26 | 26 backoff_factor=2, |
27 data = { | 27 status_forcelist=[429, 500, 502, 503, 504], |
28 'format': format, | 28 allowed_methods=["HEAD", "GET", "OPTIONS", "POST"] |
29 'from': 'ACC+ID', | 29 ) |
30 'to': 'ACC' | |
31 } | |
32 | |
33 responses = [requests.post(url + tool, data=data, files={'file': ' '.join(_)}) for _ in queries] | |
34 page = ''.join(response.text for response in responses) | |
35 return page | |
36 | 30 |
37 | 31 |
38 def _map(query, f, t, format='tab'): | 32 class TimeoutHTTPAdapter(HTTPAdapter): |
33 def __init__(self, *args, **kwargs): | |
34 self.timeout = DEFAULT_TIMEOUT | |
35 if "timeout" in kwargs: | |
36 self.timeout = kwargs["timeout"] | |
37 del kwargs["timeout"] | |
38 super().__init__(*args, **kwargs) | |
39 | |
40 def send(self, request, **kwargs): | |
41 timeout = kwargs.get("timeout") | |
42 if timeout is None: | |
43 kwargs["timeout"] = self.timeout | |
44 return super().send(request, **kwargs) | |
45 | |
46 | |
47 def _map(query, f, t, format='tab', chunk_size=100): | |
39 """ _map is not meant for use with the python interface, use `map` instead | 48 """ _map is not meant for use with the python interface, use `map` instead |
40 """ | 49 """ |
41 tool = 'uploadlists/' | 50 tool = 'uploadlists/' |
51 data = {'format': format, 'from': f, 'to': t} | |
42 | 52 |
43 data = { | 53 req = [] |
44 'from': f, | 54 for i in range(0, len(query), chunk_size): |
45 'to': t, | 55 q = query[i:i + chunk_size] |
46 'format': format, | 56 req.append(dict([("url", URL + tool), |
47 'query': query | 57 ('data', data), |
48 } | 58 ("files", {'file': ' '.join(q)})])) |
49 response = requests.post(url + tool, data=data) | 59 return req |
60 response = requests.post(URL + tool, data=data) | |
61 response.raise_for_status() | |
50 page = response.text | 62 page = response.text |
63 if "The service is temporarily unavailable" in page: | |
64 exit("The UNIPROT service is temporarily unavailable. Please try again later.") | |
51 return page | 65 return page |
52 | 66 |
53 | 67 |
54 if __name__ == '__main__': | 68 if __name__ == '__main__': |
55 parser = argparse.ArgumentParser(description='retrieve uniprot mapping') | 69 parser = argparse.ArgumentParser(description='retrieve uniprot mapping') |
70 retrieve.add_argument('out', nargs='?', type=argparse.FileType('w'), | 84 retrieve.add_argument('out', nargs='?', type=argparse.FileType('w'), |
71 default=sys.stdout, help='output file (default: stdout)') | 85 default=sys.stdout, help='output file (default: stdout)') |
72 retrieve.add_argument('-f', '--format', help='specify output format', default='txt') | 86 retrieve.add_argument('-f', '--format', help='specify output format', default='txt') |
73 | 87 |
74 args = parser.parse_args() | 88 args = parser.parse_args() |
75 query = args.inp.read() | 89 |
90 # get the IDs from the file as sorted list | |
91 # (sorted is convenient for testing) | |
92 query = set() | |
93 for line in args.inp: | |
94 query.add(line.strip()) | |
95 query = sorted(query) | |
76 | 96 |
77 if args.tool == 'map': | 97 if args.tool == 'map': |
78 args.out.write(_map(query, args.f, args.t, args.format)) | 98 pload = _map(query, args.f, args.t, chunk_size=100) |
99 elif args.tool == 'retrieve': | |
100 pload = _map(query, 'ACC+ID', 'ACC', args.format, chunk_size=100) | |
79 | 101 |
80 elif args.tool == 'retrieve': | 102 adapter = TimeoutHTTPAdapter(max_retries=retry_strategy) |
81 args.out.write(_retrieve(query, format=args.format)) | 103 http = requests.Session() |
104 http.mount("https://", adapter) | |
105 for i, p in enumerate(pload): | |
106 response = http.post(**p) | |
107 args.out.write(response.text) | |
108 http.close() |