Mercurial > repos > iuc > iedb_api
comparison iedb_api.py @ 0:fe3c43451319 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/iedb_api commit 9aaa7c6c7241db52681b12939ebd908902830ef1"
author | iuc |
---|---|
date | Fri, 28 Feb 2020 18:09:34 -0500 |
parents | |
children | 6cf84410cb2e |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:fe3c43451319 |
---|---|
1 #!/usr/bin/env python | |
2 | |
3 import argparse | |
4 import os.path | |
5 import re | |
6 import sys | |
7 import time | |
8 from urllib.error import HTTPError | |
9 from urllib.parse import unquote, urlencode | |
10 from urllib.request import urlopen | |
11 | |
# Method names accepted for each IEDB prediction service.
mhci_methods = [
    'recommended', 'consensus',
    'netmhcpan_ba', 'netmhcpan_el',
    'ann', 'smmpmbec', 'smm',
    'comblib_sidney2008', 'netmhccons',
    'pickpocket', 'netmhcstabpan',
]
mhcii_methods = [
    'recommended', 'consensus', 'NetMHCIIpan',
    'nn_align', 'smm_align', 'comblib', 'tepitope',
]
processing_methods = [
    'recommended', 'netmhcpan', 'ann',
    'smmpmbec', 'smm', 'comblib_sidney2008',
    'netmhccons', 'pickpocket',
]
mhcnp_methods = ['mhcnp', 'netmhcpan']
bcell_methods = [
    'Bepipred', 'Chou-Fasman', 'Emini', 'Karplus-Schulz',
    'Kolaskar-Tongaonkar', 'Parker', 'Bepipred-2.0',
]

# Prediction service name -> list of methods for that service.
prediction_methods = dict(
    mhci=mhci_methods,
    mhcii=mhcii_methods,
    processing=processing_methods,
    mhcnp=mhcnp_methods,
    bcell=bcell_methods,
)

# Every distinct method name.  processing_methods is not listed here, but
# each of its names already occurs in mhci_methods or mhcnp_methods.
all_methods = (set(mhci_methods) | set(mhcii_methods)
               | set(mhcnp_methods) | set(bcell_methods))

# Length ranges keyed by prediction service (upper bound exclusive).
prediction_lengths = dict(
    mhci=range(8, 16),
    mhcii=range(11, 31),
    processing=range(8, 15),
    mhcnp=range(8, 12),
    bcell=range(8, 16),
)
37 | |
38 | |
def parse_alleles(allelefile, lengths):
    """Read alleles and their peptide lengths from a comma-separated file.

    Each non-blank line has the form ``allele[,length[,length...]]``.
    An allele is emitted once per length so that the two returned lists
    stay index-aligned.

    Args:
        allelefile: Path of the file to read.
        lengths: Default length(s) applied to alleles whose line lists no
            length.  May be ``None``, a comma-separated string, or a
            list/tuple of values (each of which may itself be
            comma-separated).

    Returns:
        A tuple ``(alleles, lengths)`` of two lists of strings.  When a line
        has no length and no default is available, the allele is still
        appended but no length is added for it.
    """
    # Normalise the caller's defaults into a flat list of strings.  Keep
    # them in a separate name: the original code overwrote this parameter
    # with the output accumulator, so the defaults were never applied.
    if lengths is None:
        default_items = []
    elif isinstance(lengths, (list, tuple)):
        default_items = lengths
    else:
        default_items = [lengths]
    default_lengths = [part.strip()
                       for item in default_items
                       for part in str(item).split(',')
                       if part.strip()]

    alleles = []
    allele_lengths = []
    with open(allelefile, 'r') as fh:
        for line in fh:
            fields = [field.strip() for field in line.strip().split(',')]
            allele = fields[0]
            if not allele:
                continue
            # Lengths given on the line win; otherwise use the defaults.
            line_lengths = [f for f in fields[1:] if f] or default_lengths
            if line_lengths:
                for alen in line_lengths:
                    alleles.append(allele)
                    allele_lengths.append(alen)
            else:
                alleles.append(allele)
    return (alleles, allele_lengths)
58 | |
59 | |
def query(url, prediction, seq, allele, length, results,
          seqid=None, method='recommended', proteasome=None,
          timeout=300, retries=3, sleep=300, debug=False):
    """POST one sequence to an IEDB tools-API URL and collect the reply.

    Args:
        url: Service URL to POST to.
        prediction: Prediction service name; for ``'bcell'`` the length is
            sent as ``window_size`` instead of ``length``.
        seq: Sequence text (surrounding whitespace is stripped).
        allele: Value for the ``allele`` form field, or ``None`` to omit it.
        length: Value for the length/window field, or ``None`` to omit it.
        results: Dict with ``'prediction'`` and ``'detail'`` entries, each a
            dict holding ``'header'`` and an ``'entries'`` list.  It is
            updated in place.
        seqid: Optional identifier prepended (tab-separated) to every
            result line.
        method: Value for the ``method`` form field; omitted when falsy.
        proteasome: Value for the ``proteasome`` form field; omitted when
            falsy.
        timeout: Seconds passed to ``urlopen`` as its timeout.
        retries: Number of additional attempts after the first one when the
            server answers with an HTTP error.  Negative values count as 0.
        sleep: Seconds to wait between attempts.
        debug: When true, echo the request and raw response to stderr.

    Returns:
        The same ``results`` dict that was passed in.

    Exits the process (via ``warn_err``) when the response looks like an
    error page, when a non-200 status is returned, when the last attempt
    fails with an HTTP error, or on any other exception during the request.
    """
    params = {}
    if method:
        params['method'] = method
    if proteasome:
        params['proteasome'] = proteasome
    params['sequence_text'] = seq.strip()
    if allele is not None:
        params['allele'] = allele
    if length is not None:
        length_key = 'window_size' if prediction == 'bcell' else 'length'
        params[length_key] = str(length)
    req_data = urlencode(params)
    if debug:
        print('url %s %s' % (url, unquote(req_data)), file=sys.stderr)

    # One initial attempt plus ``retries`` further attempts.  The original
    # loop compared against the already-incremented count, so the final
    # HTTP failure never exited and retries=0 made no request at all.
    attempts = max(0, retries) + 1
    body = req_data.encode('utf-8')
    for attempt in range(1, attempts + 1):
        try:
            # The context manager closes the connection on every path.
            with urlopen(url, data=body, timeout=timeout) as response:
                status = response.getcode()
                if status != 200:
                    warn_err("Error connecting to IEDB server\n",
                             exit_code=status or 1)
                data = [raw.decode() for raw in response.readlines()]
        except HTTPError as e:
            is_last = attempt >= attempts
            warn_err("%d of %d Error connecting to IEDB server %s\n" %
                     (attempt, attempts, e),
                     exit_code=(e.code or 1) if is_last else None)
            # Only reached when another attempt follows.
            time.sleep(sleep)
            continue
        except Exception as e:
            warn_err("Error connecting to IEDB server %s\n" % e,
                     exit_code=3)

        if debug:
            print(data, file=sys.stderr)
        id_header = "ID\t" if seqid else ""
        rslts = results['prediction']['entries']
        for line in data:
            # The service reports problems in the body of a 200 response.
            if 'invalid' in line.lower() or 'tools_api.html' in line:
                msg = '%s %s\n%s' % (url, unquote(req_data), ''.join(data))
                warn_err(msg, exit_code=1)
            if line.find('eptide') > 0:
                results['prediction']['header'] = "#%s%s" % (id_header, line)
                continue
            elif method == 'Bepipred' and line.find('Residue') > 0:
                # Lines after this header belong to the detail table.
                results['detail']['header'] = "#%s%s" % (id_header, line)
                rslts = results['detail']['entries']
                continue
            # append, not extend: extend() on a str adds one entry per
            # character instead of one entry per line.
            rslts.append("%s\t%s" % (seqid, line) if seqid else line)
        break
    return results
123 | |
124 | |
def warn_err(msg, exit_code=1):
    """Write *msg* to stderr and exit with *exit_code* when it is truthy.

    A falsy *exit_code* (``None`` or ``0``) turns the call into a plain
    warning: the message is written and control returns to the caller.
    """
    err = sys.stderr
    err.write(msg)
    err.flush()
    if not exit_code:
        return
    sys.exit(exit_code)
130 | |
131 | |
def __main__():
    """Command-line entry point.

    Parses the arguments, submits every sequence given with ``-s`` or read
    from the ``-i`` input file (tabular when ``-c`` is given, otherwise
    FASTA) through ``query``, then writes the collected prediction lines to
    ``-o`` (stdout by default) and any detail lines to ``-O``.
    """
    # Parse Command Line
    parser = argparse.ArgumentParser(description='', epilog='')
    parser.add_argument('-p', '--prediction',
                        default='mhci',
                        choices=prediction_methods.keys(),
                        help='IEDB API prediction service')
    parser.add_argument('-s', '--sequence',
                        action="append",
                        default=None,
                        help='Peptide Sequence')
    parser.add_argument('-m', '--method',
                        default='recommended',
                        choices=all_methods,
                        help='prediction method')
    parser.add_argument('-P', '--proteasome',
                        default=None,
                        choices=['immuno', 'constitutive'],
                        help='IEDB processing proteasome type')
    parser.add_argument('-a', '--allele',
                        action="append",
                        default=[],
                        help='Alleles for which to make predictions')
    parser.add_argument('-A', '--allelefile',
                        default=None,
                        help='File of HLA alleles')
    parser.add_argument('-l', '--length',
                        action="append",
                        default=[],
                        help='lengths for which to make predictions, ' +
                             '1 per allele')
    parser.add_argument('-w', '--window_size',
                        type=int,
                        default=None,
                        help='window_size for bcell prediction')
    parser.add_argument('-i', '--input',
                        default=None,
                        help='Input file for peptide sequences ' +
                             '(fasta or tabular)')
    parser.add_argument('-c', '--column',
                        default=None,
                        help='Peptide Column in a tabular input file')
    parser.add_argument('-C', '--id_column',
                        default=None,
                        help='ID Column in a tabular input file')
    parser.add_argument('-o', '--output',
                        default=None,
                        help='Output file for query results')
    parser.add_argument('-O', '--output2',
                        default='iedb_results2',
                        help='Output file for secondary query results')
    parser.add_argument('-t', '--timeout',
                        type=int,
                        default=600,
                        help='Seconds to wait for server response')
    parser.add_argument('-r', '--retries',
                        type=int,
                        default=5,
                        help='Number of times to retry server query')
    parser.add_argument('-S', '--sleep',
                        type=int,
                        default=300,
                        help='Seconds to wait between retries')
    parser.add_argument('-d', '--debug',
                        action='store_true',
                        default=False,
                        help='Turn on wrapper debugging to stderr')
    args = parser.parse_args()

    aapat = re.compile('^[ABCDEFGHIKLMNPQRSTVWY]+$')

    if args.prediction != 'bcell':
        if not args.allele and not args.allelefile:
            warn_err('-a allele or -A allelefile required\n', exit_code=1)

    if not (args.sequence or args.input):
        warn_err('NO Sequences given: ' +
                 'either -s sequence or -i input_file is required\n',
                 exit_code=1)

    # Open the primary output before querying so that an unwritable path
    # fails before any request is sent.
    if args.output is not None:
        try:
            output_file = open(os.path.abspath(args.output), 'w')
        except Exception as e:
            warn_err("Unable to open output file: %s\n" % e, exit_code=1)
    else:
        output_file = sys.stdout

    # params
    alleles = []
    lengths = []
    if args.prediction == 'bcell' and args.window_size is not None:
        lengths.append(str(args.window_size))
    else:
        if args.allelefile:
            (alleles, lengths) = parse_alleles(args.allelefile, args.length)
            alleles = list(alleles)
            lengths = list(lengths)
        for i, allele in enumerate(args.allele):
            alleles.append(allele)
            # Re-use the last length when fewer lengths than alleles were
            # given; with no -l at all, add nothing (the original indexed
            # an empty list here and raised IndexError).
            if args.length:
                lengths.append(args.length[min(i, len(args.length) - 1)])
    allele = ','.join(alleles) if alleles else None
    length = ','.join(lengths) if lengths else None
    method = args.method
    # The original compared against the misspelling 'processcing', so the
    # proteasome option was never sent.
    proteasome = args.proteasome if args.prediction == 'processing' else None
    url = 'http://tools-cluster-interface.iedb.org/tools_api/%s/' %\
        args.prediction

    # results
    results = {'prediction': {'header': None, 'entries': []},
               'detail': {'header': None, 'entries': []}}

    def run_query(seq, seqid):
        """Submit one sequence using the options shared by every request."""
        query(url, args.prediction, seq, allele, length, results,
              seqid=seqid, method=method, proteasome=proteasome,
              timeout=args.timeout, retries=args.retries,
              sleep=args.sleep, debug=args.debug)

    if args.sequence:
        for i, seq in enumerate(args.sequence):
            run_query(seq, 'pep_%d' % i)

    if args.input:
        # Top-level boundary: any failure while reading or processing the
        # input is reported and ends the run, as before.
        try:
            with open(args.input, 'r') as fh:
                if args.column:  # tabular
                    col = int(args.column)
                    idcol = int(args.id_column) if args.id_column else None
                    for i, line in enumerate(fh):
                        fields = line.rstrip('\r\n').split('\t')
                        if len(fields) <= col:
                            continue
                        seq = re.sub('[_*]', '', fields[col].strip())
                        if not aapat.match(seq):
                            warn_err('Line %d, Not a peptide: %s\n' % (i, seq),
                                     exit_code=None)
                            continue
                        if idcol is not None and idcol < len(fields):
                            seqid = fields[idcol]
                        else:
                            seqid = 'pep_%d' % i
                        run_query(seq, seqid)
                else:  # fasta
                    seqid = None
                    parts = []
                    for line in fh:
                        if line.startswith('>'):
                            # A new header ends the previous record.
                            if seqid and parts:
                                run_query(''.join(parts), seqid)
                            seqid = line[1:].strip()
                            parts = []
                        else:
                            text = line.strip()
                            if text:
                                parts.append(text)
                    # Flush the final record.
                    if seqid and parts:
                        run_query(''.join(parts), seqid)
        except Exception as e:
            warn_err("Unable to open input file: %s\n" % e, exit_code=1)

    try:
        if results['prediction']['header']:
            output_file.write(results['prediction']['header'])
        output_file.writelines(results['prediction']['entries'])
    finally:
        if output_file is not sys.stdout:
            output_file.close()

    detail = results['detail']
    if detail['entries']:
        if args.output2:
            try:
                detail_file = open(os.path.abspath(args.output2), 'w')
            except Exception as e:
                warn_err("Unable to open output file: %s\n" % e, exit_code=1)
        else:
            detail_file = sys.stdout
        try:
            if detail['header']:
                detail_file.write(detail['header'])
            detail_file.writelines(detail['entries'])
        finally:
            if detail_file is not sys.stdout:
                detail_file.close()
316 | |
317 | |
# Run the command-line entry point only when executed as a script,
# not when this module is imported.
if __name__ == "__main__":
    __main__()