comparison iedb_api.py @ 0:fe3c43451319 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/iedb_api commit 9aaa7c6c7241db52681b12939ebd908902830ef1"
author iuc
date Fri, 28 Feb 2020 18:09:34 -0500
parents
children 6cf84410cb2e
comparison
equal deleted inserted replaced
-1:000000000000 0:fe3c43451319
1 #!/usr/bin/env python
2
3 import argparse
4 import os.path
5 import re
6 import sys
7 import time
8 from urllib.error import HTTPError
9 from urllib.parse import unquote, urlencode
10 from urllib.request import urlopen
11
# Method names offered for each IEDB tools-API prediction service.
# The keys of ``prediction_methods`` are the valid values of --prediction and
# ``all_methods`` supplies the valid values of --method.
mhci_methods = [
    'recommended', 'consensus',
    'netmhcpan_ba', 'netmhcpan_el',
    'ann', 'smmpmbec', 'smm',
    'comblib_sidney2008', 'netmhccons',
    'pickpocket', 'netmhcstabpan',
]
mhcii_methods = [
    'recommended', 'consensus', 'NetMHCIIpan',
    'nn_align', 'smm_align', 'comblib', 'tepitope',
]
processing_methods = [
    'recommended', 'netmhcpan', 'ann',
    'smmpmbec', 'smm', 'comblib_sidney2008',
    'netmhccons', 'pickpocket',
]
mhcnp_methods = ['mhcnp', 'netmhcpan']
bcell_methods = [
    'Bepipred', 'Chou-Fasman', 'Emini', 'Karplus-Schulz',
    'Kolaskar-Tongaonkar', 'Parker', 'Bepipred-2.0',
]
prediction_methods = {
    'mhci': mhci_methods,
    'mhcii': mhcii_methods,
    'processing': processing_methods,
    'mhcnp': mhcnp_methods,
    'bcell': bcell_methods,
}
# Union of every service's methods.  Derived from ``prediction_methods`` so a
# method added to any one service (including 'processing', which the previous
# hand-written union left out) is automatically an accepted --method choice.
all_methods = set().union(*prediction_methods.values())
# Length ranges keyed by prediction service.
# NOTE(review): presumably the peptide lengths (window sizes for bcell) each
# service accepts -- nothing in this file reads this table; confirm against
# the IEDB API documentation before relying on it.
prediction_lengths = {
    'mhci': range(8, 16),
    'mhcii': range(11, 31),
    'processing': range(8, 15),
    'mhcnp': range(8, 12),
    'bcell': range(8, 16),
}
37
38
def parse_alleles(allelefile, lengths):
    """Read alleles, and the lengths to predict for each, from a file.

    Each non-blank line of *allelefile* is ``allele[,length[,length...]]``.
    An allele is emitted once per length so the two returned lists stay
    parallel.  Lines that name no lengths of their own fall back to the
    caller-supplied *lengths*.

    Args:
        allelefile: path of the allele file to read.
        lengths: default lengths for alleles listed without any; either a
            sequence of values or a single comma-separated string.  ``None``
            or an empty sequence means "no defaults".

    Returns:
        An ``(alleles, lengths)`` tuple of lists of strings.  When an allele
        has neither its own lengths nor defaults, it is appended to
        ``alleles`` with no matching ``lengths`` entry.
    """
    # Normalise the defaults into a flat list of non-empty strings.  The
    # previous code overwrote this parameter with the (initially empty)
    # result list, so the defaults were never applied.
    if lengths is None:
        requested = []
    elif isinstance(lengths, (str, int)):
        requested = [lengths]
    else:
        requested = list(lengths)
    default_lengths = [part.strip()
                       for item in requested
                       for part in str(item).split(',')
                       if part.strip()]

    alleles = []
    allele_lengths = []
    with open(allelefile, 'r') as fh:
        for line in fh:
            fields = [field.strip() for field in line.split(',')]
            allele = fields[0]
            if not allele:
                continue  # blank line, or a line with no allele name
            own_lengths = [field for field in fields[1:] if field]
            chosen = own_lengths or default_lengths
            if not chosen:
                alleles.append(allele)
                continue
            for alen in chosen:
                alleles.append(allele)
                allele_lengths.append(alen)
    return (alleles, allele_lengths)
58
59
def query(url, prediction, seq, allele, length, results,
          seqid=None, method='recommended', proteasome=None,
          timeout=300, retries=3, sleep=300, debug=False):
    """POST one sequence to an IEDB prediction service and collect the reply.

    Response lines are sorted into *results*: a line containing ``eptide``
    (past its first character) becomes the prediction header; for the
    ``Bepipred`` method a line containing ``Residue`` becomes the detail
    header and every later line of that response is stored as a detail
    entry; all other lines are stored as entries.  When *seqid* is given it
    is prepended (tab separated) to each entry and ``ID`` to each header.

    Args:
        url: service endpoint to POST to.
        prediction: service name; ``'bcell'`` sends *length* as
            ``window_size``, anything else sends it as ``length``.
        seq: sequence text (surrounding whitespace is stripped).
        allele: value for the ``allele`` field, or ``None`` to omit it.
        length: value for the length/window field, or ``None`` to omit it.
        results: dict with ``'prediction'`` and ``'detail'`` sub-dicts, each
            holding ``'header'`` and ``'entries'``; updated in place.
        seqid: optional identifier prefixed to every stored line.
        method: value for the ``method`` field; falsy to omit it.
        proteasome: value for the ``proteasome`` field; falsy to omit it.
        timeout: seconds passed to ``urlopen``.
        retries: how many times to retry after an ``HTTPError``; the request
            is attempted ``retries + 1`` times in total.
        sleep: seconds to wait between attempts.
        debug: echo the request and raw response lines to stderr.

    Returns:
        The same *results* object.

    Exits (via ``warn_err``) when the response looks like an API error page,
    when the status is not 200, when every attempt raised ``HTTPError`` (exit
    status is the HTTP code), or on any other connection error (status 3).
    """
    # urlencode() quotes str values itself, so no manual encoding is needed.
    # Field order is kept stable: method, proteasome, sequence, allele, length.
    params = {}
    if method:
        params['method'] = method
    if proteasome:
        params['proteasome'] = proteasome
    params['sequence_text'] = seq.strip()
    if allele is not None:
        params['allele'] = allele
    if length is not None:
        length_key = 'window_size' if prediction == 'bcell' else 'length'
        params[length_key] = str(length)
    req_data = urlencode(params)
    if debug:
        print('url %s %s' % (url, unquote(req_data)), file=sys.stderr)

    payload = req_data.encode('utf-8')
    id_header = "ID\t" if seqid else ""
    # One initial attempt plus `retries` retries.  The loop previously ran
    # range(1, attempts), which skipped the final attempt (and sent nothing
    # at all for retries=0) and made the "last attempt" test unreachable.
    attempts = max(0, retries) + 1
    for attempt in range(1, attempts + 1):
        try:
            # The context manager closes the connection once the body is read.
            with urlopen(url, data=payload, timeout=timeout) as response:
                status = response.getcode()
                data = ([line.decode() for line in response.readlines()]
                        if status == 200 else [])
        except HTTPError as e:
            # Only the final failure is fatal; earlier ones warn and retry.
            is_last = attempt >= attempts
            warn_err("%d of %d Error connecting to IEDB server %s\n" %
                     (attempt, attempts, e),
                     exit_code=(e.code or 1) if is_last else None)
            time.sleep(sleep)
        except Exception as e:
            warn_err("Error connecting to IEDB server %s\n" % e,
                     exit_code=3)
        else:
            if status != 200:
                warn_err("Error connecting to IEDB server\n",
                         exit_code=status or 1)
            if debug:
                print(data, file=sys.stderr)
            rslts = results['prediction']['entries']
            for line in data:
                if 'invalid' in line.lower() or 'tools_api.html' in line:
                    # The service answered 200 but with an error/usage page.
                    msg = '%s %s\n%s' % (url, unquote(req_data),
                                         ''.join(data))
                    warn_err(msg, exit_code=1)
                if line.find('eptide') > 0:
                    results['prediction']['header'] = \
                        "#%s%s" % (id_header, line)
                    continue
                elif method == 'Bepipred' and line.find('Residue') > 0:
                    results['detail']['header'] = "#%s%s" % (id_header, line)
                    # Everything after this header belongs to the detail table.
                    rslts = results['detail']['entries']
                    continue
                # append(), not extend(): extend() on a str stored one entry
                # per character.
                rslts.append("%s\t%s" % (seqid, line) if seqid else line)
            break
    return results
123
124
def warn_err(msg, exit_code=1):
    """Write *msg* to stderr, then exit when *exit_code* is truthy.

    A falsy *exit_code* (``None`` or ``0``) makes this a plain warning: the
    message is written and flushed, and control returns to the caller.
    """
    stream = sys.stderr
    stream.write(msg)
    stream.flush()
    if not exit_code:
        return
    sys.exit(exit_code)
130
131
# Letters allowed in a peptide taken from a tabular input column.
_AA_PATTERN = re.compile(r'^[ABCDEFGHIKLMNPQRSTVWY]+$')


def _build_arg_parser():
    """Return the command-line parser for this wrapper."""
    parser = argparse.ArgumentParser(description='', epilog='')
    parser.add_argument('-p', '--prediction',
                        default='mhci',
                        choices=prediction_methods.keys(),
                        help='IEDB API prediction service')
    parser.add_argument('-s', '--sequence',
                        action="append",
                        default=None,
                        help='Peptide Sequence')
    parser.add_argument('-m', '--method',
                        default='recommended',
                        choices=all_methods,
                        help='prediction method')
    parser.add_argument('-P', '--proteasome',
                        default=None,
                        choices=['immuno', 'constitutive'],
                        help='IEDB processing proteasome type')
    parser.add_argument('-a', '--allele',
                        action="append",
                        default=[],
                        help='Alleles for which to make predictions')
    parser.add_argument('-A', '--allelefile',
                        default=None,
                        help='File of HLA alleles')
    parser.add_argument('-l', '--length',
                        action="append",
                        default=[],
                        help='lengths for which to make predictions, '
                             '1 per allele')
    parser.add_argument('-w', '--window_size',
                        type=int,
                        default=None,
                        help='window_size for bcell prediction')
    parser.add_argument('-i', '--input',
                        default=None,
                        help='Input file for peptide sequences '
                             '(fasta or tabular)')
    parser.add_argument('-c', '--column',
                        default=None,
                        help='Peptide Column in a tabular input file')
    parser.add_argument('-C', '--id_column',
                        default=None,
                        help='ID Column in a tabular input file')
    parser.add_argument('-o', '--output',
                        default=None,
                        help='Output file for query results')
    parser.add_argument('-O', '--output2',
                        default='iedb_results2',
                        help='Output file for secondary query results')
    parser.add_argument('-t', '--timeout',
                        type=int,
                        default=600,
                        help='Seconds to wait for server response')
    parser.add_argument('-r', '--retries',
                        type=int,
                        default=5,
                        help='Number of times to retry server query')
    parser.add_argument('-S', '--sleep',
                        type=int,
                        default=300,
                        help='Seconds to wait between retries')
    parser.add_argument('-d', '--debug',
                        action='store_true',
                        default=False,
                        help='Turn on wrapper debugging to stderr')
    return parser


def _open_output(path):
    """Open *path* for writing, exiting with status 1 if that fails."""
    try:
        return open(os.path.abspath(path), 'w')
    except (OSError, ValueError) as e:
        warn_err("Unable to open output file: %s\n" % e, exit_code=1)


def _close_unless_stdout(handle):
    """Close *handle* unless it is the process's stdout."""
    if handle is not sys.stdout:
        handle.close()


def _write_table(handle, table):
    """Write a results table (optional header, then its entries)."""
    if table['header']:
        handle.write(table['header'])
    handle.writelines(table['entries'])


def _collect_alleles_and_lengths(args):
    """Return the comma-joined ``(allele, length)`` request values, or None."""
    alleles = []
    lengths = []
    if args.prediction == 'bcell' and args.window_size is not None:
        lengths.append(str(args.window_size))
    else:
        if args.allelefile:
            (alleles, lengths) = parse_alleles(args.allelefile, args.length)
        for i, allele in enumerate(args.allele):
            alleles.append(allele)
            # Reuse the last -l value once they run out.  With no -l at all
            # the allele is sent without a length; indexing args.length[-1]
            # on the empty list used to raise IndexError here.
            if args.length:
                lengths.append(args.length[min(i, len(args.length) - 1)])
    allele = ','.join(alleles) if alleles else None
    length = ','.join(str(alen) for alen in lengths) if lengths else None
    return allele, length


def _iter_tabular(fh, col, idcol):
    """Yield ``(seqid, peptide)`` for each valid row of a tabular file."""
    for i, line in enumerate(fh):
        fields = line.rstrip('\r\n').split('\t')
        if len(fields) <= col:
            continue  # row too short to hold the peptide column
        # '_' and '*' are dropped from the cell before validation.
        seq = re.sub('[_*]', '', fields[col].strip())
        if not _AA_PATTERN.match(seq):
            warn_err('Line %d, Not a peptide: %s\n' % (i, seq),
                     exit_code=None)
            continue
        if idcol is not None and idcol < len(fields):
            yield fields[idcol], seq
        else:
            yield 'pep_%d' % i, seq


def _iter_fasta(fh):
    """Yield ``(seqid, sequence)`` for each non-empty FASTA record."""
    seqid = None
    parts = []
    for line in fh:
        if line.startswith('>'):
            if seqid and parts:
                yield seqid, ''.join(parts)
            seqid = line[1:].strip()
            parts = []
        else:
            chunk = line.strip()
            if chunk:
                parts.append(chunk)
    if seqid and parts:
        yield seqid, ''.join(parts)


def _iter_input_sequences(path, column, id_column):
    """Yield ``(seqid, sequence)`` pairs from a tabular or FASTA input file.

    The file is read as tabular when *column* is given, as FASTA otherwise.
    """
    col = idcol = None
    if column:
        try:
            col = int(column)
            idcol = int(id_column) if id_column else None
        except ValueError as e:
            warn_err("Invalid column number: %s\n" % e, exit_code=1)
    # Only a failure to open is reported as such; errors raised while
    # processing are no longer mislabelled "Unable to open input file".
    try:
        fh = open(path, 'r')
    except OSError as e:
        warn_err("Unable to open input file: %s\n" % e, exit_code=1)
    with fh:
        if col is not None:
            yield from _iter_tabular(fh, col, idcol)
        else:
            yield from _iter_fasta(fh)


def __main__():
    """Run IEDB predictions for the sequences named on the command line.

    Sequences come from ``-s`` options and/or an ``-i`` input file.  Each one
    is sent to the selected prediction service with ``query``; prediction
    rows go to ``-o`` (stdout when omitted) and any detail rows go to ``-O``.
    """
    args = _build_arg_parser().parse_args()

    if args.prediction != 'bcell' and not (args.allele or args.allelefile):
        warn_err('-a allele or -A allelefile required\n', exit_code=1)

    if not (args.sequence or args.input):
        warn_err('NO Sequences given: '
                 'either -s sequence or -i input_file is required\n',
                 exit_code=1)

    # Open the primary output before any query is sent so an unwritable path
    # fails fast.
    if args.output is not None:
        outputFile = _open_output(args.output)
    else:
        outputFile = sys.stdout

    try:
        allele, length = _collect_alleles_and_lengths(args)
        # The proteasome type only applies to the 'processing' service.  This
        # was compared against the misspelling 'processcing', so the option
        # was never forwarded.
        proteasome = (args.proteasome
                      if args.prediction == 'processing' else None)
        url = 'http://tools-cluster-interface.iedb.org/tools_api/%s/' %\
            args.prediction

        results = {'prediction': {'header': None, 'entries': []},
                   'detail': {'header': None, 'entries': []}}

        def run_query(seq, seqid):
            """Send one sequence, accumulating its rows in ``results``."""
            query(url, args.prediction, seq, allele, length, results,
                  seqid=seqid, method=args.method, proteasome=proteasome,
                  timeout=args.timeout, retries=args.retries,
                  sleep=args.sleep, debug=args.debug)

        for i, seq in enumerate(args.sequence or []):
            run_query(seq, 'pep_%d' % i)
        if args.input:
            for seqid, seq in _iter_input_sequences(args.input, args.column,
                                                    args.id_column):
                run_query(seq, seqid)

        _write_table(outputFile, results['prediction'])

        detail = results['detail']
        if detail['entries']:
            if args.output2:
                outFile = _open_output(args.output2)
            else:
                outFile = sys.stdout
            try:
                _write_table(outFile, detail)
            finally:
                _close_unless_stdout(outFile)
    finally:
        _close_unless_stdout(outputFile)


if __name__ == "__main__":
    __main__()