Mercurial > repos > jjohnson > iedb_api
comparison iedb_api.py @ 4:a14128950578 draft default tip
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/iedb_api commit 98a9dd3bd9c567e8b8e43ac5b54c4ba75a6fe78d"
author | jjohnson |
---|---|
date | Fri, 28 Feb 2020 15:45:14 -0500 |
parents | 153d5fa7af53 |
children |
comparison
equal
deleted
inserted
replaced
3:153d5fa7af53 | 4:a14128950578 |
---|---|
# Length ranges keyed by IEDB prediction type.
# presumably the peptide lengths (window sizes for 'bcell') each tool
# accepts -- TODO confirm against the IEDB tools API documentation.
prediction_lengths = dict(
    mhci=range(8, 16),
    mhcii=range(11, 31),
    processing=range(8, 15),
    mhcnp=range(8, 12),
    bcell=range(8, 16),
)
# NOTE(review): apart from 'mhci' (an empty list) every value below is the
# same range as in prediction_lengths; this looks like a placeholder that
# still has to be filled in with species data -- confirm before relying on it.
prediction_species = dict(
    mhci=[],
    mhcii=range(11, 31),
    processing=range(8, 15),
    mhcnp=range(8, 12),
    bcell=range(8, 16),
)
42 | |
43 | |
def parse_alleles(allelefile, lengths):
    """Read allele/length pairs from a comma-separated allele file.

    Each non-empty line has the form ``allele[,length[,length...]]``.
    An allele is emitted once per length listed on its line.  When a line
    lists no lengths, the caller-supplied default ``lengths`` are used
    instead.  When there are no defaults either, the allele is emitted on
    its own (so the two returned lists can differ in size in that case).

    :param allelefile: path of the allele file to read.
    :param lengths: default lengths; a list/tuple of values or a single
        value, where every value may itself be a comma-separated string
        (e.g. ``['9', '10']``, ``['9,10']`` or ``'9,10'``).  May be empty
        or ``None``.
    :return: tuple ``(alleles, lengths)`` of two lists of strings.
    """
    # Normalise the defaults once.  The original code rebound ``lengths``
    # to the (empty) output list, which silently discarded the defaults and
    # later stringified the output list itself.
    if lengths is None:
        default_items = []
    elif isinstance(lengths, (list, tuple)):
        default_items = lengths
    else:
        default_items = [lengths]
    default_lengths = [part.strip()
                       for item in default_items
                       for part in str(item).split(',')
                       if part.strip()]

    alleles = []
    allele_lengths = []
    with open(allelefile, 'r') as fh:
        for line in fh:
            fields = [field.strip() for field in line.strip().split(',')]
            allele = fields[0]
            if not allele:
                continue
            # Lengths given on the line win over the defaults; blank
            # fields (e.g. a trailing comma) are ignored.
            line_lengths = [f for f in fields[1:] if f] or default_lengths
            if line_lengths:
                for alen in line_lengths:
                    alleles.append(allele)
                    allele_lengths.append(alen)
            else:
                alleles.append(allele)
    return (alleles, allele_lengths)
63 | |
64 | |
def query(url, prediction, seq, allele, length, results,
          seqid=None, method='recommended', proteasome=None,
          timeout=300, retries=3, sleep=300, debug=False):
    """POST one sequence to an IEDB tools API endpoint and collect the reply.

    Response lines are appended to ``results['prediction']['entries']``.
    A line containing 'eptide' is stored as the prediction header instead.
    For method 'Bepipred', a line containing 'Residue' is stored as the
    detail header and all following lines go to
    ``results['detail']['entries']``.  When ``seqid`` is given every stored
    line is prefixed with it (and the headers with an ``ID`` column).

    :param url: endpoint URL for the prediction tool.
    :param prediction: prediction type; 'bcell' sends the length as
        ``window_size``, every other type sends it as ``length``.
    :param seq: sequence text to submit.
    :param allele: allele parameter (comma-separated string) or None.
    :param length: length parameter or None.
    :param results: dict with 'prediction' and 'detail' entries, each a
        dict holding 'header' and an 'entries' list; updated in place.
    :param seqid: optional identifier prepended to each result line.
    :param method: prediction method name sent to the server.
    :param proteasome: optional proteasome type sent to the server.
    :param timeout: per-request timeout in seconds passed to urlopen.
    :param retries: number of additional attempts after the first one
        when the server answers with an HTTP error.
    :param sleep: seconds to wait between attempts.
    :param debug: when true, echo the request and raw response to stderr.
    :return: the same ``results`` dict.

    Exits the process through ``warn_err`` when the server reports an
    invalid request, on a non-HTTP failure, or when the last attempt still
    ends in an HTTP error.
    """
    params = dict()
    if method:
        params['method'] = method.encode()
    if proteasome:
        params['proteasome'] = proteasome.encode()
    params['sequence_text'] = seq.strip().encode()
    if allele is not None:
        params['allele'] = allele.encode()
    if length is not None:
        len_param = 'window_size' if prediction == 'bcell' else 'length'
        params[len_param] = str(length).encode()
    req_data = urlencode(params)
    if debug:
        print('url %s %s' % (url, unquote(req_data)), file=sys.stderr)

    # One initial attempt plus ``retries`` retries; retries=0 must still
    # issue a single request.
    attempts = max(0, retries) + 1
    for attempt in range(1, attempts + 1):
        response = None
        try:
            response = urlopen(url, data=req_data.encode('utf-8'),
                               timeout=timeout)
            if response and response.getcode() == 200:
                data = [line.decode() for line in response.readlines()]
                if debug:
                    print(data, file=sys.stderr)
                rslts = results['prediction']['entries']
                for line in data:
                    # The server reports bad parameters in the body of a
                    # 200 reply (or points at its API help page).
                    if 'invalid' in line.lower() or 'tools_api.html' in line:
                        msg = '%s %s\n%s' % (url, unquote(req_data),
                                             ''.join(data))
                        warn_err(msg, exit_code=1)
                    if line.find('eptide') > 0:
                        results['prediction']['header'] = "#%s%s" %\
                            ("ID\t" if seqid else "", line)
                        continue
                    elif method == 'Bepipred' and line.find('Residue') > 0:
                        results['detail']['header'] = "#%s%s" %\
                            ("ID\t" if seqid else "", line)
                        # everything after this header is per-residue detail
                        rslts = results['detail']['entries']
                        continue
                    # append, not extend: extend() on a string would store
                    # one character per list element
                    if seqid:
                        rslts.append("%s\t%s" % (seqid, line))
                    else:
                        rslts.append(line)
                break
            else:
                code = response.getcode() if response else 1
                warn_err("Error connecting to IEDB server\n",
                         exit_code=code)
        except HTTPError as e:
            last_attempt = attempt >= attempts
            # Only the final failure is fatal; earlier ones just warn.
            code = (e.code or 1) if last_attempt else None
            warn_err("%d of %d Error connecting to IEDB server %s\n" %
                     (attempt, attempts, e),
                     exit_code=code)
            if not last_attempt:
                time.sleep(sleep)
        except Exception as e:
            warn_err("Error connecting to IEDB server %s\n" % e,
                     exit_code=3)
        finally:
            if response is not None:
                response.close()
    return results
37 | 128 |
38 | 129 |
def warn_err(msg, exit_code=1):
    """Write ``msg`` to stderr and optionally terminate the program.

    :param msg: text written verbatim to stderr (no newline is added).
    :param exit_code: when truthy, the process exits with this status via
        ``sys.exit``; when ``None`` or ``0`` the function simply returns.
    """
    err_stream = sys.stderr
    err_stream.write(msg)
    # flush so the message is emitted before a possible exit
    err_stream.flush()
    if not exit_code:
        return
    sys.exit(exit_code)
43 | 135 |
44 | 136 |
45 def __main__(): | 137 def __main__(): |
63 help='IEDB processing proteasome type') | 155 help='IEDB processing proteasome type') |
64 parser.add_argument('-a', '--allele', | 156 parser.add_argument('-a', '--allele', |
65 action="append", | 157 action="append", |
66 default=[], | 158 default=[], |
67 help='Alleles for which to make predictions') | 159 help='Alleles for which to make predictions') |
160 parser.add_argument('-A', '--allelefile', | |
161 default=None, | |
162 help='File of HLA alleles') | |
68 parser.add_argument('-l', '--length', | 163 parser.add_argument('-l', '--length', |
69 action="append", | 164 action="append", |
70 default=[], | 165 default=[], |
71 help='lengths for which to make predictions, ' + | 166 help='lengths for which to make predictions, ' + |
72 '1 per allele') | 167 '1 per allele') |
108 help='Turn on wrapper debugging to stderr') | 203 help='Turn on wrapper debugging to stderr') |
109 args = parser.parse_args() | 204 args = parser.parse_args() |
110 | 205 |
111 aapat = '^[ABCDEFGHIKLMNPQRSTVWY]+$' | 206 aapat = '^[ABCDEFGHIKLMNPQRSTVWY]+$' |
112 | 207 |
113 if not args.allele and args.prediction != 'bcell': | 208 if args.prediction != 'bcell': |
114 warn_err('-a allele required\n', exit_code=1) | 209 if not args.allele and not args.allelefile: |
210 warn_err('-a allele or -A allelefile required\n', exit_code=1) | |
115 | 211 |
116 if not (args.sequence or args.input): | 212 if not (args.sequence or args.input): |
117 warn_err('NO Sequences given: ' + | 213 warn_err('NO Sequences given: ' + |
118 'either -s sequence or -i input_file is required\n', | 214 'either -s sequence or -i input_file is required\n', |
119 exit_code=1) | 215 exit_code=1) |
125 except Exception as e: | 221 except Exception as e: |
126 warn_err("Unable to open output file: %s\n" % e, exit_code=1) | 222 warn_err("Unable to open output file: %s\n" % e, exit_code=1) |
127 else: | 223 else: |
128 outputFile = sys.stdout | 224 outputFile = sys.stdout |
129 | 225 |
226 alleles = [] | |
227 lengths = [] | |
228 # TODO parse alleles from the args.alleles file | |
229 if args.prediction == 'bcell' and args.window_size is not None: | |
230 lengths.append(str(args.window_size)) | |
231 else: | |
232 if args.allelefile: | |
233 (alleles, lengths) = parse_alleles(args.allelefile, args.length) | |
234 if args.allele: | |
235 for i, allele in enumerate(args.allele): | |
236 alleles.append(allele) | |
237 alen = args.length[i] if i < len(args.length) else args.length[-1] | |
238 lengths.append(alen) | |
239 allele = ','.join(alleles) if alleles else None | |
240 length = ','.join(lengths) if lengths else None | |
241 method = args.method | |
242 proteasome = args.proteasome if args.prediction == 'processcing' else None | |
130 url = 'http://tools-cluster-interface.iedb.org/tools_api/%s/' %\ | 243 url = 'http://tools-cluster-interface.iedb.org/tools_api/%s/' %\ |
131 args.prediction | 244 args.prediction |
132 len_param = 'length' if args.prediction != 'bcell' else 'window_size' | 245 # results |
133 | 246 results = {'prediction': {'header': None, 'entries': []}, 'detail': {'header': None, 'entries': []}} |
134 # TODO parse alleles from the args.alleles file | |
135 alleles = ','.join(args.allele) if args.prediction != 'bcell' else None | |
136 lengths = ','.join(args.length) | |
137 if args.prediction == 'bcell': | |
138 lengths = args.window_size | |
139 method = args.method | |
140 proteasome = args.proteasome if args.prediction == 'processcing' else None | |
141 global header | |
142 header = None | |
143 results = [] | |
144 global header2 | |
145 header2 = None | |
146 results2 = [] | |
147 | |
148 sequence_text = [] | |
149 | |
150 def add_seq(seqid, seq): | |
151 sid = seqid if seqid else "peptide%d" % len(sequence_text) | |
152 sequence_text.append(">%s\n%s" % (sid, seq)) | |
153 | |
154 def query(url, seq, allele, length, seqid=None, method='recommended'): | |
155 global header | |
156 global header2 | |
157 params = dict() | |
158 if method: | |
159 params['method'] = method.encode() | |
160 if proteasome: | |
161 params['proteasome'] = proteasome.encode() | |
162 params['sequence_text'] = seq.encode() | |
163 if allele is not None: | |
164 params['allele'] = allele.encode() | |
165 if length is not None: | |
166 params[len_param] = str(length).encode() | |
167 req_data = urlencode(params) | |
168 if args.debug: | |
169 print('url %s %s' % (url, unquote(req_data)), file=sys.stderr) | |
170 retries = max(0, args.retries) + 1 | |
171 for retry in range(1, retries): | |
172 response = None | |
173 try: | |
174 response = urlopen(url, data=req_data.encode('utf-8'), | |
175 timeout=args.timeout) | |
176 if response and response.getcode() == 200: | |
177 data = [line.decode() for line in response.readlines()] | |
178 if args.debug: | |
179 print(data, file=sys.stderr) | |
180 rslts = results | |
181 for ln, line in enumerate(data): | |
182 if line.lower().find('invalid') >= 0: | |
183 msg = '%s %s\n%s' % (url, unquote(req_data), | |
184 ''.join(data)) | |
185 warn_err(msg, exit_code=1) | |
186 if line.find('eptide') > 0: | |
187 header = "#%s%s" %\ | |
188 ("ID\t" if seqid else "", line) | |
189 if args.debug: | |
190 print(header, file=sys.stderr) | |
191 continue | |
192 elif method == 'Bepipred' and line.find('Residue') > 0: | |
193 header2 = "#%s%s" %\ | |
194 ("ID\t" if seqid else "", line) | |
195 if args.debug: | |
196 print(header2, file=sys.stderr) | |
197 rslts = results2 | |
198 continue | |
199 if seqid: | |
200 rslts.extend("%s\t%s" % (seqid, line)) | |
201 else: | |
202 rslts.extend(line) | |
203 break | |
204 else: | |
205 code = response.getcode() if response else 1 | |
206 warn_err("Error connecting to IEDB server\n", | |
207 exit_code=code) | |
208 except HTTPError as e: | |
209 code = None if retry < args.retries else e.code | |
210 warn_err("%d of %d Error connecting to IEDB server %s\n" % | |
211 (retry, retries, e), | |
212 exit_code=code) | |
213 time.sleep(args.sleep) | |
214 except Exception as e: | |
215 warn_err("Error connecting to IEDB server %s\n" % e, | |
216 exit_code=3) | |
217 | 247 |
218 if args.sequence: | 248 if args.sequence: |
219 for i, seq in enumerate(args.sequence): | 249 for i, seq in enumerate(args.sequence): |
220 query(url, seq, alleles, lengths, seqid=None, method=method) | 250 seqid = 'pep_%d' % i |
251 query(url, args.prediction, seq, allele, length, results, | |
252 seqid=seqid, method=method, proteasome=proteasome, | |
253 timeout=args.timeout, retries=args.retries, | |
254 sleep=args.sleep, debug=args.debug) | |
221 if args.input: | 255 if args.input: |
222 try: | 256 try: |
223 fh = open(args.input, 'r') | 257 fh = open(args.input, 'r') |
224 if args.column: # tabular | 258 if args.column: # tabular |
225 col = int(args.column) | 259 col = int(args.column) |
226 idcol = int(args.id_column) if args.id_column else None | 260 idcol = int(args.id_column) if args.id_column else None |
227 for i, line in enumerate(fh): | 261 for i, line in enumerate(fh): |
228 fields = line.split('\t') | 262 fields = line.rstrip('\r\n').split('\t') |
229 if len(fields) > col: | 263 if len(fields) > col: |
230 seq = re.sub('[_*]', '', fields[col]) | 264 seq = re.sub('[_*]', '', fields[col].strip()) |
231 if re.match(aapat, seq): | 265 if re.match(aapat, seq): |
232 if idcol is not None and idcol < len(fields): | 266 if idcol is not None and idcol < len(fields): |
233 seqid = fields[idcol] | 267 seqid = fields[idcol] |
234 else: | 268 else: |
235 seqid = None | 269 seqid = 'pep_%d' % i |
236 query(url, seq, alleles, lengths, | 270 query(url, args.prediction, seq, allele, length, |
237 seqid=seqid, method=method) | 271 results, seqid=seqid, |
272 method=method, proteasome=proteasome, | |
273 timeout=args.timeout, retries=args.retries, | |
274 sleep=args.sleep, debug=args.debug) | |
238 else: | 275 else: |
239 warn_err('Line %d, Not a peptide: %s\n' % (i, seq), | 276 warn_err('Line %d, Not a peptide: %s\n' % (i, seq), |
240 exit_code=None) | 277 exit_code=None) |
241 else: # fasta | 278 else: # fasta |
242 seqid = None | 279 seqid = None |
243 seq = '' | 280 seq = '' |
244 for i, line in enumerate(fh): | 281 for i, line in enumerate(fh): |
245 if line.startswith('>'): | 282 if line.startswith('>'): |
246 if seqid and len(seq) > 0: | 283 if seqid and len(seq) > 0: |
247 query(url, seq, alleles, lengths, | 284 query(url, args.prediction, seq, allele, length, |
248 seqid=seqid, method=method) | 285 results, seqid=seqid, |
286 method=method, proteasome=proteasome, | |
287 timeout=args.timeout, retries=args.retries, | |
288 sleep=args.sleep, debug=args.debug) | |
249 seqid = line[1:].strip() | 289 seqid = line[1:].strip() |
250 seq = '' | 290 seq = '' |
251 else: | 291 else: |
252 seq += line.strip() | 292 seq += line.strip() |
253 if seqid and len(seq) > 0: | 293 if seqid and len(seq) > 0: |
254 query(url, seq, alleles, lengths, | 294 query(url, args.prediction, seq, allele, length, |
255 seqid=seqid, method=method) | 295 results, seqid=seqid, |
296 method=method, proteasome=proteasome, | |
297 timeout=args.timeout, retries=args.retries, | |
298 sleep=args.sleep, debug=args.debug) | |
256 fh.close() | 299 fh.close() |
257 except Exception as e: | 300 except Exception as e: |
258 warn_err("Unable to open input file: %s\n" % e, exit_code=1) | 301 warn_err("Unable to open input file: %s\n" % e, exit_code=1) |
259 | 302 |
260 if header: | 303 if results['prediction']['header']: |
261 outputFile.write(header) | 304 outputFile.write(results['prediction']['header']) |
262 for line in results: | 305 for line in results['prediction']['entries']: |
263 outputFile.write(line) | 306 outputFile.write(line) |
264 if results2: | 307 if results['detail']['entries']: |
265 if args.output2: | 308 if args.output2: |
266 try: | 309 try: |
267 outPath = os.path.abspath(args.output2) | 310 outPath = os.path.abspath(args.output2) |
268 outFile = open(outPath, 'w') | 311 outFile = open(outPath, 'w') |
269 except Exception as e: | 312 except Exception as e: |
270 warn_err("Unable to open output file: %s\n" % e, exit_code=1) | 313 warn_err("Unable to open output file: %s\n" % e, exit_code=1) |
271 else: | 314 else: |
272 outFile = sys.stdout | 315 outFile = sys.stdout |
273 if header2: | 316 if results['detail']['header']: |
274 outFile.write(header2) | 317 outFile.write(results['detail']['header']) |
275 for line in results2: | 318 for line in results['detail']['entries']: |
276 outFile.write(line) | 319 outFile.write(line) |
277 | 320 |
278 | 321 |
279 if __name__ == "__main__": | 322 if __name__ == "__main__": |
280 __main__() | 323 __main__() |