comparison iedb_api.py @ 4:a14128950578 draft default tip

"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/iedb_api commit 98a9dd3bd9c567e8b8e43ac5b54c4ba75a6fe78d"
author jjohnson
date Fri, 28 Feb 2020 15:45:14 -0500
parents 153d5fa7af53
children
comparison
equal deleted inserted replaced
3:153d5fa7af53 4:a14128950578
# Peptide lengths (window sizes for 'bcell') listed for each IEDB prediction
# type, keyed by the prediction name used in the tools_api URL.
prediction_lengths = dict(
    mhci=range(8, 16),
    mhcii=range(11, 31),
    processing=range(8, 15),
    mhcnp=range(8, 12),
    bcell=range(8, 16),
)
# NOTE(review): every entry except 'mhci' repeats the length ranges above,
# which looks like a copy/paste placeholder rather than species data, and the
# table is not referenced by the code visible here -- confirm intended values.
prediction_species = dict(
    mhci=[],
    mhcii=range(11, 31),
    processing=range(8, 15),
    mhcnp=range(8, 12),
    bcell=range(8, 16),
)
42
43
def parse_alleles(allelefile, lengths):
    """Read alleles, with optional per-allele lengths, from a text file.

    Each non-blank line of *allelefile* has the form
    ``allele[,length[,length...]]``.  One ``(allele, length)`` pair is
    produced for every length given on the line.  When a line gives no
    lengths, the default *lengths* supplied by the caller are used instead.
    When there are no defaults either, the allele is recorded on its own
    (without a matching length), as the previous implementation did.

    Args:
        allelefile: Path of the allele file to read.
        lengths: Default lengths: ``None``, a single value, or an iterable
            of values.  Each value may itself be a comma-separated string.

    Returns:
        A tuple ``(alleles, lengths)`` of two lists of strings.  The lists
        are index-aligned whenever every allele resolved to at least one
        length.
    """
    # Normalise the caller's defaults into a flat list of clean strings.
    # The result lives under its own name so the parameter is never
    # overwritten by the output list (the bug this rewrite fixes).
    if lengths is None:
        raw_defaults = []
    elif isinstance(lengths, (str, int)):
        raw_defaults = [lengths]
    else:
        raw_defaults = list(lengths)
    default_lengths = [tok.strip()
                       for item in raw_defaults
                       for tok in str(item).split(',')
                       if tok.strip()]

    alleles = []
    allele_lengths = []
    with open(allelefile, 'r') as fh:
        for line in fh:
            fields = [field.strip() for field in line.split(',')]
            allele = fields[0]
            if not allele:
                continue  # blank line or missing allele name
            # Empty fields (e.g. a trailing comma) are not lengths.
            file_lengths = [field for field in fields[1:] if field]
            chosen = file_lengths or default_lengths
            if chosen:
                for alen in chosen:
                    alleles.append(allele)
                    allele_lengths.append(alen)
            else:
                alleles.append(allele)
    return (alleles, allele_lengths)
63
64
def query(url, prediction, seq, allele, length, results,
          seqid=None, method='recommended', proteasome=None,
          timeout=300, retries=3, sleep=300, debug=False):
    """Submit one sequence to the IEDB tools API and collect the reply.

    The request parameters are form-encoded and sent as the request body to
    *url*.  Reply lines are appended to ``results['prediction']['entries']``.
    For ``method == 'Bepipred'``, lines following a header that contains
    'Residue' are appended to ``results['detail']['entries']`` instead.
    Header lines are stored under the matching ``'header'`` key, prefixed
    with ``#`` (and an ``ID`` column when *seqid* is given).

    Args:
        url: IEDB tools_api endpoint for the prediction type.
        prediction: Prediction type; ``'bcell'`` sends *length* as
            ``window_size``, anything else sends it as ``length``.
        seq: Sequence text to submit (surrounding whitespace is stripped).
        allele: Comma-separated alleles, or ``None`` to omit the parameter.
        length: Comma-separated lengths (or a window size), or ``None``.
        results: Dict with ``'prediction'`` and ``'detail'`` sub-dicts, each
            holding ``'header'`` and ``'entries'``; updated in place.
        seqid: Optional identifier prepended (tab-separated) to each entry.
        method: IEDB prediction method name; omitted when falsy.
        proteasome: Proteasome type; omitted when falsy.
        timeout: Socket timeout in seconds for each request.
        retries: Number of additional attempts after an HTTP error.
        sleep: Seconds to wait between attempts.
        debug: When true, echo the request and raw reply to stderr.

    Returns:
        The same *results* dict that was passed in.

    Exits (through ``warn_err``) when the reply reports invalid input, when
    the status is not 200, when the final attempt fails with an HTTP error,
    or on any other exception.
    """
    params = dict()
    if method:
        params['method'] = method.encode()
    if proteasome:
        params['proteasome'] = proteasome.encode()
    params['sequence_text'] = seq.strip().encode()
    if allele is not None:
        params['allele'] = allele.encode()
    if length is not None:
        len_param = 'window_size' if prediction == 'bcell' else 'length'
        params[len_param] = str(length).encode()
    req_data = urlencode(params)
    if debug:
        print('url %s %s' % (url, unquote(req_data)), file=sys.stderr)
    # One initial attempt plus `retries` further attempts.  The previous
    # range(1, retries) made no request at all for retries=0 and could
    # never reach the "final attempt" branch below.
    attempts = max(0, retries) + 1
    for attempt in range(1, attempts + 1):
        response = None
        try:
            response = urlopen(url, data=req_data.encode('utf-8'),
                               timeout=timeout)
            if response and response.getcode() == 200:
                data = [line.decode() for line in response.readlines()]
                if debug:
                    print(data, file=sys.stderr)
                rslts = results['prediction']['entries']
                for line in data:
                    if 'invalid' in line.lower() or 'tools_api.html' in line:
                        msg = '%s %s\n%s' % (url, unquote(req_data),
                                             ''.join(data))
                        warn_err(msg, exit_code=1)
                    if line.find('eptide') > 0:
                        results['prediction']['header'] = "#%s%s" %\
                            ("ID\t" if seqid else "", line)
                        continue
                    elif method == 'Bepipred' and line.find('Residue') > 0:
                        results['detail']['header'] = "#%s%s" %\
                            ("ID\t" if seqid else "", line)
                        # Everything after this header is per-residue detail.
                        rslts = results['detail']['entries']
                        continue
                    # append, not extend: extend() on a str would store one
                    # list element per character.  Lines keep their newline,
                    # so writing the entries out yields the same text.
                    if seqid:
                        rslts.append("%s\t%s" % (seqid, line))
                    else:
                        rslts.append(line)
                break
            else:
                code = response.getcode() if response else 1
                warn_err("Error connecting to IEDB server\n",
                         exit_code=code)
        except HTTPError as e:
            if attempt < attempts:
                # Not the last attempt: report, wait, then try again.
                warn_err("%d of %d Error connecting to IEDB server %s\n" %
                         (attempt, attempts, e),
                         exit_code=None)
                time.sleep(sleep)
            else:
                # Attempts exhausted: exit rather than silently returning
                # without results for this sequence.
                warn_err("%d of %d Error connecting to IEDB server %s\n" %
                         (attempt, attempts, e),
                         exit_code=e.code or 1)
        except Exception as e:
            warn_err("Error connecting to IEDB server %s\n" % e,
                     exit_code=3)
        finally:
            # Release the connection on every path, including sys.exit().
            if response is not None:
                response.close()
    return results
37 128
38 129
def warn_err(msg, exit_code=1):
    """Write *msg* to stderr and flush it; exit if *exit_code* is truthy.

    A falsy *exit_code* (``None`` or ``0``) only reports the message and
    returns, so callers can use this for non-fatal warnings as well.
    """
    stream = sys.stderr
    stream.write(msg)
    stream.flush()
    if not exit_code:
        return
    sys.exit(exit_code)
43 135
44 136
45 def __main__(): 137 def __main__():
63 help='IEDB processing proteasome type') 155 help='IEDB processing proteasome type')
64 parser.add_argument('-a', '--allele', 156 parser.add_argument('-a', '--allele',
65 action="append", 157 action="append",
66 default=[], 158 default=[],
67 help='Alleles for which to make predictions') 159 help='Alleles for which to make predictions')
160 parser.add_argument('-A', '--allelefile',
161 default=None,
162 help='File of HLA alleles')
68 parser.add_argument('-l', '--length', 163 parser.add_argument('-l', '--length',
69 action="append", 164 action="append",
70 default=[], 165 default=[],
71 help='lengths for which to make predictions, ' + 166 help='lengths for which to make predictions, ' +
72 '1 per allele') 167 '1 per allele')
108 help='Turn on wrapper debugging to stderr') 203 help='Turn on wrapper debugging to stderr')
109 args = parser.parse_args() 204 args = parser.parse_args()
110 205
111 aapat = '^[ABCDEFGHIKLMNPQRSTVWY]+$' 206 aapat = '^[ABCDEFGHIKLMNPQRSTVWY]+$'
112 207
113 if not args.allele and args.prediction != 'bcell': 208 if args.prediction != 'bcell':
114 warn_err('-a allele required\n', exit_code=1) 209 if not args.allele and not args.allelefile:
210 warn_err('-a allele or -A allelefile required\n', exit_code=1)
115 211
116 if not (args.sequence or args.input): 212 if not (args.sequence or args.input):
117 warn_err('NO Sequences given: ' + 213 warn_err('NO Sequences given: ' +
118 'either -s sequence or -i input_file is required\n', 214 'either -s sequence or -i input_file is required\n',
119 exit_code=1) 215 exit_code=1)
125 except Exception as e: 221 except Exception as e:
126 warn_err("Unable to open output file: %s\n" % e, exit_code=1) 222 warn_err("Unable to open output file: %s\n" % e, exit_code=1)
127 else: 223 else:
128 outputFile = sys.stdout 224 outputFile = sys.stdout
129 225
226 alleles = []
227 lengths = []
228 # TODO parse alleles from the args.alleles file
229 if args.prediction == 'bcell' and args.window_size is not None:
230 lengths.append(str(args.window_size))
231 else:
232 if args.allelefile:
233 (alleles, lengths) = parse_alleles(args.allelefile, args.length)
234 if args.allele:
235 for i, allele in enumerate(args.allele):
236 alleles.append(allele)
237 alen = args.length[i] if i < len(args.length) else args.length[-1]
238 lengths.append(alen)
239 allele = ','.join(alleles) if alleles else None
240 length = ','.join(lengths) if lengths else None
241 method = args.method
242 proteasome = args.proteasome if args.prediction == 'processcing' else None
130 url = 'http://tools-cluster-interface.iedb.org/tools_api/%s/' %\ 243 url = 'http://tools-cluster-interface.iedb.org/tools_api/%s/' %\
131 args.prediction 244 args.prediction
132 len_param = 'length' if args.prediction != 'bcell' else 'window_size' 245 # results
133 246 results = {'prediction': {'header': None, 'entries': []}, 'detail': {'header': None, 'entries': []}}
134 # TODO parse alleles from the args.alleles file
135 alleles = ','.join(args.allele) if args.prediction != 'bcell' else None
136 lengths = ','.join(args.length)
137 if args.prediction == 'bcell':
138 lengths = args.window_size
139 method = args.method
140 proteasome = args.proteasome if args.prediction == 'processcing' else None
141 global header
142 header = None
143 results = []
144 global header2
145 header2 = None
146 results2 = []
147
148 sequence_text = []
149
150 def add_seq(seqid, seq):
151 sid = seqid if seqid else "peptide%d" % len(sequence_text)
152 sequence_text.append(">%s\n%s" % (sid, seq))
153
154 def query(url, seq, allele, length, seqid=None, method='recommended'):
155 global header
156 global header2
157 params = dict()
158 if method:
159 params['method'] = method.encode()
160 if proteasome:
161 params['proteasome'] = proteasome.encode()
162 params['sequence_text'] = seq.encode()
163 if allele is not None:
164 params['allele'] = allele.encode()
165 if length is not None:
166 params[len_param] = str(length).encode()
167 req_data = urlencode(params)
168 if args.debug:
169 print('url %s %s' % (url, unquote(req_data)), file=sys.stderr)
170 retries = max(0, args.retries) + 1
171 for retry in range(1, retries):
172 response = None
173 try:
174 response = urlopen(url, data=req_data.encode('utf-8'),
175 timeout=args.timeout)
176 if response and response.getcode() == 200:
177 data = [line.decode() for line in response.readlines()]
178 if args.debug:
179 print(data, file=sys.stderr)
180 rslts = results
181 for ln, line in enumerate(data):
182 if line.lower().find('invalid') >= 0:
183 msg = '%s %s\n%s' % (url, unquote(req_data),
184 ''.join(data))
185 warn_err(msg, exit_code=1)
186 if line.find('eptide') > 0:
187 header = "#%s%s" %\
188 ("ID\t" if seqid else "", line)
189 if args.debug:
190 print(header, file=sys.stderr)
191 continue
192 elif method == 'Bepipred' and line.find('Residue') > 0:
193 header2 = "#%s%s" %\
194 ("ID\t" if seqid else "", line)
195 if args.debug:
196 print(header2, file=sys.stderr)
197 rslts = results2
198 continue
199 if seqid:
200 rslts.extend("%s\t%s" % (seqid, line))
201 else:
202 rslts.extend(line)
203 break
204 else:
205 code = response.getcode() if response else 1
206 warn_err("Error connecting to IEDB server\n",
207 exit_code=code)
208 except HTTPError as e:
209 code = None if retry < args.retries else e.code
210 warn_err("%d of %d Error connecting to IEDB server %s\n" %
211 (retry, retries, e),
212 exit_code=code)
213 time.sleep(args.sleep)
214 except Exception as e:
215 warn_err("Error connecting to IEDB server %s\n" % e,
216 exit_code=3)
217 247
218 if args.sequence: 248 if args.sequence:
219 for i, seq in enumerate(args.sequence): 249 for i, seq in enumerate(args.sequence):
220 query(url, seq, alleles, lengths, seqid=None, method=method) 250 seqid = 'pep_%d' % i
251 query(url, args.prediction, seq, allele, length, results,
252 seqid=seqid, method=method, proteasome=proteasome,
253 timeout=args.timeout, retries=args.retries,
254 sleep=args.sleep, debug=args.debug)
221 if args.input: 255 if args.input:
222 try: 256 try:
223 fh = open(args.input, 'r') 257 fh = open(args.input, 'r')
224 if args.column: # tabular 258 if args.column: # tabular
225 col = int(args.column) 259 col = int(args.column)
226 idcol = int(args.id_column) if args.id_column else None 260 idcol = int(args.id_column) if args.id_column else None
227 for i, line in enumerate(fh): 261 for i, line in enumerate(fh):
228 fields = line.split('\t') 262 fields = line.rstrip('\r\n').split('\t')
229 if len(fields) > col: 263 if len(fields) > col:
230 seq = re.sub('[_*]', '', fields[col]) 264 seq = re.sub('[_*]', '', fields[col].strip())
231 if re.match(aapat, seq): 265 if re.match(aapat, seq):
232 if idcol is not None and idcol < len(fields): 266 if idcol is not None and idcol < len(fields):
233 seqid = fields[idcol] 267 seqid = fields[idcol]
234 else: 268 else:
235 seqid = None 269 seqid = 'pep_%d' % i
236 query(url, seq, alleles, lengths, 270 query(url, args.prediction, seq, allele, length,
237 seqid=seqid, method=method) 271 results, seqid=seqid,
272 method=method, proteasome=proteasome,
273 timeout=args.timeout, retries=args.retries,
274 sleep=args.sleep, debug=args.debug)
238 else: 275 else:
239 warn_err('Line %d, Not a peptide: %s\n' % (i, seq), 276 warn_err('Line %d, Not a peptide: %s\n' % (i, seq),
240 exit_code=None) 277 exit_code=None)
241 else: # fasta 278 else: # fasta
242 seqid = None 279 seqid = None
243 seq = '' 280 seq = ''
244 for i, line in enumerate(fh): 281 for i, line in enumerate(fh):
245 if line.startswith('>'): 282 if line.startswith('>'):
246 if seqid and len(seq) > 0: 283 if seqid and len(seq) > 0:
247 query(url, seq, alleles, lengths, 284 query(url, args.prediction, seq, allele, length,
248 seqid=seqid, method=method) 285 results, seqid=seqid,
286 method=method, proteasome=proteasome,
287 timeout=args.timeout, retries=args.retries,
288 sleep=args.sleep, debug=args.debug)
249 seqid = line[1:].strip() 289 seqid = line[1:].strip()
250 seq = '' 290 seq = ''
251 else: 291 else:
252 seq += line.strip() 292 seq += line.strip()
253 if seqid and len(seq) > 0: 293 if seqid and len(seq) > 0:
254 query(url, seq, alleles, lengths, 294 query(url, args.prediction, seq, allele, length,
255 seqid=seqid, method=method) 295 results, seqid=seqid,
296 method=method, proteasome=proteasome,
297 timeout=args.timeout, retries=args.retries,
298 sleep=args.sleep, debug=args.debug)
256 fh.close() 299 fh.close()
257 except Exception as e: 300 except Exception as e:
258 warn_err("Unable to open input file: %s\n" % e, exit_code=1) 301 warn_err("Unable to open input file: %s\n" % e, exit_code=1)
259 302
260 if header: 303 if results['prediction']['header']:
261 outputFile.write(header) 304 outputFile.write(results['prediction']['header'])
262 for line in results: 305 for line in results['prediction']['entries']:
263 outputFile.write(line) 306 outputFile.write(line)
264 if results2: 307 if results['detail']['entries']:
265 if args.output2: 308 if args.output2:
266 try: 309 try:
267 outPath = os.path.abspath(args.output2) 310 outPath = os.path.abspath(args.output2)
268 outFile = open(outPath, 'w') 311 outFile = open(outPath, 'w')
269 except Exception as e: 312 except Exception as e:
270 warn_err("Unable to open output file: %s\n" % e, exit_code=1) 313 warn_err("Unable to open output file: %s\n" % e, exit_code=1)
271 else: 314 else:
272 outFile = sys.stdout 315 outFile = sys.stdout
273 if header2: 316 if results['detail']['header']:
274 outFile.write(header2) 317 outFile.write(results['detail']['header'])
275 for line in results2: 318 for line in results['detail']['entries']:
276 outFile.write(line) 319 outFile.write(line)
277 320
278 321
# Script entry point: run the command-line wrapper only when this file is
# executed directly, not when it is imported as a module.
if __name__ == "__main__":
    __main__()