Mercurial > repos > jjohnson > iedb_api
comparison iedb_api.py @ 4:a14128950578 draft default tip
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/iedb_api commit 98a9dd3bd9c567e8b8e43ac5b54c4ba75a6fe78d"
| author | jjohnson |
|---|---|
| date | Fri, 28 Feb 2020 15:45:14 -0500 |
| parents | 153d5fa7af53 |
| children |
comparison
equal
deleted
inserted
replaced
| 3:153d5fa7af53 | 4:a14128950578 |
|---|---|
| 32 prediction_lengths = {'mhci': range(8, 16), | 32 prediction_lengths = {'mhci': range(8, 16), |
| 33 'mhcii': range(11, 31), | 33 'mhcii': range(11, 31), |
| 34 'processing': range(8, 15), | 34 'processing': range(8, 15), |
| 35 'mhcnp': range(8, 12), | 35 'mhcnp': range(8, 12), |
| 36 'bcell': range(8, 16)} | 36 'bcell': range(8, 16)} |
| 37 prediction_species = {'mhci': [], | |
| 38 'mhcii': range(11, 31), | |
| 39 'processing': range(8, 15), | |
| 40 'mhcnp': range(8, 12), | |
| 41 'bcell': range(8, 16)} | |
| 42 | |
| 43 | |
def parse_alleles(allelefile, lengths):
    """Read alleles, and the peptide lengths to pair with them, from a file.

    Each non-blank line of ``allelefile`` is comma-separated: the first
    field is the allele name and any further fields are lengths for that
    allele.  The allele is repeated once per length so that the two
    returned lists stay position-aligned.

    Args:
        allelefile: Path of the text file to read.
        lengths: Default length(s) applied to a line that lists none.
            May be ``None``, a comma-separated string, or an iterable
            whose items may themselves be comma-separated strings.

    Returns:
        A tuple ``(alleles, lengths)`` of two lists of strings.  When a
        line has no lengths and no defaults were supplied, the allele is
        still appended but no length is, so the lists can differ in size.
    """
    # Normalize the caller's defaults once.  They are kept apart from the
    # output list: the previous code rebound ``lengths`` to the output
    # accumulator, which discarded the defaults entirely.
    if lengths is None:
        raw_defaults = []
    elif isinstance(lengths, str):
        raw_defaults = lengths.split(',')
    else:
        raw_defaults = [part for item in lengths
                        for part in str(item).split(',')]
    defaults = [d.strip() for d in raw_defaults if d.strip()]

    alleles = []
    allele_lengths = []
    with open(allelefile, 'r') as fh:
        for line in fh:
            fields = [field.strip() for field in line.split(',')]
            allele = fields[0]
            if not allele:
                continue  # blank line, or a line with an empty allele field
            # Lengths given on the line win; otherwise use the defaults.
            line_lengths = [f for f in fields[1:] if f] or defaults
            if line_lengths:
                for alen in line_lengths:
                    alleles.append(allele)
                    allele_lengths.append(alen)
            else:
                alleles.append(allele)
    return (alleles, allele_lengths)
| 63 | |
| 64 | |
def query(url, prediction, seq, allele, length, results,
          seqid=None, method='recommended', proteasome=None,
          timeout=300, retries=3, sleep=300, debug=False):
    """POST one sequence to an IEDB tools endpoint and collect the reply.

    Args:
        url: Endpoint to POST to.
        prediction: Prediction type; ``'bcell'`` sends the length as
            ``window_size``, anything else sends it as ``length``.
        seq: Sequence text; surrounding whitespace is stripped.
        allele: Value for the ``allele`` field, or ``None`` to omit it.
        length: Value for the length field, or ``None`` to omit it.
        results: Dict with ``'prediction'`` and ``'detail'`` entries, each
            a dict holding ``'header'`` and an ``'entries'`` list.  It is
            updated in place.
        seqid: Optional identifier prefixed (tab-separated) to every
            collected line; it also adds an ``ID`` column to the headers.
        method: Value for the ``method`` field; omitted when falsy.
        proteasome: Value for the ``proteasome`` field; omitted when falsy.
        timeout: Timeout in seconds passed to ``urlopen``.
        retries: Extra attempts allowed after the first one fails with an
            ``HTTPError``; negative values are treated as 0.
        sleep: Seconds to wait between attempts.
        debug: When true, echo the request and raw response to stderr.

    Returns:
        The same ``results`` dict that was passed in.

    Exits the process through ``warn_err`` when the reply looks like an
    error page, when the status is not 200, when the last attempt raises
    ``HTTPError``, or when any other exception occurs.
    """
    params = dict()
    if method:
        params['method'] = method.encode()
    if proteasome:
        params['proteasome'] = proteasome.encode()
    params['sequence_text'] = seq.strip().encode()
    if allele is not None:
        params['allele'] = allele.encode()
    if length is not None:
        len_param = 'window_size' if prediction == 'bcell' else 'length'
        params[len_param] = str(length).encode()
    req_data = urlencode(params)
    if debug:
        print('url %s %s' % (url, unquote(req_data)), file=sys.stderr)

    # One initial attempt plus ``retries`` retries.  The previous
    # ``range(1, retries)`` made no request at all for retries=0 and never
    # reached the "final attempt" branch below.
    attempts = max(0, retries) + 1
    for attempt in range(1, attempts + 1):
        response = None
        try:
            response = urlopen(url, data=req_data.encode('utf-8'),
                               timeout=timeout)
            if response and response.getcode() == 200:
                data = [line.decode() for line in response.readlines()]
                if debug:
                    print(data, file=sys.stderr)
                rslts = results['prediction']['entries']
                for line in data:
                    if 'invalid' in line.lower() or 'tools_api.html' in line:
                        msg = '%s %s\n%s' % (url, unquote(req_data),
                                             ''.join(data))
                        warn_err(msg, exit_code=1)
                    if line.find('eptide') > 0:
                        results['prediction']['header'] = "#%s%s" %\
                            ("ID\t" if seqid else "", line)
                        continue
                    elif method == 'Bepipred' and line.find('Residue') > 0:
                        results['detail']['header'] = "#%s%s" %\
                            ("ID\t" if seqid else "", line)
                        # Lines after this header go to the detail list.
                        rslts = results['detail']['entries']
                        continue
                    # append, not extend: extend() on a str stored one
                    # list element per character.
                    if seqid:
                        rslts.append("%s\t%s" % (seqid, line))
                    else:
                        rslts.append(line)
                break
            else:
                code = response.getcode() if response else 1
                warn_err("Error connecting to IEDB server\n",
                         exit_code=code)
        except HTTPError as e:
            # Only the last attempt is fatal; earlier ones warn and retry.
            code = None if attempt < attempts else (e.code or 1)
            warn_err("%d of %d Error connecting to IEDB server %s\n" %
                     (attempt, attempts, e),
                     exit_code=code)
            time.sleep(sleep)
        except Exception as e:
            warn_err("Error connecting to IEDB server %s\n" % e,
                     exit_code=3)
    return results
| 37 | 128 |
| 38 | 129 |
| 39 def warn_err(msg, exit_code=1): | 130 def warn_err(msg, exit_code=1): |
| 40 sys.stderr.write(msg) | 131 sys.stderr.write(msg) |
| 132 sys.stderr.flush() | |
| 41 if exit_code: | 133 if exit_code: |
| 42 sys.exit(exit_code) | 134 sys.exit(exit_code) |
| 43 | 135 |
| 44 | 136 |
| 45 def __main__(): | 137 def __main__(): |
| 63 help='IEDB processing proteasome type') | 155 help='IEDB processing proteasome type') |
| 64 parser.add_argument('-a', '--allele', | 156 parser.add_argument('-a', '--allele', |
| 65 action="append", | 157 action="append", |
| 66 default=[], | 158 default=[], |
| 67 help='Alleles for which to make predictions') | 159 help='Alleles for which to make predictions') |
| 160 parser.add_argument('-A', '--allelefile', | |
| 161 default=None, | |
| 162 help='File of HLA alleles') | |
| 68 parser.add_argument('-l', '--length', | 163 parser.add_argument('-l', '--length', |
| 69 action="append", | 164 action="append", |
| 70 default=[], | 165 default=[], |
| 71 help='lengths for which to make predictions, ' + | 166 help='lengths for which to make predictions, ' + |
| 72 '1 per allele') | 167 '1 per allele') |
| 108 help='Turn on wrapper debugging to stderr') | 203 help='Turn on wrapper debugging to stderr') |
| 109 args = parser.parse_args() | 204 args = parser.parse_args() |
| 110 | 205 |
| 111 aapat = '^[ABCDEFGHIKLMNPQRSTVWY]+$' | 206 aapat = '^[ABCDEFGHIKLMNPQRSTVWY]+$' |
| 112 | 207 |
| 113 if not args.allele and args.prediction != 'bcell': | 208 if args.prediction != 'bcell': |
| 114 warn_err('-a allele required\n', exit_code=1) | 209 if not args.allele and not args.allelefile: |
| 210 warn_err('-a allele or -A allelefile required\n', exit_code=1) | |
| 115 | 211 |
| 116 if not (args.sequence or args.input): | 212 if not (args.sequence or args.input): |
| 117 warn_err('NO Sequences given: ' + | 213 warn_err('NO Sequences given: ' + |
| 118 'either -s sequence or -i input_file is required\n', | 214 'either -s sequence or -i input_file is required\n', |
| 119 exit_code=1) | 215 exit_code=1) |
| 125 except Exception as e: | 221 except Exception as e: |
| 126 warn_err("Unable to open output file: %s\n" % e, exit_code=1) | 222 warn_err("Unable to open output file: %s\n" % e, exit_code=1) |
| 127 else: | 223 else: |
| 128 outputFile = sys.stdout | 224 outputFile = sys.stdout |
| 129 | 225 |
| 226 alleles = [] | |
| 227 lengths = [] | |
| 228 # TODO parse alleles from the args.alleles file | |
| 229 if args.prediction == 'bcell' and args.window_size is not None: | |
| 230 lengths.append(str(args.window_size)) | |
| 231 else: | |
| 232 if args.allelefile: | |
| 233 (alleles, lengths) = parse_alleles(args.allelefile, args.length) | |
| 234 if args.allele: | |
| 235 for i, allele in enumerate(args.allele): | |
| 236 alleles.append(allele) | |
| 237 alen = args.length[i] if i < len(args.length) else args.length[-1] | |
| 238 lengths.append(alen) | |
| 239 allele = ','.join(alleles) if alleles else None | |
| 240 length = ','.join(lengths) if lengths else None | |
| 241 method = args.method | |
| 242 proteasome = args.proteasome if args.prediction == 'processcing' else None | |
| 130 url = 'http://tools-cluster-interface.iedb.org/tools_api/%s/' %\ | 243 url = 'http://tools-cluster-interface.iedb.org/tools_api/%s/' %\ |
| 131 args.prediction | 244 args.prediction |
| 132 len_param = 'length' if args.prediction != 'bcell' else 'window_size' | 245 # results |
| 133 | 246 results = {'prediction': {'header': None, 'entries': []}, 'detail': {'header': None, 'entries': []}} |
| 134 # TODO parse alleles from the args.alleles file | |
| 135 alleles = ','.join(args.allele) if args.prediction != 'bcell' else None | |
| 136 lengths = ','.join(args.length) | |
| 137 if args.prediction == 'bcell': | |
| 138 lengths = args.window_size | |
| 139 method = args.method | |
| 140 proteasome = args.proteasome if args.prediction == 'processcing' else None | |
| 141 global header | |
| 142 header = None | |
| 143 results = [] | |
| 144 global header2 | |
| 145 header2 = None | |
| 146 results2 = [] | |
| 147 | |
| 148 sequence_text = [] | |
| 149 | |
| 150 def add_seq(seqid, seq): | |
| 151 sid = seqid if seqid else "peptide%d" % len(sequence_text) | |
| 152 sequence_text.append(">%s\n%s" % (sid, seq)) | |
| 153 | |
| 154 def query(url, seq, allele, length, seqid=None, method='recommended'): | |
| 155 global header | |
| 156 global header2 | |
| 157 params = dict() | |
| 158 if method: | |
| 159 params['method'] = method.encode() | |
| 160 if proteasome: | |
| 161 params['proteasome'] = proteasome.encode() | |
| 162 params['sequence_text'] = seq.encode() | |
| 163 if allele is not None: | |
| 164 params['allele'] = allele.encode() | |
| 165 if length is not None: | |
| 166 params[len_param] = str(length).encode() | |
| 167 req_data = urlencode(params) | |
| 168 if args.debug: | |
| 169 print('url %s %s' % (url, unquote(req_data)), file=sys.stderr) | |
| 170 retries = max(0, args.retries) + 1 | |
| 171 for retry in range(1, retries): | |
| 172 response = None | |
| 173 try: | |
| 174 response = urlopen(url, data=req_data.encode('utf-8'), | |
| 175 timeout=args.timeout) | |
| 176 if response and response.getcode() == 200: | |
| 177 data = [line.decode() for line in response.readlines()] | |
| 178 if args.debug: | |
| 179 print(data, file=sys.stderr) | |
| 180 rslts = results | |
| 181 for ln, line in enumerate(data): | |
| 182 if line.lower().find('invalid') >= 0: | |
| 183 msg = '%s %s\n%s' % (url, unquote(req_data), | |
| 184 ''.join(data)) | |
| 185 warn_err(msg, exit_code=1) | |
| 186 if line.find('eptide') > 0: | |
| 187 header = "#%s%s" %\ | |
| 188 ("ID\t" if seqid else "", line) | |
| 189 if args.debug: | |
| 190 print(header, file=sys.stderr) | |
| 191 continue | |
| 192 elif method == 'Bepipred' and line.find('Residue') > 0: | |
| 193 header2 = "#%s%s" %\ | |
| 194 ("ID\t" if seqid else "", line) | |
| 195 if args.debug: | |
| 196 print(header2, file=sys.stderr) | |
| 197 rslts = results2 | |
| 198 continue | |
| 199 if seqid: | |
| 200 rslts.extend("%s\t%s" % (seqid, line)) | |
| 201 else: | |
| 202 rslts.extend(line) | |
| 203 break | |
| 204 else: | |
| 205 code = response.getcode() if response else 1 | |
| 206 warn_err("Error connecting to IEDB server\n", | |
| 207 exit_code=code) | |
| 208 except HTTPError as e: | |
| 209 code = None if retry < args.retries else e.code | |
| 210 warn_err("%d of %d Error connecting to IEDB server %s\n" % | |
| 211 (retry, retries, e), | |
| 212 exit_code=code) | |
| 213 time.sleep(args.sleep) | |
| 214 except Exception as e: | |
| 215 warn_err("Error connecting to IEDB server %s\n" % e, | |
| 216 exit_code=3) | |
| 217 | 247 |
| 218 if args.sequence: | 248 if args.sequence: |
| 219 for i, seq in enumerate(args.sequence): | 249 for i, seq in enumerate(args.sequence): |
| 220 query(url, seq, alleles, lengths, seqid=None, method=method) | 250 seqid = 'pep_%d' % i |
| 251 query(url, args.prediction, seq, allele, length, results, | |
| 252 seqid=seqid, method=method, proteasome=proteasome, | |
| 253 timeout=args.timeout, retries=args.retries, | |
| 254 sleep=args.sleep, debug=args.debug) | |
| 221 if args.input: | 255 if args.input: |
| 222 try: | 256 try: |
| 223 fh = open(args.input, 'r') | 257 fh = open(args.input, 'r') |
| 224 if args.column: # tabular | 258 if args.column: # tabular |
| 225 col = int(args.column) | 259 col = int(args.column) |
| 226 idcol = int(args.id_column) if args.id_column else None | 260 idcol = int(args.id_column) if args.id_column else None |
| 227 for i, line in enumerate(fh): | 261 for i, line in enumerate(fh): |
| 228 fields = line.split('\t') | 262 fields = line.rstrip('\r\n').split('\t') |
| 229 if len(fields) > col: | 263 if len(fields) > col: |
| 230 seq = re.sub('[_*]', '', fields[col]) | 264 seq = re.sub('[_*]', '', fields[col].strip()) |
| 231 if re.match(aapat, seq): | 265 if re.match(aapat, seq): |
| 232 if idcol is not None and idcol < len(fields): | 266 if idcol is not None and idcol < len(fields): |
| 233 seqid = fields[idcol] | 267 seqid = fields[idcol] |
| 234 else: | 268 else: |
| 235 seqid = None | 269 seqid = 'pep_%d' % i |
| 236 query(url, seq, alleles, lengths, | 270 query(url, args.prediction, seq, allele, length, |
| 237 seqid=seqid, method=method) | 271 results, seqid=seqid, |
| 272 method=method, proteasome=proteasome, | |
| 273 timeout=args.timeout, retries=args.retries, | |
| 274 sleep=args.sleep, debug=args.debug) | |
| 238 else: | 275 else: |
| 239 warn_err('Line %d, Not a peptide: %s\n' % (i, seq), | 276 warn_err('Line %d, Not a peptide: %s\n' % (i, seq), |
| 240 exit_code=None) | 277 exit_code=None) |
| 241 else: # fasta | 278 else: # fasta |
| 242 seqid = None | 279 seqid = None |
| 243 seq = '' | 280 seq = '' |
| 244 for i, line in enumerate(fh): | 281 for i, line in enumerate(fh): |
| 245 if line.startswith('>'): | 282 if line.startswith('>'): |
| 246 if seqid and len(seq) > 0: | 283 if seqid and len(seq) > 0: |
| 247 query(url, seq, alleles, lengths, | 284 query(url, args.prediction, seq, allele, length, |
| 248 seqid=seqid, method=method) | 285 results, seqid=seqid, |
| 286 method=method, proteasome=proteasome, | |
| 287 timeout=args.timeout, retries=args.retries, | |
| 288 sleep=args.sleep, debug=args.debug) | |
| 249 seqid = line[1:].strip() | 289 seqid = line[1:].strip() |
| 250 seq = '' | 290 seq = '' |
| 251 else: | 291 else: |
| 252 seq += line.strip() | 292 seq += line.strip() |
| 253 if seqid and len(seq) > 0: | 293 if seqid and len(seq) > 0: |
| 254 query(url, seq, alleles, lengths, | 294 query(url, args.prediction, seq, allele, length, |
| 255 seqid=seqid, method=method) | 295 results, seqid=seqid, |
| 296 method=method, proteasome=proteasome, | |
| 297 timeout=args.timeout, retries=args.retries, | |
| 298 sleep=args.sleep, debug=args.debug) | |
| 256 fh.close() | 299 fh.close() |
| 257 except Exception as e: | 300 except Exception as e: |
| 258 warn_err("Unable to open input file: %s\n" % e, exit_code=1) | 301 warn_err("Unable to open input file: %s\n" % e, exit_code=1) |
| 259 | 302 |
| 260 if header: | 303 if results['prediction']['header']: |
| 261 outputFile.write(header) | 304 outputFile.write(results['prediction']['header']) |
| 262 for line in results: | 305 for line in results['prediction']['entries']: |
| 263 outputFile.write(line) | 306 outputFile.write(line) |
| 264 if results2: | 307 if results['detail']['entries']: |
| 265 if args.output2: | 308 if args.output2: |
| 266 try: | 309 try: |
| 267 outPath = os.path.abspath(args.output2) | 310 outPath = os.path.abspath(args.output2) |
| 268 outFile = open(outPath, 'w') | 311 outFile = open(outPath, 'w') |
| 269 except Exception as e: | 312 except Exception as e: |
| 270 warn_err("Unable to open output file: %s\n" % e, exit_code=1) | 313 warn_err("Unable to open output file: %s\n" % e, exit_code=1) |
| 271 else: | 314 else: |
| 272 outFile = sys.stdout | 315 outFile = sys.stdout |
| 273 if header2: | 316 if results['detail']['header']: |
| 274 outFile.write(header2) | 317 outFile.write(results['detail']['header']) |
| 275 for line in results2: | 318 for line in results['detail']['entries']: |
| 276 outFile.write(line) | 319 outFile.write(line) |
| 277 | 320 |
| 278 | 321 |
| 279 if __name__ == "__main__": | 322 if __name__ == "__main__": |
| 280 __main__() | 323 __main__() |
