Mercurial > repos > iuc > iedb_api
diff iedb_api.py @ 0:fe3c43451319 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/iedb_api commit 9aaa7c6c7241db52681b12939ebd908902830ef1"
author | iuc |
---|---|
date | Fri, 28 Feb 2020 18:09:34 -0500 |
parents | |
children | 6cf84410cb2e |
line wrap: on
line diff
#!/usr/bin/env python
"""Command-line client for the IEDB tools API.

Reconstructed from the initial-commit diff of ``iedb_api.py``
(``--- /dev/null`` -> ``+++ b/iedb_api.py``, ``@@ -0,0 +1,319 @@``).

The script POSTs peptide sequences (given with ``-s`` or read from a FASTA
or tabular file given with ``-i``) to one of the IEDB prediction services
and writes the returned tab-separated lines to an output file or stdout.
"""

import argparse
import os.path
import re
import sys
import time
from urllib.error import HTTPError
from urllib.parse import unquote, urlencode
from urllib.request import urlopen

mhci_methods = ['recommended', 'consensus',
                'netmhcpan_ba', 'netmhcpan_el',
                'ann', 'smmpmbec', 'smm',
                'comblib_sidney2008', 'netmhccons',
                'pickpocket', 'netmhcstabpan']
mhcii_methods = ['recommended', 'consensus', 'NetMHCIIpan',
                 'nn_align', 'smm_align', 'comblib', 'tepitope']
processing_methods = ['recommended', 'netmhcpan', 'ann',
                      'smmpmbec', 'smm', 'comblib_sidney2008',
                      'netmhccons', 'pickpocket']
mhcnp_methods = ['mhcnp', 'netmhcpan']
bcell_methods = ['Bepipred', 'Chou-Fasman', 'Emini', 'Karplus-Schulz',
                 'Kolaskar-Tongaonkar', 'Parker', 'Bepipred-2.0']
prediction_methods = {'mhci': mhci_methods,
                      'mhcii': mhcii_methods,
                      'processing': processing_methods,
                      'mhcnp': mhcnp_methods,
                      'bcell': bcell_methods}
# Union of every method accepted by any service; used to validate --method.
all_methods = set(mhci_methods + mhcii_methods + processing_methods +
                  mhcnp_methods + bcell_methods)
prediction_lengths = {'mhci': range(8, 16),
                      'mhcii': range(11, 31),
                      'processing': range(8, 15),
                      'mhcnp': range(8, 12),
                      'bcell': range(8, 16)}


def _split_lengths(lengths):
    """Flatten a lengths spec (None, 'a,b', or a list of such) to a list."""
    if not lengths:
        return []
    if isinstance(lengths, (str, int)):
        lengths = [lengths]
    return [part.strip()
            for item in lengths
            for part in str(item).split(',')
            if part.strip()]


def parse_alleles(allelefile, lengths):
    """Read alleles, and the peptide lengths to pair with them, from a file.

    Each non-empty line of ``allelefile`` is ``allele[,length[,length...]]``.
    One ``(allele, length)`` pair is produced per length. A line without
    lengths falls back to the default ``lengths`` argument; when there is no
    default either, the allele is recorded with no matching length entry.

    Args:
        allelefile: path of the allele file.
        lengths: default lengths; ``None``, a comma-separated string, or a
            list of strings (each may itself be comma-separated).

    Returns:
        A tuple ``(alleles, lengths)`` of two lists of strings.
    """
    default_lengths = _split_lengths(lengths)
    alleles = []
    # Kept separate from the ``lengths`` parameter so the defaults survive.
    allele_lengths = []
    with open(allelefile, 'r') as fh:
        for line in fh:
            fields = [field.strip() for field in line.strip().split(',')]
            allele = fields[0]
            if not allele:
                continue
            own_lengths = [field for field in fields[1:] if field]
            chosen = own_lengths or default_lengths
            for alen in chosen:
                alleles.append(allele)
                allele_lengths.append(alen)
            if not chosen:
                alleles.append(allele)
    return (alleles, allele_lengths)


def _store_response(data, results, seqid, method, request_desc):
    """Sort decoded response lines into the ``results`` header/entry slots."""
    header_prefix = "#%s" % ("ID\t" if seqid else "")
    entry_prefix = "%s\t" % seqid if seqid else ""
    entries = results['prediction']['entries']
    for line in data:
        # The server reports bad parameters in the response body.
        if 'invalid' in line.lower() or 'tools_api.html' in line:
            warn_err('%s\n%s' % (request_desc, ''.join(data)), exit_code=1)
        if line.find('eptide') > 0:
            results['prediction']['header'] = header_prefix + line
            continue
        if method == 'Bepipred' and line.find('Residue') > 0:
            # Lines after this second header belong to the detail table.
            results['detail']['header'] = header_prefix + line
            entries = results['detail']['entries']
            continue
        # One list entry per response line (not one per character).
        entries.append(entry_prefix + line)


def query(url, prediction, seq, allele, length, results,
          seqid=None, method='recommended', proteasome=None,
          timeout=300, retries=3, sleep=300, debug=False):
    """POST one sequence to the IEDB service and collect the reply.

    Args:
        url: service endpoint.
        prediction: service name; ``'bcell'`` sends ``length`` as
            ``window_size`` instead of ``length``.
        seq: peptide/protein sequence text.
        allele: comma-separated alleles, or ``None`` to omit.
        length: comma-separated lengths, or ``None`` to omit.
        results: dict with ``'prediction'`` and ``'detail'`` sub-dicts, each
            holding ``'header'`` and ``'entries'``; updated in place.
        seqid: optional ID prepended (tab-separated) to every entry line.
        method: prediction method name.
        proteasome: optional proteasome type.
        timeout: seconds to wait for the server.
        retries: number of retries after the first attempt on HTTP errors.
        sleep: seconds to wait between attempts.
        debug: echo the request and raw response to stderr.

    Returns:
        The same ``results`` dict.

    Exits the process (via ``warn_err``) when the server rejects the
    request, when all attempts fail, or on any other connection error.
    """
    params = dict()
    if method:
        params['method'] = method
    if proteasome:
        params['proteasome'] = proteasome
    params['sequence_text'] = seq.strip()
    if allele is not None:
        params['allele'] = allele
    if length is not None:
        length_key = 'window_size' if prediction == 'bcell' else 'length'
        params[length_key] = str(length)
    req_data = urlencode(params)
    request_desc = '%s %s' % (url, unquote(req_data))
    if debug:
        print('url %s' % request_desc, file=sys.stderr)
    # One initial attempt plus ``retries`` retries; always at least one.
    attempts = max(0, retries) + 1
    for attempt in range(1, attempts + 1):
        try:
            with urlopen(url, data=req_data.encode('utf-8'),
                         timeout=timeout) as response:
                status = response.getcode()
                data = None
                if status == 200:
                    data = [line.decode() for line in response.readlines()]
        except HTTPError as e:
            # Only the final failed attempt terminates the program.
            code = (e.code or 1) if attempt >= attempts else None
            warn_err("%d of %d Error connecting to IEDB server %s\n" %
                     (attempt, attempts, e),
                     exit_code=code)
            time.sleep(sleep)
            continue
        except Exception as e:
            warn_err("Error connecting to IEDB server %s\n" % e,
                     exit_code=3)
            return results
        if data is None:
            warn_err("Error connecting to IEDB server\n",
                     exit_code=status or 1)
            return results
        if debug:
            print(data, file=sys.stderr)
        _store_response(data, results, seqid, method, request_desc)
        break
    return results


def warn_err(msg, exit_code=1):
    """Write ``msg`` to stderr and exit with ``exit_code`` if it is truthy."""
    sys.stderr.write(msg)
    sys.stderr.flush()
    if exit_code:
        sys.exit(exit_code)


def __main__():
    """Parse the command line, run the queries and write the results."""
    # Parse Command Line
    parser = argparse.ArgumentParser(description='', epilog='')
    parser.add_argument('-p', '--prediction',
                        default='mhci',
                        choices=list(prediction_methods),
                        help='IEDB API prediction service')
    parser.add_argument('-s', '--sequence',
                        action="append",
                        default=None,
                        help='Peptide Sequence')
    parser.add_argument('-m', '--method',
                        default='recommended',
                        choices=sorted(all_methods),
                        help='prediction method')
    parser.add_argument('-P', '--proteasome',
                        default=None,
                        choices=['immuno', 'constitutive'],
                        help='IEDB processing proteasome type')
    parser.add_argument('-a', '--allele',
                        action="append",
                        default=[],
                        help='Alleles for which to make predictions')
    parser.add_argument('-A', '--allelefile',
                        default=None,
                        help='File of HLA alleles')
    parser.add_argument('-l', '--length',
                        action="append",
                        default=[],
                        help='lengths for which to make predictions, ' +
                             '1 per allele')
    parser.add_argument('-w', '--window_size',
                        type=int,
                        default=None,
                        help='window_size for bcell prediction')
    parser.add_argument('-i', '--input',
                        default=None,
                        help='Input file for peptide sequences ' +
                             '(fasta or tabular)')
    parser.add_argument('-c', '--column',
                        default=None,
                        help='Peptide Column in a tabular input file')
    parser.add_argument('-C', '--id_column',
                        default=None,
                        help='ID Column in a tabular input file')
    parser.add_argument('-o', '--output',
                        default=None,
                        help='Output file for query results')
    parser.add_argument('-O', '--output2',
                        default='iedb_results2',
                        help='Output file for secondary query results')
    parser.add_argument('-t', '--timeout',
                        type=int,
                        default=600,
                        help='Seconds to wait for server response')
    parser.add_argument('-r', '--retries',
                        type=int,
                        default=5,
                        help='Number of times to retry server query')
    parser.add_argument('-S', '--sleep',
                        type=int,
                        default=300,
                        help='Seconds to wait between retries')
    parser.add_argument('-d', '--debug',
                        action='store_true',
                        default=False,
                        help='Turn on wrapper debugging to stderr')
    args = parser.parse_args()

    aapat = '^[ABCDEFGHIKLMNPQRSTVWY]+$'

    if args.prediction != 'bcell':
        if not args.allele and not args.allelefile:
            warn_err('-a allele or -A allelefile required\n', exit_code=1)

    if not (args.sequence or args.input):
        warn_err('NO Sequences given: ' +
                 'either -s sequence or -i input_file is required\n',
                 exit_code=1)

    # Open the output before any (slow) query so a bad path fails fast.
    if args.output is not None:
        try:
            outputPath = os.path.abspath(args.output)
            outputFile = open(outputPath, 'w')
        except Exception as e:
            warn_err("Unable to open output file: %s\n" % e, exit_code=1)
    else:
        outputFile = sys.stdout

    # params
    alleles = []
    lengths = []
    if args.prediction == 'bcell' and args.window_size is not None:
        lengths.append(str(args.window_size))
    else:
        if args.allelefile:
            (alleles, lengths) = parse_alleles(args.allelefile, args.length)
        for i, allele in enumerate(args.allele):
            alleles.append(allele)
            # Reuse the last -l value when there are fewer lengths than
            # alleles; with no -l at all, send no length for this allele.
            if args.length:
                lengths.append(args.length[min(i, len(args.length) - 1)])
    allele = ','.join(alleles) if alleles else None
    length = ','.join(lengths) if lengths else None
    method = args.method
    proteasome = args.proteasome if args.prediction == 'processing' else None
    url = 'http://tools-cluster-interface.iedb.org/tools_api/%s/' %\
        args.prediction

    # results
    results = {'prediction': {'header': None, 'entries': []},
               'detail': {'header': None, 'entries': []}}

    def run_query(seq, seqid):
        # All queries share every option except the sequence and its ID.
        query(url, args.prediction, seq, allele, length, results,
              seqid=seqid, method=method, proteasome=proteasome,
              timeout=args.timeout, retries=args.retries,
              sleep=args.sleep, debug=args.debug)

    if args.sequence:
        for i, seq in enumerate(args.sequence):
            run_query(seq, 'pep_%d' % i)

    if args.input:
        col = None
        idcol = None
        if args.column:
            try:
                col = int(args.column)
                idcol = int(args.id_column) if args.id_column else None
            except ValueError as e:
                warn_err("Invalid column number: %s\n" % e, exit_code=1)
        try:
            with open(args.input, 'r') as fh:
                if col is not None:  # tabular
                    for i, line in enumerate(fh):
                        fields = line.rstrip('\r\n').split('\t')
                        if len(fields) <= col:
                            continue
                        seq = re.sub('[_*]', '', fields[col].strip())
                        if not re.match(aapat, seq):
                            warn_err('Line %d, Not a peptide: %s\n'
                                     % (i, seq), exit_code=None)
                            continue
                        if idcol is not None and idcol < len(fields):
                            seqid = fields[idcol]
                        else:
                            seqid = 'pep_%d' % i
                        run_query(seq, seqid)
                else:  # fasta
                    seqid = None
                    seq = ''
                    for line in fh:
                        if line.startswith('>'):
                            if seqid and len(seq) > 0:
                                run_query(seq, seqid)
                            seqid = line[1:].strip()
                            seq = ''
                        else:
                            seq += line.strip()
                    # Flush the final record, which has no following '>'.
                    if seqid and len(seq) > 0:
                        run_query(seq, seqid)
        except OSError as e:
            warn_err("Unable to open input file: %s\n" % e, exit_code=1)

    try:
        if results['prediction']['header']:
            outputFile.write(results['prediction']['header'])
        outputFile.writelines(results['prediction']['entries'])
    finally:
        if outputFile is not sys.stdout:
            outputFile.close()

    if results['detail']['entries']:
        if args.output2:
            try:
                outPath = os.path.abspath(args.output2)
                outFile = open(outPath, 'w')
            except Exception as e:
                warn_err("Unable to open output file: %s\n" % e, exit_code=1)
        else:
            outFile = sys.stdout
        try:
            if results['detail']['header']:
                outFile.write(results['detail']['header'])
            outFile.writelines(results['detail']['entries'])
        finally:
            if outFile is not sys.stdout:
                outFile.close()


if __name__ == "__main__":
    __main__()