Mercurial > repos > jjohnson > iedb_api
view iedb_api.py @ 4:a14128950578 draft default tip
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/iedb_api commit 98a9dd3bd9c567e8b8e43ac5b54c4ba75a6fe78d"
author | jjohnson |
---|---|
date | Fri, 28 Feb 2020 15:45:14 -0500 |
parents | 153d5fa7af53 |
children |
line wrap: on
line source
#!/usr/bin/env python import argparse import os.path import re import sys import time from urllib.error import HTTPError from urllib.parse import unquote, urlencode from urllib.request import urlopen mhci_methods = ['recommended', 'consensus', 'netmhcpan_ba', 'netmhcpan_el', 'ann', 'smmpmbec', 'smm', 'comblib_sidney2008', 'netmhccons', 'pickpocket', 'netmhcstabpan'] mhcii_methods = ['recommended', 'consensus', 'NetMHCIIpan', 'nn_align', 'smm_align', 'comblib', 'tepitope'] processing_methods = ['recommended', 'netmhcpan', 'ann', 'smmpmbec', 'smm', 'comblib_sidney2008', 'netmhccons', 'pickpocket'] mhcnp_methods = ['mhcnp', 'netmhcpan'] bcell_methods = ['Bepipred', 'Chou-Fasman', 'Emini', 'Karplus-Schulz', 'Kolaskar-Tongaonkar', 'Parker', 'Bepipred-2.0'] prediction_methods = {'mhci': mhci_methods, 'mhcii': mhcii_methods, 'processing': processing_methods, 'mhcnp': mhcnp_methods, 'bcell': bcell_methods} all_methods = set(mhci_methods + mhcii_methods + mhcnp_methods + bcell_methods) prediction_lengths = {'mhci': range(8, 16), 'mhcii': range(11, 31), 'processing': range(8, 15), 'mhcnp': range(8, 12), 'bcell': range(8, 16)} prediction_species = {'mhci': [], 'mhcii': range(11, 31), 'processing': range(8, 15), 'mhcnp': range(8, 12), 'bcell': range(8, 16)} def parse_alleles(allelefile, lengths): alleles = [] lengths = [] with open(allelefile, 'r') as fh: for i, line in enumerate(fh): fields = line.strip().split(',') allele = fields[0].strip() if allele: if len(fields) > 1: for alen in fields[1:]: alleles.append(allele) lengths.append(alen) elif lengths: for alen in str(lengths).split(','): alleles.append(allele) lengths.append(alen) else: alleles.append(allele) return (alleles, lengths) def query(url, prediction, seq, allele, length, results, seqid=None, method='recommended', proteasome=None, timeout=300, retries=3, sleep=300, debug=False): params = dict() if method: params['method'] = method.encode() if proteasome: params['proteasome'] = proteasome.encode() params['sequence_text'] = seq.strip().encode() if allele is not None: params['allele'] = allele.encode() if length is not None: if prediction == 'bcell': params['window_size'] = str(length).encode() else: params['length'] = str(length).encode() req_data = urlencode(params) if debug: print('url %s %s' % (url, unquote(req_data)), file=sys.stderr) retries = max(0, retries) + 1 for retry in range(1, retries): response = None try: response = urlopen(url, data=req_data.encode('utf-8'), timeout=timeout) if response and response.getcode() == 200: data = [line.decode() for line in response.readlines()] if debug: print(data, file=sys.stderr) rslts = results['prediction']['entries'] for ln, line in enumerate(data): if 'invalid' in line.lower() or 'tools_api.html' in line: msg = '%s %s\n%s' % (url, unquote(req_data), ''.join(data)) warn_err(msg, exit_code=1) if line.find('eptide') > 0: results['prediction']['header'] = "#%s%s" %\ ("ID\t" if seqid else "", line) continue elif method == 'Bepipred' and line.find('Residue') > 0: results['detail']['header'] = "#%s%s" %\ ("ID\t" if seqid else "", line) rslts = results['detail']['entries'] continue if seqid: rslts.extend("%s\t%s" % (seqid, line)) else: rslts.extend(line) break else: code = response.getcode() if response else 1 warn_err("Error connecting to IEDB server\n", exit_code=code) except HTTPError as e: code = None if retry < retries else e.code warn_err("%d of %d Error connecting to IEDB server %s\n" % (retry, retries, e), exit_code=code) time.sleep(sleep) except Exception as e: warn_err("Error connecting to IEDB server %s\n" % e, exit_code=3) return results def warn_err(msg, exit_code=1): sys.stderr.write(msg) sys.stderr.flush() if exit_code: sys.exit(exit_code) def __main__(): # Parse Command Line parser = argparse.ArgumentParser(description='', epilog='') parser.add_argument('-p', '--prediction', default='mhci', choices=prediction_methods.keys(), help='IEDB API prediction service') parser.add_argument('-s', '--sequence', action="append", default=None, help='Peptide Sequence') parser.add_argument('-m', '--method', default='recommended', choices=all_methods, help='prediction method') parser.add_argument('-P', '--proteasome', default=None, choices=['immuno', 'constitutive'], help='IEDB processing proteasome type') parser.add_argument('-a', '--allele', action="append", default=[], help='Alleles for which to make predictions') parser.add_argument('-A', '--allelefile', default=None, help='File of HLA alleles') parser.add_argument('-l', '--length', action="append", default=[], help='lengths for which to make predictions, ' + '1 per allele') parser.add_argument('-w', '--window_size', type=int, default=None, help='window_size for bcell prediction') parser.add_argument('-i', '--input', default=None, help='Input file for peptide sequences ' + '(fasta or tabular)') parser.add_argument('-c', '--column', default=None, help='Peptide Column in a tabular input file') parser.add_argument('-C', '--id_column', default=None, help='ID Column in a tabular input file') parser.add_argument('-o', '--output', default=None, help='Output file for query results') parser.add_argument('-O', '--output2', default='iedb_results2', help='Output file for secondary query results') parser.add_argument('-t', '--timeout', type=int, default=600, help='Seconds to wait for server response') parser.add_argument('-r', '--retries', type=int, default=5, help='Number of times to retry server query') parser.add_argument('-S', '--sleep', type=int, default=300, help='Seconds to wait between retries') parser.add_argument('-d', '--debug', action='store_true', default=False, help='Turn on wrapper debugging to stderr') args = parser.parse_args() aapat = '^[ABCDEFGHIKLMNPQRSTVWY]+$' if args.prediction != 'bcell': if not args.allele and not args.allelefile: warn_err('-a allele or -A allelefile required\n', exit_code=1) if not (args.sequence or args.input): warn_err('NO Sequences given: ' + 'either -s sequence or -i input_file is required\n', exit_code=1) if args.output is not None: try: outputPath = os.path.abspath(args.output) outputFile = open(outputPath, 'w') except Exception as e: warn_err("Unable to open output file: %s\n" % e, exit_code=1) else: outputFile = sys.stdout alleles = [] lengths = [] # TODO parse alleles from the args.alleles file if args.prediction == 'bcell' and args.window_size is not None: lengths.append(str(args.window_size)) else: if args.allelefile: (alleles, lengths) = parse_alleles(args.allelefile, args.length) if args.allele: for i, allele in enumerate(args.allele): alleles.append(allele) alen = args.length[i] if i < len(args.length) else args.length[-1] lengths.append(alen) allele = ','.join(alleles) if alleles else None length = ','.join(lengths) if lengths else None method = args.method proteasome = args.proteasome if args.prediction == 'processcing' else None url = 'http://tools-cluster-interface.iedb.org/tools_api/%s/' %\ args.prediction # results results = {'prediction': {'header': None, 'entries': []}, 'detail': {'header': None, 'entries': []}} if args.sequence: for i, seq in enumerate(args.sequence): seqid = 'pep_%d' % i query(url, args.prediction, seq, allele, length, results, seqid=seqid, method=method, proteasome=proteasome, timeout=args.timeout, retries=args.retries, sleep=args.sleep, debug=args.debug) if args.input: try: fh = open(args.input, 'r') if args.column: # tabular col = int(args.column) idcol = int(args.id_column) if args.id_column else None for i, line in enumerate(fh): fields = line.rstrip('\r\n').split('\t') if len(fields) > col: seq = re.sub('[_*]', '', fields[col].strip()) if re.match(aapat, seq): if idcol is not None and idcol < len(fields): seqid = fields[idcol] else: seqid = 'pep_%d' % i query(url, args.prediction, seq, allele, length, results, seqid=seqid, method=method, proteasome=proteasome, timeout=args.timeout, retries=args.retries, sleep=args.sleep, debug=args.debug) else: warn_err('Line %d, Not a peptide: %s\n' % (i, seq), exit_code=None) else: # fasta seqid = None seq = '' for i, line in enumerate(fh): if line.startswith('>'): if seqid and len(seq) > 0: query(url, args.prediction, seq, allele, length, results, seqid=seqid, method=method, proteasome=proteasome, timeout=args.timeout, retries=args.retries, sleep=args.sleep, debug=args.debug) seqid = line[1:].strip() seq = '' else: seq += line.strip() if seqid and len(seq) > 0: query(url, args.prediction, seq, allele, length, results, seqid=seqid, method=method, proteasome=proteasome, timeout=args.timeout, retries=args.retries, sleep=args.sleep, debug=args.debug) fh.close() except Exception as e: warn_err("Unable to open input file: %s\n" % e, exit_code=1) if results['prediction']['header']: outputFile.write(results['prediction']['header']) for line in results['prediction']['entries']: outputFile.write(line) if results['detail']['entries']: if args.output2: try: outPath = os.path.abspath(args.output2) outFile = open(outPath, 'w') except Exception as e: warn_err("Unable to open output file: %s\n" % e, exit_code=1) else: outFile = sys.stdout if results['detail']['header']: outFile.write(results['detail']['header']) for line in results['detail']['entries']: outFile.write(line) if __name__ == "__main__": __main__()