Mercurial > repos > jjohnson > iedb_api
changeset 4:a14128950578 draft default tip
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/iedb_api commit 98a9dd3bd9c567e8b8e43ac5b54c4ba75a6fe78d"
author | jjohnson |
---|---|
date | Fri, 28 Feb 2020 15:45:14 -0500 |
parents | 153d5fa7af53 |
children | |
files | iedb_api.py iedb_api.xml |
diffstat | 2 files changed, 192 insertions(+), 137 deletions(-) [+] |
line wrap: on
line diff
--- a/iedb_api.py Wed Feb 26 16:27:06 2020 -0500 +++ b/iedb_api.py Fri Feb 28 15:45:14 2020 -0500 @@ -34,10 +34,102 @@ 'processing': range(8, 15), 'mhcnp': range(8, 12), 'bcell': range(8, 16)} +prediction_species = {'mhci': [], + 'mhcii': range(11, 31), + 'processing': range(8, 15), + 'mhcnp': range(8, 12), + 'bcell': range(8, 16)} + + +def parse_alleles(allelefile, lengths): + alleles = [] + lengths = [] + with open(allelefile, 'r') as fh: + for i, line in enumerate(fh): + fields = line.strip().split(',') + allele = fields[0].strip() + if allele: + if len(fields) > 1: + for alen in fields[1:]: + alleles.append(allele) + lengths.append(alen) + elif lengths: + for alen in str(lengths).split(','): + alleles.append(allele) + lengths.append(alen) + else: + alleles.append(allele) + return (alleles, lengths) + + +def query(url, prediction, seq, allele, length, results, + seqid=None, method='recommended', proteasome=None, + timeout=300, retries=3, sleep=300, debug=False): + params = dict() + if method: + params['method'] = method.encode() + if proteasome: + params['proteasome'] = proteasome.encode() + params['sequence_text'] = seq.strip().encode() + if allele is not None: + params['allele'] = allele.encode() + if length is not None: + if prediction == 'bcell': + params['window_size'] = str(length).encode() + else: + params['length'] = str(length).encode() + req_data = urlencode(params) + if debug: + print('url %s %s' % (url, unquote(req_data)), file=sys.stderr) + retries = max(0, retries) + 1 + for retry in range(1, retries): + response = None + try: + response = urlopen(url, data=req_data.encode('utf-8'), + timeout=timeout) + if response and response.getcode() == 200: + data = [line.decode() for line in response.readlines()] + if debug: + print(data, file=sys.stderr) + rslts = results['prediction']['entries'] + for ln, line in enumerate(data): + if 'invalid' in line.lower() or 'tools_api.html' in line: + msg = '%s %s\n%s' % (url, unquote(req_data), + ''.join(data)) + warn_err(msg, exit_code=1) + if line.find('eptide') > 0: + results['prediction']['header'] = "#%s%s" %\ + ("ID\t" if seqid else "", line) + continue + elif method == 'Bepipred' and line.find('Residue') > 0: + results['detail']['header'] = "#%s%s" %\ + ("ID\t" if seqid else "", line) + rslts = results['detail']['entries'] + continue + if seqid: + rslts.extend("%s\t%s" % (seqid, line)) + else: + rslts.extend(line) + break + else: + code = response.getcode() if response else 1 + warn_err("Error connecting to IEDB server\n", + exit_code=code) + except HTTPError as e: + code = None if retry < retries else e.code + warn_err("%d of %d Error connecting to IEDB server %s\n" % + (retry, retries, e), + exit_code=code) + time.sleep(sleep) + except Exception as e: + warn_err("Error connecting to IEDB server %s\n" % e, + exit_code=3) + return results def warn_err(msg, exit_code=1): sys.stderr.write(msg) + sys.stderr.flush() if exit_code: sys.exit(exit_code) @@ -65,6 +157,9 @@ action="append", default=[], help='Alleles for which to make predictions') + parser.add_argument('-A', '--allelefile', + default=None, + help='File of HLA alleles') parser.add_argument('-l', '--length', action="append", default=[], @@ -110,8 +205,9 @@ aapat = '^[ABCDEFGHIKLMNPQRSTVWY]+$' - if not args.allele and args.prediction != 'bcell': - warn_err('-a allele required\n', exit_code=1) + if args.prediction != 'bcell': + if not args.allele and not args.allelefile: + warn_err('-a allele or -A allelefile required\n', exit_code=1) if not (args.sequence or args.input): warn_err('NO Sequences given: ' + @@ -127,97 +223,35 @@ else: outputFile = sys.stdout - url = 'http://tools-cluster-interface.iedb.org/tools_api/%s/' %\ - args.prediction - len_param = 'length' if args.prediction != 'bcell' else 'window_size' - + alleles = [] + lengths = [] # TODO parse alleles from the args.alleles file - alleles = ','.join(args.allele) if args.prediction != 'bcell' else None - lengths = ','.join(args.length) - if args.prediction == 'bcell': - lengths = args.window_size + if args.prediction == 'bcell' and args.window_size is not None: + lengths.append(str(args.window_size)) + else: + if args.allelefile: + (alleles, lengths) = parse_alleles(args.allelefile, args.length) + if args.allele: + for i, allele in enumerate(args.allele): + alleles.append(allele) + alen = args.length[i] if i < len(args.length) else args.length[-1] + lengths.append(alen) + allele = ','.join(alleles) if alleles else None + length = ','.join(lengths) if lengths else None method = args.method proteasome = args.proteasome if args.prediction == 'processcing' else None - global header - header = None - results = [] - global header2 - header2 = None - results2 = [] - - sequence_text = [] - - def add_seq(seqid, seq): - sid = seqid if seqid else "peptide%d" % len(sequence_text) - sequence_text.append(">%s\n%s" % (sid, seq)) - - def query(url, seq, allele, length, seqid=None, method='recommended'): - global header - global header2 - params = dict() - if method: - params['method'] = method.encode() - if proteasome: - params['proteasome'] = proteasome.encode() - params['sequence_text'] = seq.encode() - if allele is not None: - params['allele'] = allele.encode() - if length is not None: - params[len_param] = str(length).encode() - req_data = urlencode(params) - if args.debug: - print('url %s %s' % (url, unquote(req_data)), file=sys.stderr) - retries = max(0, args.retries) + 1 - for retry in range(1, retries): - response = None - try: - response = urlopen(url, data=req_data.encode('utf-8'), - timeout=args.timeout) - if response and response.getcode() == 200: - data = [line.decode() for line in response.readlines()] - if args.debug: - print(data, file=sys.stderr) - rslts = results - for ln, line in enumerate(data): - if line.lower().find('invalid') >= 0: - msg = '%s %s\n%s' % (url, unquote(req_data), - ''.join(data)) - warn_err(msg, exit_code=1) - if line.find('eptide') > 0: - header = "#%s%s" %\ - ("ID\t" if seqid else "", line) - if args.debug: - print(header, file=sys.stderr) - continue - elif method == 'Bepipred' and line.find('Residue') > 0: - header2 = "#%s%s" %\ - ("ID\t" if seqid else "", line) - if args.debug: - print(header2, file=sys.stderr) - rslts = results2 - continue - if seqid: - rslts.extend("%s\t%s" % (seqid, line)) - else: - rslts.extend(line) - break - else: - code = response.getcode() if response else 1 - warn_err("Error connecting to IEDB server\n", - exit_code=code) - except HTTPError as e: - code = None if retry < args.retries else e.code - warn_err("%d of %d Error connecting to IEDB server %s\n" % - (retry, retries, e), - exit_code=code) - time.sleep(args.sleep) - except Exception as e: - warn_err("Error connecting to IEDB server %s\n" % e, - exit_code=3) + url = 'http://tools-cluster-interface.iedb.org/tools_api/%s/' %\ + args.prediction + # results + results = {'prediction': {'header': None, 'entries': []}, 'detail': {'header': None, 'entries': []}} if args.sequence: for i, seq in enumerate(args.sequence): - query(url, seq, alleles, lengths, seqid=None, method=method) + seqid = 'pep_%d' % i + query(url, args.prediction, seq, allele, length, results, + seqid=seqid, method=method, proteasome=proteasome, + timeout=args.timeout, retries=args.retries, + sleep=args.sleep, debug=args.debug) if args.input: try: fh = open(args.input, 'r') @@ -225,16 +259,19 @@ col = int(args.column) idcol = int(args.id_column) if args.id_column else None for i, line in enumerate(fh): - fields = line.split('\t') + fields = line.rstrip('\r\n').split('\t') if len(fields) > col: - seq = re.sub('[_*]', '', fields[col]) + seq = re.sub('[_*]', '', fields[col].strip()) if re.match(aapat, seq): if idcol is not None and idcol < len(fields): seqid = fields[idcol] else: - seqid = None - query(url, seq, alleles, lengths, - seqid=seqid, method=method) + seqid = 'pep_%d' % i + query(url, args.prediction, seq, allele, length, + results, seqid=seqid, + method=method, proteasome=proteasome, + timeout=args.timeout, retries=args.retries, + sleep=args.sleep, debug=args.debug) else: warn_err('Line %d, Not a peptide: %s\n' % (i, seq), exit_code=None) @@ -244,24 +281,30 @@ for i, line in enumerate(fh): if line.startswith('>'): if seqid and len(seq) > 0: - query(url, seq, alleles, lengths, - seqid=seqid, method=method) + query(url, args.prediction, seq, allele, length, + results, seqid=seqid, + method=method, proteasome=proteasome, + timeout=args.timeout, retries=args.retries, + sleep=args.sleep, debug=args.debug) seqid = line[1:].strip() seq = '' else: seq += line.strip() if seqid and len(seq) > 0: - query(url, seq, alleles, lengths, - seqid=seqid, method=method) + query(url, args.prediction, seq, allele, length, + results, seqid=seqid, + method=method, proteasome=proteasome, + timeout=args.timeout, retries=args.retries, + sleep=args.sleep, debug=args.debug) fh.close() except Exception as e: warn_err("Unable to open input file: %s\n" % e, exit_code=1) - if header: - outputFile.write(header) - for line in results: + if results['prediction']['header']: + outputFile.write(results['prediction']['header']) + for line in results['prediction']['entries']: outputFile.write(line) - if results2: + if results['detail']['entries']: if args.output2: try: outPath = os.path.abspath(args.output2) @@ -270,9 +313,9 @@ warn_err("Unable to open output file: %s\n" % e, exit_code=1) else: outFile = sys.stdout - if header2: - outFile.write(header2) - for line in results2: + if results['detail']['header']: + outFile.write(results['detail']['header']) + for line in results['detail']['entries']: outFile.write(line)
--- a/iedb_api.xml Wed Feb 26 16:27:06 2020 -0500 +++ b/iedb_api.xml Fri Feb 28 15:45:14 2020 -0500 @@ -24,6 +24,7 @@ <requirements> <requirement type="package" version="3.7">python</requirement> </requirements> + <command detect_errors="exit_code"><![CDATA[ #import re python '${__tool_directory__}/iedb_api.py' @@ -38,37 +39,9 @@ --proteasome $prediction.proteasome #end if #if $prediction.alleles.allelesrc == 'history': - #for $line in open(str($prediction.alleles.allele_file)): - #set $fields = $line.strip().split(',') - #set $allele = $fields[0].strip() - #if len($allele) > 0: - #if len($fields) > 1: - #for $alen in $fields[1:]: - -a '$allele' -l $alen - #end for - #else: - #for $alen in str($prediction.lengths).split(','): - -a '$allele' -l $alen - #end for - #end if - #end if - #end for + -A '$prediction.alleles.allele_file' #else: - #for $word in str($prediction.alleles.allele_text).strip().split(): - #set $fields = $word.strip().split(',') - #set $allele = $fields[0].strip() - #if len($allele) > 0: - #if len($fields) > 1: - #for $alen in $fields[1:]: - -a '$allele' -l $alen - #end for - #else: - #for $alen in str($prediction.lengths).split(','): - -a '$allele' -l $alen - #end for - #end if - #end if - #end for + -A '$entered_alleles' #end if #end if @@ -81,12 +54,33 @@ -C #echo int(str($sequence.id_col)) - 1 #end if #else: - #for $seq in str($sequence.seq_text).strip().split(): - -s $seq.strip() - #end for + -i '$entered_seqs' -c 1 -C 0 #end if -o '$output' ]]></command> + <configfiles> + <configfile name="entered_alleles"><![CDATA[#slurp +#if $prediction.tool != 'bcell' and $prediction.alleles.allelesrc == 'entry' +#for $word in str($prediction.alleles.allele_text).strip().split(): +#if $word.find(',') > 0 +$word +#else +#set $allele = $word + ',' + str($prediction.lengths) +$allele +#end if +#end for +#end if +]]></configfile> + <configfile name="entered_seqs"><![CDATA[#slurp +#if $sequence.seqsrc == 'entry' +#for $i, $seq in enumerate(str($sequence.seq_text).strip().split()) +#set $seqid = $i + 1 +#set $seqtext = '\t'.join([str($seqid),$seq.strip()]) +$seqtext +#end for +#end if +]]></configfile> + </configfiles> <inputs> <conditional name="prediction"> <param name="tool" type="select" label="Prediction"> @@ -132,7 +126,7 @@ <option value="comblib">comblib</option> <option value="tepitope">tepitope</option> </param> - <expand macro="alleles" hla_regex="(DPA1\*0[1-3](:0[1-3])?/DPB1\*0[1-6]:0[12]|DQA1\*0[1-5]:0[12]/DQB1\*0[2-6]:0[12]|DRB[1-5]\*[01][1-9]:0[1-5]|H2-IA[bd])(,(asis|[1-2][0-9]|30))*" hla_examples="DPA1*01/DPB1*04:01 HLA-DRB1*01:01 H2-IAb" hlalen_examples="DPA1*01/DPB1*04:01,11,15"/> + <expand macro="alleles" hla_regex="(DPA1\*0[1-3](:0[1-3])?/DPB1\*0[1-6]:0[12]|DQA1\*0[1-5]:0[12]/DQB1\*0[2-6]:0[12]|(HLA-)?DRB[1-5]\*[01][1-9]:0[1-5]|H2-IA[bd])(,(asis|[1-2][0-9]|30))*" hla_examples="DPA1*01/DPB1*04:01 HLA-DRB1*01:01 H2-IAb" hlalen_examples="DPA1*01/DPB1*04:01,11,15"/> <param name="lengths" type="select" multiple="true" optional="false" label="peptide lengths for prediction"> <help>Used for any alleles which don't include specified lengths</help> <option value="asis">asis</option> @@ -382,6 +376,24 @@ </assert_contents> </output> </test> + <!-- test8 --> + <test> + <conditional name="prediction"> + <param name="tool" value="bcell"/> + <param name="method" value="Bepipred"/> + <param name="window_size" value="9"/> + </conditional> + <conditional name="sequence"> + <param name="seqsrc" value="fasta"/> + <param name="seq_fasta" ftype="fasta" value="bcell.fa"/> + </conditional> + <output name="output"> + <assert_contents> + <has_text text="ADVAGH" /> + </assert_contents> + </output> + </test> + </tests> <help><![CDATA[ The IEDB is a free resource, funded by a contract from the National Institute of Allergy and Infectious Diseases. It offers easy searching of experimental data characterizing antibody and T cell epitopes studied in humans, non-human primates, and other animal species.