Mercurial > repos > jjohnson > iedb_api
changeset 0:991424605492 draft
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/iedb_api commit bbca4d5248b883344319e7a9f42c82d20a11cf0d"
author | jjohnson |
---|---|
date | Mon, 17 Feb 2020 16:04:07 -0500 |
parents | |
children | 4a89ba6cfc63 |
files | iedb_api.py iedb_api.xml test-data/alleles.tsv test-data/seqs.fa test-data/seqs.tsv |
diffstat | 5 files changed, 386 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
#!/usr/bin/env python
"""
iedb_api.py - query the IEDB tools API for epitope predictions.

Peptide sequences are taken from the command line (-s) and/or from a fasta
or tabular input file (-i).  One HTTP POST is made per sequence to
http://tools-api.iedb.org/tools_api/<prediction>/ and the lines of every
response are written to the output file (or stdout), preceded by a single
'#'-prefixed header line.

The script runs unchanged on Python 2.6+ and Python 3.
"""
import optparse
import os.path
import re
import sys
from optparse import OptionParser

try:  # Python 3
    from urllib.error import URLError
    from urllib.parse import urlencode
    from urllib.request import Request, urlopen
except ImportError:  # Python 2
    from urllib import urlencode
    from urllib2 import Request, URLError, urlopen

mhci_methods = ['recommended', 'consensus', 'netmhcpan', 'ann', 'smmpmbec', 'smm', 'comblib_sidney2008', 'netmhccons', 'pickpocket']
mhcii_methods = ['recommended', 'consensus3', 'NetMHCIIpan', 'nn_align', 'smm_align', 'comblib', 'tepitope']
processing_methods = ['recommended', 'consensus', 'netmhcpan', 'ann', 'smmpmbec', 'smm', 'comblib_sidney2008']
mhcnp_methods = ['mhcnp']
# 'Chou-Fasman' and 'Emini' are two separate methods (they used to be fused
# into one string by a missing comma).
bcell_methods = ['Bepipred', 'Chou-Fasman', 'Emini', 'Karplus-Schulz', 'Kolaskar-Tongaonkar', 'Parker']
prediction_methods = {'mhci': mhci_methods, 'mhcii': mhcii_methods, 'processing': processing_methods, 'mhcnp': mhcnp_methods, 'bcell': bcell_methods}

# Prediction services in the order they are offered on the command line.
_PREDICTION_TOOLS = ['mhci', 'mhcii', 'processing', 'mhcnp', 'bcell']

# A cleaned peptide may only contain these residue letters.
_AA_PATTERN = re.compile(r'^[ABCDEFGHIKLMNPQRSTVWY]+$')


def warn_err(msg, exit_code=1):
    """Write msg to stderr and, when exit_code is truthy, exit with that code."""
    sys.stderr.write(msg)
    if exit_code:
        sys.exit(exit_code)


def _all_methods():
    """Return every method in prediction_methods, de-duplicated in first-seen order."""
    methods = []
    for tool in _PREDICTION_TOOLS:
        for name in prediction_methods[tool]:
            if name not in methods:
                methods.append(name)
    return methods


def _build_parser():
    """Build the command line parser.

    --method accepts the methods of every prediction service rather than
    only the MHC-I ones, so that e.g. '-p mhcii -m NetMHCIIpan' is valid.
    """
    parser = optparse.OptionParser()
    parser.add_option('-p', '--prediction', dest='prediction', default='mhci', choices=_PREDICTION_TOOLS, help='IEDB API prediction service')
    parser.add_option('-s', '--sequence', dest='sequence', action="append", default=None, help='Peptide Sequence')
    parser.add_option('-m', '--method', dest='method', default='recommended', choices=_all_methods(), help='prediction method')
    parser.add_option('-a', '--allele', dest='allele', action="append", default=[], help='Alleles for which to make predictions')
    parser.add_option('-l', '--length', dest='length', action="append", default=[], choices=['8', '9', '10', '11', '12', '13', '14', '15'], help='lengths for which to make predictions, 1 per allele')
    parser.add_option('-i', '--input', dest='input', default=None, help='Input file for peptide sequences (fasta or tabular)')
    parser.add_option('-c', '--column', dest='column', default=None, help='Peptide Column in a tabular input file')
    parser.add_option('-C', '--id_column', dest='id_column', default=None, help='ID Column in a tabular input file')
    parser.add_option('-o', '--output', dest='output', default=None, help='Output file for query results')
    parser.add_option('-d', '--debug', dest='debug', action='store_true', default=False, help='Turn on wrapper debugging to stderr')
    return parser


def _parse_response(lines, seqid=None):
    """Split response lines into (header, rows).

    A line containing 'eptide' after its first character is taken as the
    column header; it is returned prefixed with '#' (and an 'ID' column when
    seqid is given).  Every other line becomes a row, prefixed with seqid
    and a tab when seqid is given.  header is None if no header line is seen.
    """
    header = None
    rows = []
    for line in lines:
        if not isinstance(line, str):
            # Python 3 responses yield bytes.
            line = line.decode('utf-8')
        if line.find('eptide') > 0:
            header = "#%s%s" % ("ID\t" if seqid else "", line)
            continue
        rows.append("%s\t%s" % (seqid, line) if seqid else line)
    return header, rows


def _query(url, seq, allele, length, seqid=None, method='recommended', debug=False):
    """POST one sequence to the IEDB service at url and return (header, rows).

    Exits through warn_err when the server cannot be reached (exit code 3)
    or answers with a status other than 200 (exit code = that status).
    """
    params = dict()
    if method:
        params['method'] = method
    params['sequence_text'] = seq
    params['allele'] = allele
    params['length'] = length
    # urlencode output is pure ASCII; Python 3 requires the POST body as bytes.
    request = Request(url, urlencode(params).encode('utf-8'))
    if debug:
        sys.stderr.write("url %s %s %s\n" % (request.get_full_url(), seqid if seqid else "None", seq))
    response = None
    try:
        response = urlopen(request)
    except URLError as e:
        warn_err("Error connecting to IEDB server: %s\n" % e, exit_code=3)
    if response is None:
        warn_err("NO response from IEDB server\n", exit_code=3)
    try:
        if response.getcode() != 200:
            warn_err("Error connecting to IEDB server\n", exit_code=response.getcode())
        return _parse_response(response.readlines(), seqid)
    finally:
        response.close()


def _iter_tabular(fh, col, idcol=None):
    """Yield (seqid, peptide) for each row of a tab-separated file.

    The peptide is read from 0-based column col with '_' and '*' removed.
    Rows with too few columns are skipped silently; rows whose cleaned
    peptide is not made of residue letters are reported on stderr with
    their 1-based line number and skipped.  seqid is None unless idcol is
    given and present in the row.
    """
    for lineno, line in enumerate(fh, 1):
        # Strip the line ending so the last column does not carry it.
        fields = line.rstrip('\r\n').split('\t')
        if len(fields) <= col:
            continue
        seq = re.sub('[_*]', '', fields[col])
        if _AA_PATTERN.match(seq):
            seqid = fields[idcol] if idcol is not None and idcol < len(fields) else None
            yield seqid, seq
        else:
            warn_err('Line %d, Not a peptide: %s\n' % (lineno, seq), exit_code=None)


def _iter_fasta(fh):
    """Yield (seqid, sequence) for each fasta record with a non-empty id and sequence.

    The id is the whole header line after '>' with surrounding whitespace
    removed; sequence lines are stripped and concatenated.  Text before the
    first header is ignored.
    """
    seqid = None
    parts = []
    for line in fh:
        if line.startswith('>'):
            if seqid and parts:
                yield seqid, ''.join(parts)
            seqid = line[1:].strip()
            parts = []
        else:
            chunk = line.strip()
            if chunk:
                parts.append(chunk)
    if seqid and parts:
        yield seqid, ''.join(parts)


def _iter_peptides(options):
    """Yield (seqid, peptide) for the -s sequences, then for the -i input file."""
    for seq in options.sequence or []:
        yield None, seq
    if not options.input:
        return
    col = idcol = None
    if options.column:  # tabular; note that the string '0' is a valid column
        try:
            col = int(options.column)
            idcol = int(options.id_column) if options.id_column else None
        except ValueError as e:
            warn_err("Column options must be integers: %s\n" % e, exit_code=1)
    # Only the open() is guarded, so that query failures are not reported
    # as input-file failures.
    try:
        fh = open(options.input, 'r')
    except (IOError, OSError) as e:
        warn_err("Unable to open input file: %s\n" % e, exit_code=1)
    with fh:
        records = _iter_tabular(fh, col, idcol) if col is not None else _iter_fasta(fh)
        for record in records:
            yield record


def __main__(argv=None):
    """Run the command line tool.

    argv is the list of command line arguments; None (the default) makes
    optparse read sys.argv[1:].
    """
    parser = _build_parser()
    (options, args) = parser.parse_args(argv)

    if not options.allele and options.prediction != 'bcell':
        warn_err('-a allele required\n', exit_code=1)

    if not (options.sequence or options.input):
        warn_err('NO Sequences given: either -s sequence or -i input_file is required\n', exit_code=1)

    # Open the output before querying so an unwritable path fails fast.
    if options.output is not None:
        try:
            outputFile = open(os.path.abspath(options.output), 'w')
        except (IOError, OSError) as e:
            warn_err("Unable to open output file: %s\n" % e, exit_code=1)
    else:
        outputFile = sys.stdout

    url = 'http://tools-api.iedb.org/tools_api/%s/' % options.prediction

    # TODO parse alleles from the options.alleles file
    alleles = ','.join(options.allele)
    lengths = ','.join(options.length)

    header = None
    results = []
    try:
        for seqid, seq in _iter_peptides(options):
            query_header, rows = _query(url, seq, alleles, lengths, seqid=seqid, method=options.method, debug=options.debug)
            if query_header:
                # The header of the most recent response wins.
                header = query_header
            results.extend(rows)
        if header:
            outputFile.write(header)
        outputFile.writelines(results)
    finally:
        if outputFile is not sys.stdout:
            outputFile.close()


if __name__ == "__main__":
    __main__()
<tool id="iedb_api" name="IEDB" version="0.1.0">
    <!-- iedb_api.xml: Galaxy wrapper that builds the command line for iedb_api.py -->
    <description>MHC Binding prediction</description>
    <requirements>
    </requirements>
    <stdio>
        <exit_code range="1:" />
    </stdio>
    <command interpreter="python"><![CDATA[
        #import re
        iedb_api.py --prediction=$prediction.tool --method=$prediction.method
        #if $sequence.seqsrc == 'fasta':
            -i '$sequence.seq_fasta'
        #else if $sequence.seqsrc == 'tabular':
            -i '$sequence.seq_tsv'
            ## Galaxy column numbers are 1-based; iedb_api.py expects 0-based indexes
            -c #echo int(str($sequence.pep_col)) - 1
            #if $sequence.id_col:
                -C #echo int(str($sequence.id_col)) - 1
            #end if
        #else:
            #for $seq in str($sequence.seq_text).strip().split():
                -s '$seq.strip()'
            #end for
        #end if
        ## Alleles contain '*', so every value is single-quoted to keep the shell from glob-expanding it.
        ## Each allele is emitted once per peptide length: its own lengths when given, else the selected lengths.
        #if $alleles.allelesrc == 'history':
            #for $line in open(str($alleles.allele_file)):
                #set $fields = $line.strip().split(',')
                #set $allele = $fields[0].strip()
                #if len($allele) > 0:
                    #if len($fields) > 1:
                        #for $alen in $fields[1:]:
                            -a '$allele' -l '$alen.strip()'
                        #end for
                    #else:
                        #for $alen in str($lengths).split(','):
                            -a '$allele' -l '$alen.strip()'
                        #end for
                    #end if
                #end if
            #end for
        #else:
            #for $word in str($alleles.allele_text).strip().split():
                #set $fields = $word.strip().split(',')
                #set $allele = $fields[0].strip()
                #if len($allele) > 0:
                    #if len($fields) > 1:
                        #for $alen in $fields[1:]:
                            -a '$allele' -l '$alen.strip()'
                        #end for
                    #else:
                        #for $alen in str($lengths).split(','):
                            -a '$allele' -l '$alen.strip()'
                        #end for
                    #end if
                #end if
            #end for
        #end if
        -o '$output'
    ]]></command>
    <inputs>
        <conditional name="sequence">
            <param name="seqsrc" type="select" label="Peptide sequences">
                <option value="fasta">Fasta file</option>
                <option value="tabular">From tabular</option>
                <option value="entry">Entered</option>
            </param>
            <when value="fasta">
                <param name="seq_fasta" type="data" format="fasta" label="Peptide Sequence Fasta"/>
            </when>
            <when value="tabular">
                <param name="seq_tsv" type="data" format="tabular" label="Peptide Sequence Tabular"/>
                <param name="pep_col" label="Select column with peptides" type="data_column" numerical="false" data_ref="seq_tsv" />
                <param name="id_col" label="Select column with name" type="data_column" numerical="false" data_ref="seq_tsv" optional="true"/>
            </when>
            <when value="entry">
                <param name="seq_text" type="text" size="80" label="Peptide Sequence"/>
            </when>
        </conditional>
        <conditional name="alleles">
            <param name="allelesrc" type="select" label="Alleles">
                <option value="history">From history</option>
                <option value="entry">Entered</option>
            </param>
            <when value="history">
                <param name="allele_file" type="data" format="txt" label="Alleles file">
                    <help>The dataset should have one allele per line. The allele may be followed by an optional comma-separated list of peptide lengths, e.g.: HLA-A*02:01,8,9</help>
                </param>
            </when>
            <when value="entry">
                <param name="allele_text" type="text" size="80" label="Alleles">
                    <help>Enter alleles separated by white space: HLA-A*03:01 HLA-B*07:02 (The peptide lengths may follow each allele: HLA-A*03:01,8,9,10 HLA-B*07:02,9)</help>
                    <validator type="regex" message="Alleles separated by white space; each may be followed by comma-separated peptide lengths (8-15)">^(HLA-([A-C]|D[PQR][AB]1)\*[0-9][0-9]:[0-9][0-9](,(8|9|10|11|12|13|14|15))*)(\s+HLA-([A-C]|D[PQR][AB]1)\*[0-9][0-9]:[0-9][0-9](,(8|9|10|11|12|13|14|15))*)*$</validator>
                </param>
            </when>
        </conditional>
        <param name="lengths" type="select" multiple="true" label="peptide lengths for prediction">
            <help>Used for any alleles which don't include specified lengths</help>
            <option value="8">8</option>
            <option value="9">9</option>
            <option value="10">10</option>
            <option value="11">11</option>
            <option value="12">12</option>
            <option value="13">13</option>
            <option value="14">14</option>
            <option value="15">15</option>
        </param>
        <conditional name="prediction">
            <param name="tool" type="select" label="Prediction">
                <option value="mhci">MHC-I Binding</option>
                <option value="mhcii">MHC-II Binding</option>
                <option value="processing">MHC-I Processing</option>
                <option value="mhcnp">MHC-NP T-Cell Epitope</option>
                <option value="bcell">Antibody Epitope Prediction</option>
            </param>
            <when value="mhci">
                <param name="method" type="select" label="prediction method">
                    <option value="recommended" selected="true">recommended</option>
                    <option value="consensus">consensus</option>
                    <option value="netmhcpan">netmhcpan</option>
                    <option value="ann">ann</option>
                    <option value="smmpmbec">smmpmbec</option>
                    <option value="smm">smm</option>
                    <option value="comblib_sidney2008">comblib_sidney2008</option>
                    <option value="netmhccons">netmhccons</option>
                    <option value="pickpocket">pickpocket</option>
                </param>
            </when>
            <when value="mhcii">
                <param name="method" type="select" label="prediction method">
                    <option value="recommended" selected="true">recommended</option>
                    <option value="consensus3">consensus3</option>
                    <option value="NetMHCIIpan">NetMHCIIpan</option>
                    <option value="nn_align">nn_align</option>
                    <option value="smm_align">smm_align</option>
                    <option value="comblib">comblib</option>
                    <option value="tepitope">tepitope</option>
                </param>
            </when>
            <when value="processing">
                <param name="method" type="select" label="prediction method">
                    <option value="recommended" selected="true">recommended</option>
                    <option value="consensus">consensus</option>
                    <option value="netmhcpan">netmhcpan</option>
                    <option value="ann">ann</option>
                    <option value="smmpmbec">smmpmbec</option>
                    <option value="smm">smm</option>
                    <option value="comblib_sidney2008">comblib_sidney2008</option>
                </param>
            </when>
            <when value="mhcnp">
                <param name="method" type="select" label="prediction method">
                    <option value="mhcnp" selected="true">mhcnp</option>
                </param>
            </when>
            <when value="bcell">
                <param name="method" type="select" label="prediction method">
                    <option value="Bepipred" selected="true">Bepipred</option>
                    <option value="Chou-Fasman">Chou-Fasman</option>
                    <option value="Emini">Emini</option>
                    <option value="Karplus-Schulz">Karplus-Schulz</option>
                    <option value="Kolaskar-Tongaonkar">Kolaskar-Tongaonkar</option>
                    <option value="Parker">Parker</option>
                </param>
                <!-- NOTE(review): window_size is collected here but the command template never passes it to iedb_api.py -->
                <param name="window_size" type="integer" value="" optional="true" min="1" label="window_size" help="window_size should be less than the sequence length, and less than 8 for Karplus-Schulz method"/>
            </when>
        </conditional>
    </inputs>
    <outputs>
        <data name="output" format="tabular"/>
    </outputs>
    <tests>
        <test>
            <param name="seqsrc" value="entry"/>
            <param name="seq_text" value="SLYNTVATLYCVHQRIDV"/>
            <param name="allelesrc" value="entry"/>
            <param name="allele_text" value="HLA-A*01:01,9"/>
            <param name="tool" value="mhci"/>
            <param name="method" value="recommended"/>
            <output name="output">
                <assert_contents>
                    <has_text text="LYNTVATLY" />
                </assert_contents>
            </output>
        </test>
        <test>
            <param name="seqsrc" value="fasta"/>
            <param name="seq_fasta" ftype="fasta" value="seqs.fa"/>
            <param name="allelesrc" value="history"/>
            <!-- the committed test-data file is alleles.tsv -->
            <param name="allele_file" ftype="txt" value="alleles.tsv"/>
            <param name="tool" value="mhci"/>
            <param name="method" value="recommended"/>
            <output name="output">
                <assert_contents>
                    <has_text text="peptide1" />
                    <has_text text="AHKVPRRLLK" />
                </assert_contents>
            </output>
        </test>
        <test>
            <param name="seqsrc" value="tabular"/>
            <param name="seq_tsv" ftype="tabular" value="seqs.tsv"/>
            <param name="pep_col" value="3"/>
            <param name="id_col" value="1"/>
            <param name="allelesrc" value="history"/>
            <param name="allele_file" ftype="txt" value="alleles.tsv"/>
            <param name="tool" value="mhci"/>
            <param name="method" value="recommended"/>
            <output name="output">
                <assert_contents>
                    <has_text text="peptide1" />
                    <has_text text="AHKVPRRLLK" />
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
The IEDB is a free resource, funded by a contract from the National Institute of Allergy and Infectious Diseases. It offers easy searching of experimental data characterizing antibody and T cell epitopes studied in humans, non-human primates, and other animal species.

This tool retrieves epitope information about input peptide sequences by using the RESTful web services provided by IEDB.
The webservices are described at: http://tools.immuneepitope.org/main/tools-api/

**INPUTS**

  peptide sequences from a fasta file or a column in a tabular file

  HLA alleles either entered as text or one per line in a text file


**OUTPUTS**

  A tabular file containing the results returned from the IEDB web service

    ]]></help>
    <citations>
        <citation type="doi">10.1093/nar/gku938</citation>
    </citations>
</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/alleles.tsv Mon Feb 17 16:04:07 2020 -0500 @@ -0,0 +1,2 @@ +HLA-A*01:01,9 +HLA-A*03:01,10