diff iedb_api.py @ 4:a14128950578 draft default tip

"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/iedb_api commit 98a9dd3bd9c567e8b8e43ac5b54c4ba75a6fe78d"
author jjohnson
date Fri, 28 Feb 2020 15:45:14 -0500
parents 153d5fa7af53
children
line wrap: on
line diff
--- a/iedb_api.py	Wed Feb 26 16:27:06 2020 -0500
+++ b/iedb_api.py	Fri Feb 28 15:45:14 2020 -0500
@@ -34,10 +34,102 @@
                       'processing': range(8, 15),
                       'mhcnp': range(8, 12),
                       'bcell': range(8, 16)}
+prediction_species = {'mhci': [],
+                      'mhcii': range(11, 31),
+                      'processing': range(8, 15),
+                      'mhcnp': range(8, 12),
+                      'bcell': range(8, 16)}
+
+
+def parse_alleles(allelefile, lengths):
+    alleles = []
+    lengths = []
+    with open(allelefile, 'r') as fh:
+        for i, line in enumerate(fh):
+            fields = line.strip().split(',')
+            allele = fields[0].strip()
+            if allele:
+                if len(fields) > 1:
+                    for alen in fields[1:]:
+                        alleles.append(allele)
+                        lengths.append(alen)
+                elif lengths:
+                   for alen in str(lengths).split(','):
+                        alleles.append(allele)
+                        lengths.append(alen)
+                else:
+                    alleles.append(allele)
+    return (alleles, lengths)
+
+
+def query(url, prediction, seq, allele, length, results,
+          seqid=None, method='recommended', proteasome=None,
+          timeout=300, retries=3, sleep=300, debug=False):
+    params = dict()
+    if method:
+        params['method'] = method.encode()
+    if proteasome:
+        params['proteasome'] = proteasome.encode()
+    params['sequence_text'] = seq.strip().encode()
+    if allele is not None:
+        params['allele'] = allele.encode()
+    if length is not None:
+        if prediction == 'bcell':
+            params['window_size'] = str(length).encode()
+        else:
+            params['length'] = str(length).encode()
+    req_data = urlencode(params)
+    if debug:
+        print('url %s %s' % (url, unquote(req_data)), file=sys.stderr)
+    retries = max(0, retries) + 1
+    for retry in range(1, retries):
+        response = None
+        try:
+            response = urlopen(url, data=req_data.encode('utf-8'),
+                               timeout=timeout)
+            if response and response.getcode() == 200:
+                data = [line.decode() for line in response.readlines()]
+                if debug:
+                    print(data, file=sys.stderr)
+                rslts = results['prediction']['entries']
+                for ln, line in enumerate(data):
+                    if 'invalid' in line.lower() or 'tools_api.html' in line:
+                        msg = '%s %s\n%s' % (url, unquote(req_data),
+                                             ''.join(data))
+                        warn_err(msg, exit_code=1)
+                    if line.find('eptide') > 0:
+                        results['prediction']['header'] = "#%s%s" %\
+                            ("ID\t" if seqid else "", line)
+                        continue
+                    elif method == 'Bepipred' and line.find('Residue') > 0:
+                        results['detail']['header'] = "#%s%s" %\
+                            ("ID\t" if seqid else "", line)
+                        rslts = results['detail']['entries']
+                        continue
+                    if seqid:
+                        rslts.extend("%s\t%s" % (seqid, line))
+                    else:
+                        rslts.extend(line)
+                break
+            else:
+                code = response.getcode() if response else 1
+                warn_err("Error connecting to IEDB server\n",
+                         exit_code=code)
+        except HTTPError as e:
+            code = None if retry < retries else e.code
+            warn_err("%d of %d Error connecting to IEDB server %s\n" %
+                     (retry, retries, e),
+                     exit_code=code)
+            time.sleep(sleep)
+        except Exception as e:
+            warn_err("Error connecting to IEDB server %s\n" % e,
+                     exit_code=3)
+    return results
 
 
 def warn_err(msg, exit_code=1):
     sys.stderr.write(msg)
+    sys.stderr.flush()
     if exit_code:
         sys.exit(exit_code)
 
@@ -65,6 +157,9 @@
                         action="append",
                         default=[],
                         help='Alleles for which to make predictions')
+    parser.add_argument('-A', '--allelefile',
+                        default=None,
+                        help='File of HLA alleles')
     parser.add_argument('-l', '--length',
                         action="append",
                         default=[],
@@ -110,8 +205,9 @@
 
     aapat = '^[ABCDEFGHIKLMNPQRSTVWY]+$'
 
-    if not args.allele and args.prediction != 'bcell':
-        warn_err('-a allele required\n', exit_code=1)
+    if args.prediction != 'bcell':
+        if not args.allele and not args.allelefile:
+            warn_err('-a allele or -A allelefile required\n', exit_code=1)
 
     if not (args.sequence or args.input):
         warn_err('NO Sequences given: ' +
@@ -127,97 +223,35 @@
     else:
         outputFile = sys.stdout
 
-    url = 'http://tools-cluster-interface.iedb.org/tools_api/%s/' %\
-        args.prediction
-    len_param = 'length' if args.prediction != 'bcell' else 'window_size'
-
+    alleles = []
+    lengths = []
     # TODO parse alleles from the args.alleles file
-    alleles = ','.join(args.allele) if args.prediction != 'bcell' else None
-    lengths = ','.join(args.length)
-    if args.prediction == 'bcell':
-        lengths = args.window_size
+    if args.prediction == 'bcell' and args.window_size is not None:
+        lengths.append(str(args.window_size))
+    else:
+        if args.allelefile:
+            (alleles, lengths) = parse_alleles(args.allelefile, args.length)  
+        if args.allele:
+            for i, allele in enumerate(args.allele):
+                alleles.append(allele)
+                alen = args.length[i] if i < len(args.length) else args.length[-1]
+                lengths.append(alen)
+    allele = ','.join(alleles) if alleles else None
+    length = ','.join(lengths) if lengths else None
     method = args.method
     proteasome = args.proteasome if args.prediction == 'processcing' else None
-    global header
-    header = None
-    results = []
-    global header2
-    header2 = None
-    results2 = []
-
-    sequence_text = []
-
-    def add_seq(seqid, seq):
-        sid = seqid if seqid else "peptide%d" % len(sequence_text)
-        sequence_text.append(">%s\n%s" % (sid, seq))
-
-    def query(url, seq, allele, length, seqid=None, method='recommended'):
-        global header
-        global header2
-        params = dict()
-        if method:
-            params['method'] = method.encode()
-        if proteasome:
-            params['proteasome'] = proteasome.encode()
-        params['sequence_text'] = seq.encode()
-        if allele is not None:
-            params['allele'] = allele.encode()
-        if length is not None:
-            params[len_param] = str(length).encode()
-        req_data = urlencode(params)
-        if args.debug:
-            print('url %s %s' % (url, unquote(req_data)), file=sys.stderr)
-        retries = max(0, args.retries) + 1
-        for retry in range(1, retries):
-            response = None
-            try:
-                response = urlopen(url, data=req_data.encode('utf-8'),
-                                   timeout=args.timeout)
-                if response and response.getcode() == 200:
-                    data = [line.decode() for line in response.readlines()]
-                    if args.debug:
-                        print(data, file=sys.stderr)
-                    rslts = results
-                    for ln, line in enumerate(data):
-                        if line.lower().find('invalid') >= 0:
-                            msg = '%s %s\n%s' % (url, unquote(req_data),
-                                                 ''.join(data))
-                            warn_err(msg, exit_code=1)
-                        if line.find('eptide') > 0:
-                            header = "#%s%s" %\
-                                ("ID\t" if seqid else "", line)
-                            if args.debug:
-                                print(header, file=sys.stderr)
-                            continue
-                        elif method == 'Bepipred' and line.find('Residue') > 0:
-                            header2 = "#%s%s" %\
-                                ("ID\t" if seqid else "", line)
-                            if args.debug:
-                                print(header2, file=sys.stderr)
-                            rslts = results2
-                            continue
-                        if seqid:
-                            rslts.extend("%s\t%s" % (seqid, line))
-                        else:
-                            rslts.extend(line)
-                    break
-                else:
-                    code = response.getcode() if response else 1
-                    warn_err("Error connecting to IEDB server\n",
-                             exit_code=code)
-            except HTTPError as e:
-                code = None if retry < args.retries else e.code
-                warn_err("%d of %d Error connecting to IEDB server %s\n" %
-                         (retry, retries, e),
-                         exit_code=code)
-                time.sleep(args.sleep)
-            except Exception as e:
-                warn_err("Error connecting to IEDB server %s\n" % e,
-                         exit_code=3)
+    url = 'http://tools-cluster-interface.iedb.org/tools_api/%s/' %\
+        args.prediction
+    # results
+    results = {'prediction': {'header': None, 'entries': []}, 'detail': {'header': None, 'entries': []}}
 
     if args.sequence:
         for i, seq in enumerate(args.sequence):
-            query(url, seq, alleles, lengths, seqid=None, method=method)
+            seqid = 'pep_%d' % i
+            query(url, args.prediction, seq, allele, length, results,
+                  seqid=seqid, method=method, proteasome=proteasome,
+                  timeout=args.timeout, retries=args.retries,
+                  sleep=args.sleep, debug=args.debug)
     if args.input:
         try:
             fh = open(args.input, 'r')
@@ -225,16 +259,19 @@
                 col = int(args.column)
                 idcol = int(args.id_column) if args.id_column else None
                 for i, line in enumerate(fh):
-                    fields = line.split('\t')
+                    fields = line.rstrip('\r\n').split('\t')
                     if len(fields) > col:
-                        seq = re.sub('[_*]', '', fields[col])
+                        seq = re.sub('[_*]', '', fields[col].strip())
                         if re.match(aapat, seq):
                             if idcol is not None and idcol < len(fields):
                                 seqid = fields[idcol]
                             else:
-                                seqid = None
-                            query(url, seq, alleles, lengths,
-                                  seqid=seqid, method=method)
+                                seqid = 'pep_%d' % i
+                            query(url, args.prediction, seq, allele, length,
+                                  results, seqid=seqid, 
+                                  method=method, proteasome=proteasome,
+                                  timeout=args.timeout, retries=args.retries,
+                                  sleep=args.sleep, debug=args.debug)
                         else:
                             warn_err('Line %d, Not a peptide: %s\n' % (i, seq),
                                      exit_code=None)
@@ -244,24 +281,30 @@
                 for i, line in enumerate(fh):
                     if line.startswith('>'):
                         if seqid and len(seq) > 0:
-                            query(url, seq, alleles, lengths,
-                                  seqid=seqid, method=method)
+                            query(url, args.prediction, seq, allele, length,
+                                  results, seqid=seqid, 
+                                  method=method, proteasome=proteasome,
+                                  timeout=args.timeout, retries=args.retries,
+                                  sleep=args.sleep, debug=args.debug)
                         seqid = line[1:].strip()
                         seq = ''
                     else:
                         seq += line.strip()
                 if seqid and len(seq) > 0:
-                    query(url, seq, alleles, lengths,
-                          seqid=seqid, method=method)
+                    query(url, args.prediction, seq, allele, length,
+                          results, seqid=seqid, 
+                          method=method, proteasome=proteasome,
+                          timeout=args.timeout, retries=args.retries,
+                          sleep=args.sleep, debug=args.debug)
             fh.close()
         except Exception as e:
             warn_err("Unable to open input file: %s\n" % e, exit_code=1)
 
-    if header:
-        outputFile.write(header)
-    for line in results:
+    if results['prediction']['header']:
+        outputFile.write(results['prediction']['header'])
+    for line in results['prediction']['entries']:
         outputFile.write(line)
-    if results2:
+    if results['detail']['entries']:
         if args.output2:
             try:
                 outPath = os.path.abspath(args.output2)
@@ -270,9 +313,9 @@
                 warn_err("Unable to open output file: %s\n" % e, exit_code=1)
         else:
             outFile = sys.stdout
-        if header2:
-            outFile.write(header2)
-        for line in results2:
+        if results['detail']['header']:
+            outFile.write(results['detail']['header'])
+        for line in results['detail']['entries']:
             outFile.write(line)