Mercurial > repos > earlhaminst > ensembl_get_genetree
diff get_sequences.py @ 8:935de83b470b draft default tip
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit 8f8808de862973aedbf87abd4dfa9d2dc7219322
author | earlhaminst |
---|---|
date | Mon, 17 Feb 2025 14:49:15 +0000 |
parents | 0618e3bd5138 |
children |
line wrap: on
line diff
--- a/get_sequences.py Mon Dec 05 16:29:26 2022 +0000
+++ b/get_sequences.py Mon Feb 17 14:49:15 2025 +0000
@@ -1,42 +1,60 @@
 # A simple tool to connect to the Ensembl server and retrieve sequences using
 # the Ensembl REST API.
-from __future__ import print_function
-
 import json
 import optparse
 from itertools import islice
+from urllib.parse import urljoin
 import requests
-from six.moves.urllib.parse import urljoin
 parser = optparse.OptionParser()
-parser.add_option('-i', '--input', help='List of Ensembl IDs')
+parser.add_option("-i", "--input", help="List of Ensembl IDs")
-parser.add_option('-t', '--type', type='choice',
-                  choices=['genomic', 'cds', 'cdna', 'protein'],
-                  default='genomic', help='Type of sequence')
-parser.add_option('--expand_3prime', type='int', default=0,
-                  help='Expand the sequence downstream of the sequence by this many basepairs. Only available when using genomic sequence type')
-parser.add_option('--expand_5prime', type='int', default=0,
-                  help='Expand the sequence upstream of the sequence by this many basepairs. Only available when using genomic sequence type')
+parser.add_option(
+    "-t",
+    "--type",
+    type="choice",
+    choices=["genomic", "cds", "cdna", "protein"],
+    default="genomic",
+    help="Type of sequence",
+)
+parser.add_option(
+    "--expand_3prime",
+    type="int",
+    default=0,
+    help="Expand the sequence downstream of the sequence by this many basepairs. Only available when using genomic sequence type",
+)
+parser.add_option(
+    "--expand_5prime",
+    type="int",
+    default=0,
+    help="Expand the sequence upstream of the sequence by this many basepairs. Only available when using genomic sequence type",
+)
 options, args = parser.parse_args()
 if options.input is None:
-    raise Exception('-i option must be specified')
+    raise Exception("-i option must be specified")
+
+server = "https://rest.ensembl.org"
+ext = "sequence/id"
-server = 'https://rest.ensembl.org'
-ext = 'sequence/id'
-
-headers = {'Content-Type': 'text/x-fasta', 'Accept': 'text/x-fasta'}
-params = dict((k, getattr(options, k)) for k in ['type', 'expand_3prime', 'expand_5prime'])
+headers = {"Content-Type": "text/x-fasta", "Accept": "text/x-fasta"}
+params = {
+    k: getattr(options, k) for k in ("type", "expand_3prime", "expand_5prime")
+}
 with open(options.input) as f:
     # Need to split the file in chunks of 50 lines because of the limit imposed by Ensembl
     while True:
         ids = [line.strip() for line in islice(f, 50)]
         if not ids:
             break
-        data = {'ids': ids}
-        r = requests.post(urljoin(server, ext), params=params, headers=headers,
-                          data=json.dumps(data), allow_redirects=False)
+        data = {"ids": ids}
+        r = requests.post(
+            urljoin(server, ext),
+            params=params,
+            headers=headers,
+            data=json.dumps(data),
+            allow_redirects=False,
+        )
         if not r.ok:
             r.raise_for_status()