view get_sequences.py @ 3:bce784076824 draft

planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit f5c5f3d6ce676937f5c673ec7fc0631a9f490dc2
author earlhaminst
date Fri, 24 Mar 2017 11:57:48 -0400
parents 4b7261f484bb
children 0fa1d1cc417d
line wrap: on
line source

# A simple tool to connect to the Ensembl server and retrieve sequences using
# the Ensembl REST API.
from __future__ import print_function

import json
import optparse
from itertools import islice

import requests
from six.moves.urllib.parse import urljoin

parser = optparse.OptionParser()
parser.add_option('-i', '--input', help='List of Ensembl IDs')

parser.add_option('-s', '--species', type='choice',
                  choices=['ensembl', 'ensemblgenomes'], default='ensembl',
                  help='Specify the genome databases for vertebrates and other eukaryotic species')

parser.add_option('-t', '--type', type='choice',
                  choices=['genomic', 'cds', 'cdna', 'protein'],
                  default='genomic', help='Type of sequence')
parser.add_option('--expand_3prime', type='int', default=0,
                  help='Expand the sequence downstream of the sequence by this many basepairs. Only available when using genomic sequence type')
parser.add_option('--expand_5prime', type='int', default=0,
                  help='Expand the sequence upstream of the sequence by this many basepairs. Only available when using genomic sequence type')
options, args = parser.parse_args()
if options.input is None:
    raise Exception('-i option must be specified')

server = 'http://rest.%s.org' % options.species
ext = 'sequence/id'

headers = {'Content-Type': 'text/x-fasta', 'Accept': 'text/x-fasta'}
params = dict((k, getattr(options, k)) for k in ['type', 'expand_3prime', 'expand_5prime'])
with open(options.input) as f:
    # Need to split the file in chunks of 50 lines because of the limit imposed by Ensembl
    while True:
        ids = [line.strip() for line in islice(f, 50)]
        if not ids:
            break
        data = {'ids': ids}
        r = requests.post(urljoin(server, ext), params=params, headers=headers,
                          data=json.dumps(data))

        if not r.ok:
            r.raise_for_status()

        print(r.text)