Mercurial > repos > earlhaminst > ensembl_get_sequences
comparison get_sequences.py @ 1:e5dd4bd78bbc draft
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
| author | earlhaminst |
|---|---|
| date | Mon, 12 Dec 2016 07:47:42 -0500 |
| parents | |
| children | 4b7261f484bb |
comparison
equal
deleted
inserted
replaced
| 0:76b2c482f1e8 | 1:e5dd4bd78bbc |
|---|---|
| 1 # A simple tool to connect to the Ensembl server and retrieve sequences using | |
| 2 # the Ensembl REST API. | |
| 3 import json | |
| 4 import optparse | |
| 5 from itertools import islice | |
| 6 from urlparse import urljoin | |
| 7 | |
| 8 import requests | |
| 9 | |
| 10 parser = optparse.OptionParser() | |
| 11 parser.add_option('-i', '--input', help='List of Ensembl IDs') | |
| 12 | |
| 13 parser.add_option('-s', '--species', type='choice', | |
| 14 choices=['ensembl', 'ensemblgenomes'], default='ensembl', | |
| 15 help='Specify the genome databases for vertebrates and other eukaryotic species') | |
| 16 | |
| 17 parser.add_option('-t', '--type', type='choice', | |
| 18 choices=['genomic', 'cds', 'cdna', 'protein'], | |
| 19 default='genomic', help='Type of sequence') | |
| 20 parser.add_option('--expand_3prime', type='int', default=0, | |
| 21 help='Expand the sequence downstream of the sequence by this many basepairs. Only available when using genomic sequence type') | |
| 22 parser.add_option('--expand_5prime', type='int', default=0, | |
| 23 help='Expand the sequence upstream of the sequence by this many basepairs. Only available when using genomic sequence type') | |
| 24 options, args = parser.parse_args() | |
| 25 if options.input is None: | |
| 26 raise Exception('-i option must be specified') | |
| 27 | |
| 28 server = 'http://rest.%s.org' % options.species | |
| 29 ext = 'sequence/id' | |
| 30 | |
| 31 headers = {'Content-Type': 'text/x-fasta', 'Accept': 'text/x-fasta'} | |
| 32 params = dict((k, getattr(options, k)) for k in ['type', 'expand_3prime', 'expand_5prime']) | |
| 33 with open(options.input) as f: | |
| 34 # Need to split the file in chunks of 50 lines because of the limit imposed by Ensembl | |
| 35 while True: | |
| 36 ids = [line.strip() for line in islice(f, 50)] | |
| 37 if not ids: | |
| 38 break | |
| 39 data = {'ids': ids} | |
| 40 r = requests.post(urljoin(server, ext), params=params, headers=headers, | |
| 41 data=json.dumps(data)) | |
| 42 | |
| 43 if not r.ok: | |
| 44 r.raise_for_status() | |
| 45 | |
| 46 print r.text |
