annotate get_sequences.py @ 2:950d9d11b6fb draft

planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit 099d38157cec200f0a343579ca9babcd8acb266f
author earlhaminst
date Wed, 21 Dec 2016 15:16:35 -0500
parents 98aba0efe77a
children 675fd774314e
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
98aba0efe77a planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff changeset
1 # A simple tool to connect to the Ensembl server and retrieve sequences using
98aba0efe77a planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff changeset
2 # the Ensembl REST API.
2
950d9d11b6fb planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit 099d38157cec200f0a343579ca9babcd8acb266f
earlhaminst
parents: 1
diff changeset
3 from __future__ import print_function
950d9d11b6fb planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit 099d38157cec200f0a343579ca9babcd8acb266f
earlhaminst
parents: 1
diff changeset
4
1
98aba0efe77a planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff changeset
5 import json
98aba0efe77a planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff changeset
6 import optparse
98aba0efe77a planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff changeset
7 from itertools import islice
98aba0efe77a planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff changeset
8
98aba0efe77a planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff changeset
9 import requests
2
950d9d11b6fb planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit 099d38157cec200f0a343579ca9babcd8acb266f
earlhaminst
parents: 1
diff changeset
10 from six.moves.urllib.parse import urljoin
1
98aba0efe77a planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff changeset
11
98aba0efe77a planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff changeset
12 parser = optparse.OptionParser()
98aba0efe77a planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff changeset
13 parser.add_option('-i', '--input', help='List of Ensembl IDs')
98aba0efe77a planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff changeset
14
98aba0efe77a planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff changeset
15 parser.add_option('-s', '--species', type='choice',
98aba0efe77a planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff changeset
16 choices=['ensembl', 'ensemblgenomes'], default='ensembl',
98aba0efe77a planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff changeset
17 help='Specify the genome databases for vertebrates and other eukaryotic species')
98aba0efe77a planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff changeset
18
98aba0efe77a planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff changeset
19 parser.add_option('-t', '--type', type='choice',
98aba0efe77a planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff changeset
20 choices=['genomic', 'cds', 'cdna', 'protein'],
98aba0efe77a planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff changeset
21 default='genomic', help='Type of sequence')
98aba0efe77a planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff changeset
22 parser.add_option('--expand_3prime', type='int', default=0,
98aba0efe77a planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff changeset
23 help='Expand the sequence downstream of the sequence by this many basepairs. Only available when using genomic sequence type')
98aba0efe77a planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff changeset
24 parser.add_option('--expand_5prime', type='int', default=0,
98aba0efe77a planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff changeset
25 help='Expand the sequence upstream of the sequence by this many basepairs. Only available when using genomic sequence type')
98aba0efe77a planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff changeset
26 options, args = parser.parse_args()
98aba0efe77a planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff changeset
27 if options.input is None:
98aba0efe77a planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff changeset
28 raise Exception('-i option must be specified')
98aba0efe77a planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff changeset
29
98aba0efe77a planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff changeset
30 server = 'http://rest.%s.org' % options.species
98aba0efe77a planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff changeset
31 ext = 'sequence/id'
98aba0efe77a planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff changeset
32
98aba0efe77a planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff changeset
33 headers = {'Content-Type': 'text/x-fasta', 'Accept': 'text/x-fasta'}
98aba0efe77a planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff changeset
34 params = dict((k, getattr(options, k)) for k in ['type', 'expand_3prime', 'expand_5prime'])
98aba0efe77a planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff changeset
35 with open(options.input) as f:
98aba0efe77a planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff changeset
36 # Need to split the file in chunks of 50 lines because of the limit imposed by Ensembl
98aba0efe77a planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff changeset
37 while True:
98aba0efe77a planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff changeset
38 ids = [line.strip() for line in islice(f, 50)]
98aba0efe77a planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff changeset
39 if not ids:
98aba0efe77a planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff changeset
40 break
98aba0efe77a planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff changeset
41 data = {'ids': ids}
98aba0efe77a planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff changeset
42 r = requests.post(urljoin(server, ext), params=params, headers=headers,
98aba0efe77a planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff changeset
43 data=json.dumps(data))
98aba0efe77a planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff changeset
44
98aba0efe77a planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff changeset
45 if not r.ok:
98aba0efe77a planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff changeset
46 r.raise_for_status()
98aba0efe77a planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff changeset
47
2
950d9d11b6fb planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit 099d38157cec200f0a343579ca9babcd8acb266f
earlhaminst
parents: 1
diff changeset
48 print(r.text)