Mercurial > repos > earlhaminst > ensembl_get_sequences
annotate get_genetree.py @ 5:0fa1d1cc417d draft
"planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit ed32f2e6d8174873cefcbe141084f857f84b0586"
author | earlhaminst |
---|---|
date | Thu, 31 Oct 2019 07:50:07 -0400 |
parents | 4b7261f484bb |
children | 7af66c2b3831 |
rev | line source |
---|---|
1
e5dd4bd78bbc
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff
changeset
|
1 # A simple tool to connect to the Ensembl server and retrieve a gene tree using |
e5dd4bd78bbc
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff
changeset
|
2 # the Ensembl REST API. |
2
4b7261f484bb
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit 099d38157cec200f0a343579ca9babcd8acb266f
earlhaminst
parents:
1
diff
changeset
|
3 from __future__ import print_function |
4b7261f484bb
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit 099d38157cec200f0a343579ca9babcd8acb266f
earlhaminst
parents:
1
diff
changeset
|
4 |
1
e5dd4bd78bbc
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff
changeset
|
5 import optparse |
e5dd4bd78bbc
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff
changeset
|
6 |
e5dd4bd78bbc
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff
changeset
|
7 import requests |
2
4b7261f484bb
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit 099d38157cec200f0a343579ca9babcd8acb266f
earlhaminst
parents:
1
diff
changeset
|
8 from six.moves.urllib.parse import urljoin |
1
e5dd4bd78bbc
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff
changeset
|
9 |
e5dd4bd78bbc
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff
changeset
|
# Base URL of the Ensembl REST service.
# NOTE(review): plain HTTP is kept from the original script; confirm whether
# it is deliberate before switching to HTTPS.
SERVER = 'http://rest.ensembl.org'

# REST endpoint path for each accepted --id_type value.
ENDPOINTS = {
    'gene_id': 'genetree/member/id',
    'gene_tree_id': 'genetree/id',
}

# MIME type sent in the Content-Type header for each accepted --format value.
CONTENT_TYPES = {
    'json': 'application/json',
    'orthoxml': 'text/x-orthoxml+xml',
    'phyloxml': 'text/x-phyloxml+xml',
    'nh': 'text/x-nh',
}

# Command-line options forwarded unchanged as URL query parameters.
QUERY_OPTIONS = ('sequence', 'aligned', 'cigar_line', 'nh_format')


def build_parser():
    """Return the optparse parser describing the tool's command line."""
    parser = optparse.OptionParser()
    parser.add_option('--id_type', type='choice', default='gene_id',
                      choices=['gene_id', 'gene_tree_id'], help='Input type')
    parser.add_option('-i', '--input', help='Ensembl ID')
    parser.add_option('--format', type='choice',
                      choices=['json', 'orthoxml', 'phyloxml', 'nh'],
                      default='json', help='Output format')
    parser.add_option('-s', '--sequence', type='choice',
                      choices=['protein', 'cdna', 'none'], default='protein',
                      help='The type of sequence to bring back. Setting it to none results in no sequence being returned')

    parser.add_option('-a', '--aligned', type='choice', choices=['0', '1'],
                      default='0', help='Return the aligned string if true. Otherwise, return the original sequence (no insertions)')
    parser.add_option('-c', '--cigar_line', type='choice', choices=['0', '1'],
                      default='0',
                      help='Return the aligned sequence encoded in CIGAR format')
    parser.add_option('--nh_format', type='choice',
                      choices=['full', 'display_label_composite', 'simple', 'species', 'species_short_name', 'ncbi_taxon', 'ncbi_name', 'njtree', 'phylip'],
                      default='simple',
                      help='The format of a NH (New Hampshire) request')
    return parser


def parse_options(argv=None):
    """Parse the command line and return the optparse options object.

    argv is the list of arguments to parse; None means sys.argv[1:].
    If no (or an empty) Ensembl ID is given, the parser's error() method is
    called, which prints the usage message to stderr and exits with status 2.
    """
    parser = build_parser()
    options, _ = parser.parse_args(argv)
    # An empty ID would otherwise produce a request to the bare endpoint URL,
    # so it is rejected together with a missing one.
    if not options.input:
        parser.error('-i option must be specified')
    return options


def build_request(options):
    """Return the (url, params, headers) triple for the parsed options.

    url is the endpoint selected by options.id_type followed by the ID,
    params holds the options listed in QUERY_OPTIONS, and headers carries the
    MIME type selected by options.format.
    """
    endpoint = ENDPOINTS[options.id_type]
    url = urljoin(SERVER, '/'.join([endpoint, options.input]))
    params = {name: getattr(options, name) for name in QUERY_OPTIONS}
    headers = {'Content-Type': CONTENT_TYPES[options.format]}
    return url, params, headers


def main(argv=None):
    """Fetch the requested gene tree and print the response body to stdout.

    Raises requests.HTTPError when the server answers with an error status.
    """
    options = parse_options(argv)
    url, params, headers = build_request(options)
    r = requests.get(url, params=params, headers=headers)
    # raise_for_status() does nothing for successful responses, so no
    # separate check of r.ok is needed.
    r.raise_for_status()
    print(r.text)


if __name__ == '__main__':
    main()