Mercurial > repos > earlhaminst > ensembl_get_feature_info
annotate get_sequences/get_sequences.py @ 0:874c067e4527 draft
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit e80af91bced56efdb4fbf62ac03232655a22f25d-dirty
author | earlhaminst |
---|---|
date | Thu, 11 Aug 2016 14:29:29 -0400 |
parents | |
children |
rev | line source |
---|---|
0
874c067e4527
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit e80af91bced56efdb4fbf62ac03232655a22f25d-dirty
earlhaminst
parents:
diff
changeset
|
# A simple tool to connect to the Ensembl server and retrieve sequences using
# the Ensembl REST API.
#
# Usage:
#   get_sequences.py -i IDS_FILE [-s ensembl|ensemblgenomes]
#                    [-t genomic|cds|cdna|protein]
#                    [--expand_3prime N] [--expand_5prime N]
#
# IDS_FILE holds one Ensembl ID per line. The IDs are POSTed to the
# `sequence/id` endpoint in batches and every response body is printed to
# standard output.
import json
import optparse
from itertools import islice

try:
    from urllib.parse import urljoin  # Python 3
except ImportError:
    from urlparse import urljoin  # Python 2

import requests

# Number of input lines sent per request ("limit imposed by Ensembl").
BATCH_SIZE = 50


def main():
    """Parse the command line, then fetch and print sequences batch by batch.

    Raises:
        Exception: if the -i option was not given.
        requests.HTTPError: if the server answers a batch with an error
            status (4xx/5xx).
    """
    parser = optparse.OptionParser()
    parser.add_option('-i', '--input', help='List of Ensembl IDs')

    parser.add_option('-s', '--species', type='choice',
                      choices=['ensembl', 'ensemblgenomes'], default='ensembl',
                      help='Specify the genome databases for vertebrates and other eukaryotic species')

    parser.add_option('-t', '--type', type='choice',
                      choices=['genomic', 'cds', 'cdna', 'protein'],
                      default='genomic', help='Type of sequence')
    parser.add_option('--expand_3prime', type='int', default=0,
                      help='Expand the sequence downstream of the sequence by this many basepairs. Only available when using genomic sequence type')
    parser.add_option('--expand_5prime', type='int', default=0,
                      help='Expand the sequence upstream of the sequence by this many basepairs. Only available when using genomic sequence type')
    options, _ = parser.parse_args()
    if options.input is None:
        raise Exception('-i option must be specified')

    # The endpoint does not change between batches, so build it once.
    server = 'http://rest.%s.org' % options.species
    url = urljoin(server, 'sequence/id')

    # NOTE(review): the request body below is JSON while Content-Type says
    # text/x-fasta; kept as in the original -- confirm against the Ensembl
    # REST documentation whether application/json is expected here.
    headers = {'Content-Type': 'text/x-fasta', 'Accept': 'text/x-fasta'}
    params = dict((k, getattr(options, k)) for k in ['type', 'expand_3prime', 'expand_5prime'])

    with open(options.input) as f:
        # Need to split the file in chunks of BATCH_SIZE lines because of the
        # limit imposed by Ensembl
        while True:
            lines = list(islice(f, BATCH_SIZE))
            if not lines:
                # End of file reached.
                break

            # Drop blank lines so that no empty IDs are sent to the server.
            ids = [line.strip() for line in lines]
            ids = [seq_id for seq_id in ids if seq_id]
            if not ids:
                # This chunk held only blank lines; later chunks may not.
                continue

            r = requests.post(url, params=params, headers=headers,
                              data=json.dumps({'ids': ids}))
            # Raises requests.HTTPError on a 4xx/5xx answer, no-op otherwise.
            r.raise_for_status()

            # Single-argument call form: valid on both Python 2 and Python 3.
            print(r.text)


if __name__ == '__main__':
    main()