Mercurial > repos > earlhaminst > ensembl_get_genetree
annotate get_sequences.py @ 8:935de83b470b draft default tip
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit 8f8808de862973aedbf87abd4dfa9d2dc7219322
author | earlhaminst |
---|---|
date | Mon, 17 Feb 2025 14:49:15 +0000 |
parents | 0618e3bd5138 |
children |
rev | line source |
---|---|
1
98aba0efe77a
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff
changeset
|
1 # A simple tool to connect to the Ensembl server and retrieve sequences using |
98aba0efe77a
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff
changeset
|
2 # the Ensembl REST API. |
98aba0efe77a
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff
changeset
|
3 import json |
98aba0efe77a
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff
changeset
|
4 import optparse |
98aba0efe77a
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff
changeset
|
5 from itertools import islice |
8
935de83b470b
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit 8f8808de862973aedbf87abd4dfa9d2dc7219322
earlhaminst
parents:
6
diff
changeset
|
6 from urllib.parse import urljoin |
1
98aba0efe77a
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff
changeset
|
7 |
98aba0efe77a
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff
changeset
|
8 import requests |
98aba0efe77a
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff
changeset
|
9 |
98aba0efe77a
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff
changeset
|
# Base URL and endpoint of the Ensembl REST service used for batched
# sequence retrieval.
SERVER = "https://rest.ensembl.org"
EXT = "sequence/id"

# FASTA is requested as the response format.
HEADERS = {"Content-Type": "text/x-fasta", "Accept": "text/x-fasta"}

# IDs are sent in batches of this size because of the limit imposed by Ensembl.
MAX_IDS_PER_REQUEST = 50


def build_parser():
    """Return the command-line option parser for this tool."""
    parser = optparse.OptionParser()
    parser.add_option("-i", "--input", help="List of Ensembl IDs")

    parser.add_option(
        "-t",
        "--type",
        type="choice",
        choices=["genomic", "cds", "cdna", "protein"],
        default="genomic",
        help="Type of sequence",
    )
    parser.add_option(
        "--expand_3prime",
        type="int",
        default=0,
        help="Expand the sequence downstream of the sequence by this many basepairs. Only available when using genomic sequence type",
    )
    parser.add_option(
        "--expand_5prime",
        type="int",
        default=0,
        help="Expand the sequence upstream of the sequence by this many basepairs. Only available when using genomic sequence type",
    )
    return parser


def chunk_ids(lines, chunk_size=MAX_IDS_PER_REQUEST):
    """Yield lists of at most *chunk_size* IDs read from *lines*.

    Each line is stripped of surrounding whitespace. Lines that are empty
    after stripping are skipped, so a blank or trailing empty line in the
    input file is never submitted as an (empty) ID.

    :param lines: iterable of text lines, e.g. an open file
    :param chunk_size: maximum number of IDs per yielded list
    """
    # Filter before batching: a run of blank lines must neither produce an
    # empty batch nor end the iteration early.
    ids = (stripped for stripped in (line.strip() for line in lines) if stripped)
    while True:
        chunk = list(islice(ids, chunk_size))
        if not chunk:
            return
        yield chunk


def main():
    """Fetch the sequences for the IDs listed in the input file and print them.

    The IDs are POSTed to the Ensembl REST ``sequence/id`` endpoint in
    batches and the text of each response is printed to standard output.

    :raises Exception: if the ``-i`` option is not given
    :raises requests.HTTPError: if the server answers with an error status
    """
    options, _ = build_parser().parse_args()
    if options.input is None:
        raise Exception("-i option must be specified")

    # The query parameters are the same for every batch.
    params = {
        k: getattr(options, k) for k in ("type", "expand_3prime", "expand_5prime")
    }
    url = urljoin(SERVER, EXT)

    with open(options.input) as f:
        for ids in chunk_ids(f):
            r = requests.post(
                url,
                params=params,
                headers=HEADERS,
                data=json.dumps({"ids": ids}),
                allow_redirects=False,
            )
            # No-op for successful responses; raises on 4xx/5xx.
            r.raise_for_status()

            print(r.text)


if __name__ == "__main__":
    main()