annotate get_sequences.py @ 8:935de83b470b draft default tip

planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit 8f8808de862973aedbf87abd4dfa9d2dc7219322
author earlhaminst
date Mon, 17 Feb 2025 14:49:15 +0000
parents 0618e3bd5138
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
98aba0efe77a planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff changeset
1 # A simple tool to connect to the Ensembl server and retrieve sequences using
98aba0efe77a planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff changeset
2 # the Ensembl REST API.
98aba0efe77a planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff changeset
3 import json
98aba0efe77a planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff changeset
4 import optparse
98aba0efe77a planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff changeset
5 from itertools import islice
8
935de83b470b planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit 8f8808de862973aedbf87abd4dfa9d2dc7219322
earlhaminst
parents: 6
diff changeset
6 from urllib.parse import urljoin
1
98aba0efe77a planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff changeset
7
98aba0efe77a planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff changeset
8 import requests
98aba0efe77a planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff changeset
9
98aba0efe77a planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff changeset
10 parser = optparse.OptionParser()
8
935de83b470b planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit 8f8808de862973aedbf87abd4dfa9d2dc7219322
earlhaminst
parents: 6
diff changeset
11 parser.add_option("-i", "--input", help="List of Ensembl IDs")
1
98aba0efe77a planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff changeset
12
8
935de83b470b planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit 8f8808de862973aedbf87abd4dfa9d2dc7219322
earlhaminst
parents: 6
diff changeset
13 parser.add_option(
935de83b470b planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit 8f8808de862973aedbf87abd4dfa9d2dc7219322
earlhaminst
parents: 6
diff changeset
14 "-t",
935de83b470b planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit 8f8808de862973aedbf87abd4dfa9d2dc7219322
earlhaminst
parents: 6
diff changeset
15 "--type",
935de83b470b planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit 8f8808de862973aedbf87abd4dfa9d2dc7219322
earlhaminst
parents: 6
diff changeset
16 type="choice",
935de83b470b planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit 8f8808de862973aedbf87abd4dfa9d2dc7219322
earlhaminst
parents: 6
diff changeset
17 choices=["genomic", "cds", "cdna", "protein"],
935de83b470b planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit 8f8808de862973aedbf87abd4dfa9d2dc7219322
earlhaminst
parents: 6
diff changeset
18 default="genomic",
935de83b470b planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit 8f8808de862973aedbf87abd4dfa9d2dc7219322
earlhaminst
parents: 6
diff changeset
19 help="Type of sequence",
935de83b470b planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit 8f8808de862973aedbf87abd4dfa9d2dc7219322
earlhaminst
parents: 6
diff changeset
20 )
935de83b470b planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit 8f8808de862973aedbf87abd4dfa9d2dc7219322
earlhaminst
parents: 6
diff changeset
21 parser.add_option(
935de83b470b planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit 8f8808de862973aedbf87abd4dfa9d2dc7219322
earlhaminst
parents: 6
diff changeset
22 "--expand_3prime",
935de83b470b planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit 8f8808de862973aedbf87abd4dfa9d2dc7219322
earlhaminst
parents: 6
diff changeset
23 type="int",
935de83b470b planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit 8f8808de862973aedbf87abd4dfa9d2dc7219322
earlhaminst
parents: 6
diff changeset
24 default=0,
935de83b470b planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit 8f8808de862973aedbf87abd4dfa9d2dc7219322
earlhaminst
parents: 6
diff changeset
25 help="Expand the sequence downstream of the sequence by this many basepairs. Only available when using genomic sequence type",
935de83b470b planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit 8f8808de862973aedbf87abd4dfa9d2dc7219322
earlhaminst
parents: 6
diff changeset
26 )
935de83b470b planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit 8f8808de862973aedbf87abd4dfa9d2dc7219322
earlhaminst
parents: 6
diff changeset
27 parser.add_option(
935de83b470b planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit 8f8808de862973aedbf87abd4dfa9d2dc7219322
earlhaminst
parents: 6
diff changeset
28 "--expand_5prime",
935de83b470b planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit 8f8808de862973aedbf87abd4dfa9d2dc7219322
earlhaminst
parents: 6
diff changeset
29 type="int",
935de83b470b planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit 8f8808de862973aedbf87abd4dfa9d2dc7219322
earlhaminst
parents: 6
diff changeset
30 default=0,
935de83b470b planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit 8f8808de862973aedbf87abd4dfa9d2dc7219322
earlhaminst
parents: 6
diff changeset
31 help="Expand the sequence upstream of the sequence by this many basepairs. Only available when using genomic sequence type",
935de83b470b planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit 8f8808de862973aedbf87abd4dfa9d2dc7219322
earlhaminst
parents: 6
diff changeset
32 )
1
98aba0efe77a planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff changeset
33 options, args = parser.parse_args()
98aba0efe77a planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff changeset
34 if options.input is None:
8
935de83b470b planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit 8f8808de862973aedbf87abd4dfa9d2dc7219322
earlhaminst
parents: 6
diff changeset
35 raise Exception("-i option must be specified")
935de83b470b planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit 8f8808de862973aedbf87abd4dfa9d2dc7219322
earlhaminst
parents: 6
diff changeset
36
935de83b470b planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit 8f8808de862973aedbf87abd4dfa9d2dc7219322
earlhaminst
parents: 6
diff changeset
37 server = "https://rest.ensembl.org"
935de83b470b planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit 8f8808de862973aedbf87abd4dfa9d2dc7219322
earlhaminst
parents: 6
diff changeset
38 ext = "sequence/id"
1
98aba0efe77a planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff changeset
39
8
935de83b470b planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit 8f8808de862973aedbf87abd4dfa9d2dc7219322
earlhaminst
parents: 6
diff changeset
40 headers = {"Content-Type": "text/x-fasta", "Accept": "text/x-fasta"}
935de83b470b planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit 8f8808de862973aedbf87abd4dfa9d2dc7219322
earlhaminst
parents: 6
diff changeset
41 params = {
935de83b470b planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit 8f8808de862973aedbf87abd4dfa9d2dc7219322
earlhaminst
parents: 6
diff changeset
42 k: getattr(options, k) for k in ("type", "expand_3prime", "expand_5prime")
935de83b470b planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit 8f8808de862973aedbf87abd4dfa9d2dc7219322
earlhaminst
parents: 6
diff changeset
43 }
1
98aba0efe77a planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff changeset
44 with open(options.input) as f:
98aba0efe77a planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff changeset
45 # Need to split the file in chunks of 50 lines because of the limit imposed by Ensembl
98aba0efe77a planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff changeset
46 while True:
98aba0efe77a planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff changeset
47 ids = [line.strip() for line in islice(f, 50)]
98aba0efe77a planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff changeset
48 if not ids:
98aba0efe77a planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff changeset
49 break
8
935de83b470b planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit 8f8808de862973aedbf87abd4dfa9d2dc7219322
earlhaminst
parents: 6
diff changeset
50 data = {"ids": ids}
935de83b470b planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit 8f8808de862973aedbf87abd4dfa9d2dc7219322
earlhaminst
parents: 6
diff changeset
51 r = requests.post(
935de83b470b planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit 8f8808de862973aedbf87abd4dfa9d2dc7219322
earlhaminst
parents: 6
diff changeset
52 urljoin(server, ext),
935de83b470b planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit 8f8808de862973aedbf87abd4dfa9d2dc7219322
earlhaminst
parents: 6
diff changeset
53 params=params,
935de83b470b planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit 8f8808de862973aedbf87abd4dfa9d2dc7219322
earlhaminst
parents: 6
diff changeset
54 headers=headers,
935de83b470b planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit 8f8808de862973aedbf87abd4dfa9d2dc7219322
earlhaminst
parents: 6
diff changeset
55 data=json.dumps(data),
935de83b470b planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit 8f8808de862973aedbf87abd4dfa9d2dc7219322
earlhaminst
parents: 6
diff changeset
56 allow_redirects=False,
935de83b470b planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit 8f8808de862973aedbf87abd4dfa9d2dc7219322
earlhaminst
parents: 6
diff changeset
57 )
1
98aba0efe77a planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff changeset
58
98aba0efe77a planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff changeset
59 if not r.ok:
98aba0efe77a planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff changeset
60 r.raise_for_status()
98aba0efe77a planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff changeset
61
2
950d9d11b6fb planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit 099d38157cec200f0a343579ca9babcd8acb266f
earlhaminst
parents: 1
diff changeset
62 print(r.text)