annotate get_sequences.py @ 8:37cdb55f5258 draft default tip

planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit 8f8808de862973aedbf87abd4dfa9d2dc7219322
author earlhaminst
date Mon, 17 Feb 2025 14:49:19 +0000
parents d6bb417dc831
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
396f0f54d115 planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff changeset
# A simple tool to connect to the Ensembl server and retrieve sequences using
# the Ensembl REST API.
#
# Reads Ensembl IDs (one per line) from the file given with -i/--input, POSTs
# them in batches to the "sequence/id" endpoint and prints each FASTA response
# to standard output.
import json
import optparse
from itertools import islice
from urllib.parse import urljoin

import requests

# Maximum number of IDs sent in a single POST request. The input is split
# into chunks of this size because of the limit imposed by Ensembl.
BATCH_SIZE = 50

parser = optparse.OptionParser()
parser.add_option("-i", "--input", help="List of Ensembl IDs")

parser.add_option(
    "-t",
    "--type",
    type="choice",
    choices=["genomic", "cds", "cdna", "protein"],
    default="genomic",
    help="Type of sequence",
)
parser.add_option(
    "--expand_3prime",
    type="int",
    default=0,
    help="Expand the sequence downstream of the sequence by this many basepairs. Only available when using genomic sequence type",
)
parser.add_option(
    "--expand_5prime",
    type="int",
    default=0,
    help="Expand the sequence upstream of the sequence by this many basepairs. Only available when using genomic sequence type",
)
options, args = parser.parse_args()
if options.input is None:
    raise Exception("-i option must be specified")

server = "https://rest.ensembl.org"
ext = "sequence/id"
# The endpoint URL does not change between batches, so build it once.
url = urljoin(server, ext)

headers = {"Content-Type": "text/x-fasta", "Accept": "text/x-fasta"}
# Query-string parameters, taken verbatim from the command-line options.
params = {
    k: getattr(options, k) for k in ("type", "expand_3prime", "expand_5prime")
}
with open(options.input) as f:
    # Strip every line and drop the empty ones *before* batching: a blank
    # line must neither be sent to the server as an empty ID nor (if 50 of
    # them happen to be consecutive) be mistaken for the end of the input.
    id_stream = filter(None, (line.strip() for line in f))
    while True:
        ids = list(islice(id_stream, BATCH_SIZE))
        if not ids:
            break
        data = {"ids": ids}
        r = requests.post(
            url,
            params=params,
            headers=headers,
            data=json.dumps(data),
            allow_redirects=False,
        )

        # Raises requests.HTTPError on a 4xx/5xx response and does nothing
        # otherwise, so no separate `r.ok` check is needed.
        r.raise_for_status()

        print(r.text)