annotate get_sequences.py @ 7:c79ce2342f1e draft default tip

planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit 8f8808de862973aedbf87abd4dfa9d2dc7219322
author earlhaminst
date Mon, 17 Feb 2025 14:49:24 +0000
parents 7af66c2b3831
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
e5dd4bd78bbc planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff changeset
1 # A simple tool to connect to the Ensembl server and retrieve sequences using
e5dd4bd78bbc planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff changeset
2 # the Ensembl REST API.
e5dd4bd78bbc planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff changeset
3 import json
e5dd4bd78bbc planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff changeset
4 import optparse
e5dd4bd78bbc planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff changeset
5 from itertools import islice
7
c79ce2342f1e planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit 8f8808de862973aedbf87abd4dfa9d2dc7219322
earlhaminst
parents: 6
diff changeset
6 from urllib.parse import urljoin
1
e5dd4bd78bbc planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff changeset
7
e5dd4bd78bbc planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff changeset
8 import requests
e5dd4bd78bbc planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff changeset
9
e5dd4bd78bbc planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff changeset
10 parser = optparse.OptionParser()
7
c79ce2342f1e planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit 8f8808de862973aedbf87abd4dfa9d2dc7219322
earlhaminst
parents: 6
diff changeset
11 parser.add_option("-i", "--input", help="List of Ensembl IDs")
1
e5dd4bd78bbc planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff changeset
12
7
c79ce2342f1e planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit 8f8808de862973aedbf87abd4dfa9d2dc7219322
earlhaminst
parents: 6
diff changeset
13 parser.add_option(
c79ce2342f1e planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit 8f8808de862973aedbf87abd4dfa9d2dc7219322
earlhaminst
parents: 6
diff changeset
14 "-t",
c79ce2342f1e planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit 8f8808de862973aedbf87abd4dfa9d2dc7219322
earlhaminst
parents: 6
diff changeset
15 "--type",
c79ce2342f1e planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit 8f8808de862973aedbf87abd4dfa9d2dc7219322
earlhaminst
parents: 6
diff changeset
16 type="choice",
c79ce2342f1e planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit 8f8808de862973aedbf87abd4dfa9d2dc7219322
earlhaminst
parents: 6
diff changeset
17 choices=["genomic", "cds", "cdna", "protein"],
c79ce2342f1e planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit 8f8808de862973aedbf87abd4dfa9d2dc7219322
earlhaminst
parents: 6
diff changeset
18 default="genomic",
c79ce2342f1e planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit 8f8808de862973aedbf87abd4dfa9d2dc7219322
earlhaminst
parents: 6
diff changeset
19 help="Type of sequence",
c79ce2342f1e planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit 8f8808de862973aedbf87abd4dfa9d2dc7219322
earlhaminst
parents: 6
diff changeset
20 )
c79ce2342f1e planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit 8f8808de862973aedbf87abd4dfa9d2dc7219322
earlhaminst
parents: 6
diff changeset
21 parser.add_option(
c79ce2342f1e planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit 8f8808de862973aedbf87abd4dfa9d2dc7219322
earlhaminst
parents: 6
diff changeset
22 "--expand_3prime",
c79ce2342f1e planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit 8f8808de862973aedbf87abd4dfa9d2dc7219322
earlhaminst
parents: 6
diff changeset
23 type="int",
c79ce2342f1e planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit 8f8808de862973aedbf87abd4dfa9d2dc7219322
earlhaminst
parents: 6
diff changeset
24 default=0,
c79ce2342f1e planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit 8f8808de862973aedbf87abd4dfa9d2dc7219322
earlhaminst
parents: 6
diff changeset
25 help="Expand the sequence downstream of the sequence by this many basepairs. Only available when using genomic sequence type",
c79ce2342f1e planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit 8f8808de862973aedbf87abd4dfa9d2dc7219322
earlhaminst
parents: 6
diff changeset
26 )
c79ce2342f1e planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit 8f8808de862973aedbf87abd4dfa9d2dc7219322
earlhaminst
parents: 6
diff changeset
27 parser.add_option(
c79ce2342f1e planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit 8f8808de862973aedbf87abd4dfa9d2dc7219322
earlhaminst
parents: 6
diff changeset
28 "--expand_5prime",
c79ce2342f1e planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit 8f8808de862973aedbf87abd4dfa9d2dc7219322
earlhaminst
parents: 6
diff changeset
29 type="int",
c79ce2342f1e planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit 8f8808de862973aedbf87abd4dfa9d2dc7219322
earlhaminst
parents: 6
diff changeset
30 default=0,
c79ce2342f1e planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit 8f8808de862973aedbf87abd4dfa9d2dc7219322
earlhaminst
parents: 6
diff changeset
31 help="Expand the sequence upstream of the sequence by this many basepairs. Only available when using genomic sequence type",
c79ce2342f1e planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit 8f8808de862973aedbf87abd4dfa9d2dc7219322
earlhaminst
parents: 6
diff changeset
32 )
1
e5dd4bd78bbc planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff changeset
33 options, args = parser.parse_args()
e5dd4bd78bbc planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff changeset
34 if options.input is None:
7
c79ce2342f1e planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit 8f8808de862973aedbf87abd4dfa9d2dc7219322
earlhaminst
parents: 6
diff changeset
35 raise Exception("-i option must be specified")
c79ce2342f1e planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit 8f8808de862973aedbf87abd4dfa9d2dc7219322
earlhaminst
parents: 6
diff changeset
36
c79ce2342f1e planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit 8f8808de862973aedbf87abd4dfa9d2dc7219322
earlhaminst
parents: 6
diff changeset
37 server = "https://rest.ensembl.org"
c79ce2342f1e planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit 8f8808de862973aedbf87abd4dfa9d2dc7219322
earlhaminst
parents: 6
diff changeset
38 ext = "sequence/id"
1
e5dd4bd78bbc planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff changeset
39
7
c79ce2342f1e planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit 8f8808de862973aedbf87abd4dfa9d2dc7219322
earlhaminst
parents: 6
diff changeset
40 headers = {"Content-Type": "text/x-fasta", "Accept": "text/x-fasta"}
c79ce2342f1e planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit 8f8808de862973aedbf87abd4dfa9d2dc7219322
earlhaminst
parents: 6
diff changeset
41 params = {
c79ce2342f1e planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit 8f8808de862973aedbf87abd4dfa9d2dc7219322
earlhaminst
parents: 6
diff changeset
42 k: getattr(options, k) for k in ("type", "expand_3prime", "expand_5prime")
c79ce2342f1e planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit 8f8808de862973aedbf87abd4dfa9d2dc7219322
earlhaminst
parents: 6
diff changeset
43 }
1
e5dd4bd78bbc planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff changeset
44 with open(options.input) as f:
e5dd4bd78bbc planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff changeset
45 # Need to split the file in chunks of 50 lines because of the limit imposed by Ensembl
e5dd4bd78bbc planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff changeset
46 while True:
e5dd4bd78bbc planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff changeset
47 ids = [line.strip() for line in islice(f, 50)]
e5dd4bd78bbc planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff changeset
48 if not ids:
e5dd4bd78bbc planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff changeset
49 break
7
c79ce2342f1e planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit 8f8808de862973aedbf87abd4dfa9d2dc7219322
earlhaminst
parents: 6
diff changeset
50 data = {"ids": ids}
c79ce2342f1e planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit 8f8808de862973aedbf87abd4dfa9d2dc7219322
earlhaminst
parents: 6
diff changeset
51 r = requests.post(
c79ce2342f1e planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit 8f8808de862973aedbf87abd4dfa9d2dc7219322
earlhaminst
parents: 6
diff changeset
52 urljoin(server, ext),
c79ce2342f1e planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit 8f8808de862973aedbf87abd4dfa9d2dc7219322
earlhaminst
parents: 6
diff changeset
53 params=params,
c79ce2342f1e planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit 8f8808de862973aedbf87abd4dfa9d2dc7219322
earlhaminst
parents: 6
diff changeset
54 headers=headers,
c79ce2342f1e planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit 8f8808de862973aedbf87abd4dfa9d2dc7219322
earlhaminst
parents: 6
diff changeset
55 data=json.dumps(data),
c79ce2342f1e planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit 8f8808de862973aedbf87abd4dfa9d2dc7219322
earlhaminst
parents: 6
diff changeset
56 allow_redirects=False,
c79ce2342f1e planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit 8f8808de862973aedbf87abd4dfa9d2dc7219322
earlhaminst
parents: 6
diff changeset
57 )
1
e5dd4bd78bbc planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff changeset
58
e5dd4bd78bbc planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff changeset
59 if not r.ok:
e5dd4bd78bbc planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff changeset
60 r.raise_for_status()
e5dd4bd78bbc planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
earlhaminst
parents:
diff changeset
61
2
4b7261f484bb planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit 099d38157cec200f0a343579ca9babcd8acb266f
earlhaminst
parents: 1
diff changeset
62 print(r.text)