Mercurial > repos > galaxyp > retrieve_ensembl_bed
comparison ensembl_rest.py @ 0:da1b538b87e5 draft
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 88cf1e923a8c9e5bc6953ad412d15a7c70f054d1
author | galaxyp |
---|---|
date | Mon, 22 Jan 2018 13:13:47 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:da1b538b87e5 |
---|---|
1 #!/usr/bin/env python | |
2 """ | |
3 # | |
4 #------------------------------------------------------------------------------ | |
5 # University of Minnesota | |
6 # Copyright 2017, Regents of the University of Minnesota | |
7 #------------------------------------------------------------------------------ | |
8 # Author: | |
9 # | |
10 # James E Johnson | |
11 # | |
12 #------------------------------------------------------------------------------ | |
13 """ | |
14 | |
15 from __future__ import print_function | |
16 from __future__ import unicode_literals | |
17 | |
18 import sys | |
19 | |
20 from time import sleep | |
21 | |
22 import requests | |
23 | |
24 | |
# Base URL of the Ensembl REST service; ensembl_rest() prepends it to the
# endpoint path it is given.
server = "https://rest.ensembl.org"
# Module-level default endpoint path. The functions visible in this file
# assign their own local ``ext`` and do not read this value.
ext = "/info/assembly/homo_sapiens?"
# Step size used by get_transcripts_bed() when splitting a coordinate range
# into successive region requests.
max_region = 4000000
# When true, ensembl_rest() prints each requested endpoint path to stderr.
debug = False
29 | |
30 | |
def ensembl_rest(ext, headers):
    """Send a GET request to the Ensembl REST server and return the response.

    The request URL is the module-level ``server`` followed by ``ext``.
    When the server answers with HTTP 429 and supplies a ``Retry-After``
    header, the function waits for the indicated number of seconds and
    repeats the request once.

    Args:
        ext: endpoint path and query string appended to ``server``.
        headers: dict of HTTP headers sent with the request.

    Returns:
        The ``requests`` response object of the last request made.

    Raises:
        requests.HTTPError: if the final response has a 4xx/5xx status.
    """
    if debug:
        print("%s" % ext, file=sys.stderr)
    url = server + ext
    r = requests.get(url, headers=headers)
    if r.status_code == 429:
        print("response headers: %s\n" % r.headers, file=sys.stderr)
        if 'Retry-After' in r.headers:
            # Header values arrive as strings, but sleep() requires a
            # number; passing the raw header raised TypeError.
            try:
                delay = float(r.headers['Retry-After'])
            except (TypeError, ValueError):
                # Value is not a plain number of seconds; wait briefly
                # rather than abort the retry.
                delay = 1.0
            sleep(max(delay, 0.0))
            r = requests.get(url, headers=headers)
    # No-op for successful responses; raises HTTPError for 4xx/5xx.
    r.raise_for_status()
    return r
43 | |
44 | |
def get_species():
    """Fetch the species list, print it as tab-separated rows, and return it.

    Queries the ``/info/species`` endpoint. For every entry under the
    ``species`` key of the JSON reply, one line holding the name, common
    name, display name, strain and taxon id is written to stdout.

    Returns:
        dict mapping each species ``name`` to its full JSON entry.
    """
    response = ensembl_rest("/info/species",
                            {"Content-Type": "application/json"})
    columns = ('name', 'common_name', 'display_name', 'strain', 'taxon_id')
    by_name = {}
    for entry in response.json()['species']:
        by_name[entry['name']] = entry
        row = tuple(entry[column] for column in columns)
        print("%s\t%s\t%s\t%s\t%s" % row, file=sys.stdout)
    return by_name
58 | |
59 | |
def get_biotypes(species):
    """Return the biotype names reported for ``species``.

    Queries ``/info/biotypes/<species>`` and collects the ``biotype`` value
    of every entry in the JSON reply that has one, in reply order.
    """
    endpoint = "/info/biotypes/%s?" % species
    response = ensembl_rest(endpoint, {"Content-Type": "application/json"})
    return [item['biotype'] for item in response.json() if 'biotype' in item]
69 | |
70 | |
def get_toplevel(species):
    """Return the top-level sequence lengths of ``species`` by coord system.

    Queries ``/info/assembly/<species>`` and reads the ``top_level_region``
    list of the JSON reply.

    Returns:
        dict keyed by coordinate-system name; each value is a dict mapping
        a sequence name to its length as an int.
    """
    endpoint = "/info/assembly/%s?" % species
    response = ensembl_rest(endpoint, {"Content-Type": "application/json"})
    assembly = response.json()
    coord_systems = {}
    for region in assembly['top_level_region']:
        system_name = region['coord_system']
        # Create the per-system mapping the first time a system is seen.
        lengths = coord_systems.setdefault(system_name, {})
        lengths[region['name']] = int(region['length'])
    return coord_systems
83 | |
84 | |
def get_transcripts_bed(species, refseq, start, length, strand='',
                        params=None):
    """Collect transcript BED lines for a coordinate range of ``refseq``.

    The range from ``start`` to ``length`` is split at multiples of the
    module-level ``max_region`` and one ``/overlap/region`` request
    (``feature=transcript``, BED content type) is made per piece.

    Args:
        species: species name placed in the endpoint path.
        refseq: sequence name placed in the region string.
        start: first coordinate of the range.
        length: last coordinate of the range.
        strand: text appended verbatim after the end coordinate in the
            region string (presumably a ``:1`` / ``:-1`` suffix; confirm
            against the caller).
        params: extra query-string text appended after ``feature=transcript;``.

    Returns:
        list of the text lines of every non-empty response, in request order.
    """
    extra = params or ''
    bed_headers = {"Content-Type": "text/x-bed"}
    # Window boundaries; the final boundary is always the range end itself.
    boundaries = list(range(start, length, max_region))
    if not boundaries or boundaries[-1] < length:
        boundaries.append(length)
    lines = []
    window_start = start
    for window_end in boundaries[1:]:
        endpoint = "/overlap/region/%s/%s:%d-%d%s?feature=transcript;%s" % (
            species, refseq, window_start, window_end, strand, extra)
        # The next window begins one past the end of this one.
        window_start = window_end + 1
        response = ensembl_rest(endpoint, bed_headers)
        if response.text:
            lines.extend(response.text.splitlines())
    return lines
101 | |
102 | |
def get_seq(id, seqtype, params=None):
    """Return the text of a ``/sequence/id`` request.

    Args:
        id: identifier placed in the endpoint path.
        seqtype: value of the ``type`` query parameter.
        params: extra query-string text appended after the type, if any.

    Returns:
        The response body as text (requested with a ``text/plain`` header).
    """
    extra = params or ''
    endpoint = "/sequence/id/%s?type=%s;%s" % (id, seqtype, extra)
    return ensembl_rest(endpoint, {"Content-Type": "text/plain"}).text
109 | |
110 | |
def get_cdna(id, params=None):
    """Return the sequence text for ``id`` requested with type ``cdna``."""
    return get_seq(id, seqtype='cdna', params=params)
113 | |
114 | |
def get_cds(id, params=None):
    """Return the sequence text for ``id`` requested with type ``cds``."""
    return get_seq(id, seqtype='cds', params=params)
117 | |
118 | |
def get_genomic(id, params=None):
    """Return the sequence text for ``id`` requested with type ``genomic``."""
    return get_seq(id, seqtype='genomic', params=params)
121 | |
122 | |
def get_transcript_haplotypes(species, transcript):
    """Return the decoded JSON reply of a transcript-haplotypes request.

    Queries ``/transcript_haplotypes/<species>/<transcript>`` with
    ``aligned_sequences=1`` and returns the parsed JSON body unchanged.
    """
    endpoint = "/transcript_haplotypes/%s/%s?aligned_sequences=1" % (
        species, transcript)
    response = ensembl_rest(endpoint, {"Content-Type": "application/json"})
    return response.json()