annotate aresite2.py @ 1:6e02034384c7 draft

Uploaded
author jfallmann
date Wed, 01 Feb 2017 09:55:11 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
6e02034384c7 Uploaded
jfallmann
parents:
diff changeset
1 # A simple tool to connect to the AREsite server and retrieve feature
6e02034384c7 Uploaded
jfallmann
parents:
diff changeset
2 # information using the AREsite REST Interface.
6e02034384c7 Uploaded
jfallmann
parents:
diff changeset
3 # Parts of this code are from https://toolshed.g2.bx.psu.edu/repos/earlhaminst/ensembl_get_feature_info
6e02034384c7 Uploaded
jfallmann
parents:
diff changeset
4 from __future__ import print_function
6e02034384c7 Uploaded
jfallmann
parents:
diff changeset
5
6e02034384c7 Uploaded
jfallmann
parents:
diff changeset
6 import json
6e02034384c7 Uploaded
jfallmann
parents:
diff changeset
7 import optparse
6e02034384c7 Uploaded
jfallmann
parents:
diff changeset
8 import sys
6e02034384c7 Uploaded
jfallmann
parents:
diff changeset
9 import urllib
6e02034384c7 Uploaded
jfallmann
parents:
diff changeset
10 import urllib2
6e02034384c7 Uploaded
jfallmann
parents:
diff changeset
11 import time
6e02034384c7 Uploaded
jfallmann
parents:
diff changeset
12 import requests
6e02034384c7 Uploaded
jfallmann
parents:
diff changeset
13 from six.moves.urllib.parse import urljoin
6e02034384c7 Uploaded
jfallmann
parents:
diff changeset
14
6e02034384c7 Uploaded
jfallmann
parents:
diff changeset
# Command-line interface: the gene is mandatory, the motif and species
# fall back to defaults. Parsing happens at module level, so `options`
# is available to the entry point at the bottom of the file.
usage = "usage: %prog [options] arg1 arg2"
parser = optparse.OptionParser(usage=usage)
parser.add_option(
    '-g', '--gene',
    help='Gene ID to search for',
)
parser.add_option(
    '-m', '--motif',
    type=str,
    default='ATTTA',
    help='Motif to look for',
)
parser.add_option(
    '-s', '--species',
    type='choice',
    choices=[
        'Homo_sapiens',
        'Mus_musculus',
        'Danio_rerio',
        'Drosophila_melanogaster',
        'Caenorhabditis_elegans',
    ],
    default='Homo_sapiens',
    help='Specify the species to investigate',
)
options, args = parser.parse_args()

# A gene is required; there is no sensible default to query for.
if options.gene is None:
    raise Exception('- Specify the gene you want to look for!')

# A comma would indicate a list of motifs; only one motif per call is accepted.
if "," in options.motif:
    raise Exception('- Please only search for single motifs at once')
6e02034384c7 Uploaded
jfallmann
parents:
diff changeset
29
6e02034384c7 Uploaded
jfallmann
parents:
diff changeset
class AREsiteRestClient(object):
    """Small client for the AREsite2 REST interface with client-side throttling.

    The client counts successful requests and, once ``reqs_per_sec`` of
    them have been made, waits until at least one second has passed since
    the last throttle checkpoint before sending the next request.
    """

    def __init__(self, server='http://rna.tbi.univie.ac.at/AREsite2/api/', reqs_per_sec=1):
        """Store the base URL and the request budget.

        Args:
            server: Base URL that every endpoint is appended to.
            reqs_per_sec: Number of successful requests allowed before the
                client throttles itself.
        """
        self.server = server
        self.reqs_per_sec = reqs_per_sec
        self.req_count = 0  # successful requests since the last checkpoint
        self.last_req = 0   # time.time() of the last throttle checkpoint

    @staticmethod
    def _build_url(server, endpoint, params=None):
        """Return ``server + endpoint`` with ``params`` URL-encoded and appended.

        ``params`` may be a mapping or a sequence of ``(key, value)`` pairs.
        If the URL already carries a query string, the encoded parameters
        are joined with ``&`` rather than a second ``?``.
        """
        # Function-scope import keeps the block usable on Python 2 and 3.
        try:  # Python 3
            from urllib.parse import urlencode
        except ImportError:  # Python 2
            from urllib import urlencode

        url = server + endpoint
        if params:
            if url.endswith(('?', '&')):
                separator = ''
            elif '?' in url:
                separator = '&'
            else:
                separator = '?'
            url += separator + urlencode(params)
        return url

    def perform_rest_action(self, endpoint, hdrs=None, params=None):
        """Send a GET request and return the decoded JSON body.

        Args:
            endpoint: Path (and/or pre-built query string) appended to
                ``self.server``.
            hdrs: Optional request headers. The mapping is copied, never
                modified; ``Content-Type: application/json`` is added when
                the caller did not supply a ``Content-Type``.
            params: Optional query parameters, URL-encoded onto the request.

        Returns:
            The parsed JSON document, or ``None`` when the body is empty or
            the server answered with an HTTP error that is not retried.

        Raises:
            Only ``HTTPError`` is handled here; any other exception raised
            while opening, reading or decoding the response propagates.
        """
        # Function-scope import keeps the block usable on Python 2 and 3.
        try:  # Python 3
            from urllib.error import HTTPError
            from urllib.request import Request, urlopen
        except ImportError:  # Python 2
            from urllib2 import HTTPError, Request, urlopen

        # Work on a copy so the caller's dict is left untouched.
        headers = dict(hdrs) if hdrs else {}
        headers.setdefault('Content-Type', 'application/json')

        # Build the URL without touching `endpoint`, so a retry below does
        # not append the query string a second time.
        url = self._build_url(self.server, endpoint, params)

        # check if we need to rate limit ourselves
        if self.req_count >= self.reqs_per_sec:
            delta = time.time() - self.last_req
            if delta < 1:
                time.sleep(1 - delta)
            self.last_req = time.time()
            self.req_count = 0

        data = None
        try:
            response = urlopen(Request(url, headers=headers))
            try:
                content = response.read()
            finally:
                response.close()
            if content:
                data = json.loads(content.decode('utf-8'))
            self.req_count += 1
        except HTTPError as e:
            # check if we are being rate limited by the server
            if e.code == 429 and 'Retry-After' in e.headers:
                time.sleep(float(e.headers['Retry-After']))
                # Hand the retried result back to the caller instead of
                # dropping it.
                return self.perform_rest_action(endpoint, hdrs, params)
            # Every other HTTP failure is reported, including a 429 that
            # carries no Retry-After hint.
            sys.stderr.write('Request failed for {0}: Status code: {1.code} Reason: {1.reason}\n'.format(url, e))

        return data

    def get_motifs(self, species, gene, motifs):
        """Query the server for occurrences of ``motifs`` in ``gene``.

        Args:
            species: Species name sent as the ``species`` parameter.
            gene: Gene identifier sent as the ``query`` parameter.
            motifs: Motif sent as the ``list`` parameter.

        Returns:
            Whatever ``perform_rest_action`` returns for the request: the
            decoded JSON reply, or ``None`` on failure.
        """
        # A list of pairs keeps the parameter order stable and lets
        # urlencode escape characters that would otherwise break the query.
        params = [('query', gene), ('species', species), ('list', motifs)]
        return self.perform_rest_action('', params=params)
6e02034384c7 Uploaded
jfallmann
parents:
diff changeset
85
6e02034384c7 Uploaded
jfallmann
parents:
diff changeset
def run(species, gene, motifs):
    """Fetch motif hits for a gene and print them as tab-separated rows.

    The reply's parallel lists are combined into one row per hit, the rows
    are ordered by their start field, and each row is printed followed by
    an empty line. Nothing is printed when the server reply is empty or
    missing.
    """
    reply = AREsiteRestClient().get_motifs(species, gene, motifs)
    if not reply:
        return

    # Reply keys in output-column order.
    columns = (
        "chromosomes",
        "motif_starts",
        "motif_ends",
        "exact_motifs",
        "annotation",
        "strands",
        "genes",
        "transcripts",
        "hur_evidence",
        "ttp_evidence",
        "auf_evidence",
    )
    # Order rows by the start column (index 1).
    # NOTE(review): if starts arrive as strings this ordering is
    # lexicographic, not numeric - confirm against the API reply.
    rows = sorted(zip(*[reply[name] for name in columns]), key=lambda row: row[1])

    for row in rows:
        # NOTE(review): join assumes every field is already a string - confirm.
        print("\t".join(row) + "\n")
6e02034384c7 Uploaded
jfallmann
parents:
diff changeset
116
6e02034384c7 Uploaded
jfallmann
parents:
diff changeset
117
6e02034384c7 Uploaded
jfallmann
parents:
diff changeset
# Script entry point: run the query with the options parsed above.
if __name__ == '__main__':
    run(species=options.species, gene=options.gene, motifs=options.motif)