annotate sift_web/sift_web.py @ 4:5cc3210369ac draft

Uploaded
author saket-choudhary
date Thu, 04 Sep 2014 17:46:51 -0400
parents 19590303d460
children 697e66764209
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
2
19590303d460 Uploaded
saket-choudhary
parents:
diff changeset
1 #!/usr/bin/env python
19590303d460 Uploaded
saket-choudhary
parents:
diff changeset
2 import requests
19590303d460 Uploaded
saket-choudhary
parents:
diff changeset
3 import argparse
19590303d460 Uploaded
saket-choudhary
parents:
diff changeset
4 import sys
19590303d460 Uploaded
saket-choudhary
parents:
diff changeset
5 from functools import wraps
19590303d460 Uploaded
saket-choudhary
parents:
diff changeset
6 import time
19590303d460 Uploaded
saket-choudhary
parents:
diff changeset
7 from bs4 import BeautifulSoup
19590303d460 Uploaded
saket-choudhary
parents:
diff changeset
8
19590303d460 Uploaded
saket-choudhary
parents:
diff changeset
9 __url__ = 'http://provean.jcvi.org/genome_prg_2.php'
19590303d460 Uploaded
saket-choudhary
parents:
diff changeset
10
19590303d460 Uploaded
saket-choudhary
parents:
diff changeset
11
19590303d460 Uploaded
saket-choudhary
parents:
diff changeset
def stop_err(msg, err=1):
    """Write ``msg`` to stderr followed by a newline, then exit.

    :param msg: message to report; any object is formatted with ``%s``
    :param err: exit status handed to ``sys.exit`` (defaults to 1)
    :raises SystemExit: always
    """
    # Wrap msg in a one-element tuple so that a tuple-valued msg is
    # formatted as a single value instead of being unpacked by ``%``.
    sys.stderr.write('%s\n' % (msg,))
    sys.exit(err)
19590303d460 Uploaded
saket-choudhary
parents:
diff changeset
15
19590303d460 Uploaded
saket-choudhary
parents:
diff changeset
16
19590303d460 Uploaded
saket-choudhary
parents:
diff changeset
def retry(ExceptionToCheck, tries=10, delay=3, backoff=2, logger=None):
    """Retry calling the decorated function using an exponential backoff.

    http://www.saltycrane.com/blog/2009/11/trying-out-retry-decorator-python/
    original from: http://wiki.python.org/moin/PythonDecoratorLibrary#Retry

    :param ExceptionToCheck: the exception to check. may be a tuple of
        exceptions to check
    :type ExceptionToCheck: Exception or tuple
    :param tries: number of times to try (not retry) before giving up
    :type tries: int
    :param delay: initial delay between retries in seconds
    :type delay: int
    :param backoff: backoff multiplier e.g. value of 2 will double the delay
        each retry
    :type backoff: int
    :param logger: logger whose ``warning`` method is called before each
        sleep. If None, retries happen silently
    :type logger: logging.Logger instance
    :returns: a decorator; the wrapped function returns whatever the
        original returns on its first successful call
    """
    def deco_retry(f):

        @wraps(f)
        def f_retry(*args, **kwargs):
            mtries, mdelay = tries, delay
            # The first ``tries - 1`` attempts swallow ExceptionToCheck
            # and sleep before trying again.
            while mtries > 1:
                try:
                    return f(*args, **kwargs)
                except ExceptionToCheck:
                    if logger:
                        logger.warning('Retrying in %d seconds...' % mdelay)
                    time.sleep(mdelay)
                    mtries -= 1
                    mdelay *= backoff
            # Final attempt: any exception now propagates to the caller.
            return f(*args, **kwargs)

        return f_retry  # true decorator

    return deco_retry
19590303d460 Uploaded
saket-choudhary
parents:
diff changeset
60
19590303d460 Uploaded
saket-choudhary
parents:
diff changeset
61
19590303d460 Uploaded
saket-choudhary
parents:
diff changeset
class SIFTWeb:
    """Small client for the web form hosted at provean.jcvi.org.

    ``upload`` posts an input file to the form endpoint; the three
    ``get_*_data`` methods fetch the result files for a job id and save
    them locally. Each download is retried (see ``retry``) for as long as
    the server answers with the literal text ``No file exists``.
    """

    def __init__(self):
        # URL templates; both ``%s`` slots are filled with the job id.
        self.full_download_url = 'http://provean.jcvi.org/serve_file.php?VAR=g%s/%s.result.tsv'
        self.condensed_download_url = 'http://provean.jcvi.org/serve_file.php?VAR=g%s/%s.result.one.tsv'
        self.summary_download_url = 'http://provean.jcvi.org/serve_file.php?VAR=g%s/%s.result.summary.tsv'
        self.url_dict = {'full': self.full_download_url,
                         'condensed': self.condensed_download_url,
                         'summary': self.summary_download_url}

    def upload(self, inputpath):
        """POST the contents of ``inputpath`` and return the response text.

        :param inputpath: path of the file whose raw bytes are sent as the
            ``CHR`` form field
        :returns: body of the server's response, decoded by ``requests``
        """
        payload = {'table': 'human37_66'}
        # Context manager so the input file is closed deterministically.
        with open(inputpath, 'rb') as handle:
            payload['CHR'] = handle.read()
        response = requests.post(__url__, data=payload)
        return response.text

    def _download(self, url_template, job_id, output_path):
        """Fetch one result file and write it to ``output_path``.

        :param url_template: one of the ``*_download_url`` templates
        :param job_id: job identifier substituted into both ``%s`` slots
        :param output_path: local file that receives the response body
        :raises requests.exceptions.HTTPError: when the server replies
            ``No file exists``; this is the signal ``retry`` reacts to
        """
        response = requests.request('GET', url_template % (job_id, job_id))
        if response.text == 'No file exists':
            raise requests.exceptions.HTTPError(
                'No file exists for job %s' % job_id)
        # The file is opened in binary mode, so write the undecoded bytes.
        with open(output_path, 'wb') as out:
            out.write(response.content)

    @retry(requests.exceptions.HTTPError)
    def get_full_data(self, job_id, full_output):
        """Download the ``.result.tsv`` file for ``job_id``.

        :raises requests.exceptions.HTTPError: if the file is still
            missing after all retries
        """
        self._download(self.full_download_url, job_id, full_output)

    @retry(requests.exceptions.HTTPError)
    def get_condensed_data(self, job_id, condensed_output):
        """Download the ``.result.one.tsv`` file for ``job_id``.

        :raises requests.exceptions.HTTPError: if the file is still
            missing after all retries
        """
        self._download(self.condensed_download_url, job_id, condensed_output)

    @retry(requests.exceptions.HTTPError)
    def get_summary_data(self, job_id, summary_output):
        """Download the ``.result.summary.tsv`` file for ``job_id``.

        :raises requests.exceptions.HTTPError: if the file is still
            missing after all retries
        """
        self._download(self.summary_download_url, job_id, summary_output)
19590303d460 Uploaded
saket-choudhary
parents:
diff changeset
108
19590303d460 Uploaded
saket-choudhary
parents:
diff changeset
109
19590303d460 Uploaded
saket-choudhary
parents:
diff changeset
def main(params):
    """Upload the input file and save the three result files.

    :param params: command-line arguments without the program name;
        ``--input``, ``--output1``, ``--output2`` and ``--output3`` are
        all required. ``output1`` receives the full result, ``output2``
        the condensed result and ``output3`` the summary.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--input', type=str, required=True)
    parser.add_argument('--output1', type=str, required=True)
    parser.add_argument('--output2', type=str, required=True)
    parser.add_argument('--output3', type=str, required=True)
    args = parser.parse_args(params)

    sift_web = SIFTWeb()
    content = sift_web.upload(args.input)

    # NOTE(review): no parser is named here, so bs4 picks whichever one is
    # installed; consider pinning one once the page markup is confirmed.
    soup = BeautifulSoup(content)
    paragraphs = soup.findAll('p')
    # The job id is read from the second <p>; fail with a clear message
    # when the response does not have the expected shape.
    if len(paragraphs) < 2 or paragraphs[1].string is None:
        stop_err('Could not find a job id in the server response')
    # Keep the text after the last ':' and drop spaces and any ').'.
    job_id = paragraphs[1].string.split(':')[-1].replace(' ', '').replace(').', '')

    sift_web.get_full_data(job_id, args.output1)
    sift_web.get_condensed_data(job_id, args.output2)
    sift_web.get_summary_data(job_id, args.output3)
19590303d460 Uploaded
saket-choudhary
parents:
diff changeset
125
19590303d460 Uploaded
saket-choudhary
parents:
diff changeset
126
19590303d460 Uploaded
saket-choudhary
parents:
diff changeset
if __name__ == '__main__':
    # Pass only the user-supplied arguments; argv[0] is the script path.
    main(sys.argv[1:])