comparison sift_web/sift_web.py @ 2:19590303d460 draft

Uploaded
author saket-choudhary
date Thu, 04 Sep 2014 17:45:46 -0400
parents
children 697e66764209
comparison
equal deleted inserted replaced
1:d54007492bc4 2:19590303d460
1 #!/usr/bin/env python
2 import requests
3 import argparse
4 import sys
5 from functools import wraps
6 import time
7 from bs4 import BeautifulSoup
8
# Form endpoint that SIFTWeb.upload() POSTs the input file's contents to.
__url__ = 'http://provean.jcvi.org/genome_prg_2.php'
10
11
def stop_err(msg, err=1):
    """Report *msg* on standard error and terminate the process.

    :param msg: text to emit; a newline is appended
    :param err: exit status handed to ``sys.exit`` (defaults to 1)
    """
    line = '%s\n' % msg
    sys.stderr.write(line)
    sys.exit(err)
15
16
def retry(ExceptionToCheck, tries=10, delay=3, backoff=2, logger=None):
    """Retry calling the decorated function using an exponential backoff.

    http://www.saltycrane.com/blog/2009/11/trying-out-retry-decorator-python/
    original from: http://wiki.python.org/moin/PythonDecoratorLibrary#Retry

    :param ExceptionToCheck: the exception to check. may be a tuple of
        exceptions to check
    :type ExceptionToCheck: Exception or tuple
    :param tries: number of times to try (not retry) before giving up
    :type tries: int
    :param delay: initial delay between retries in seconds
    :type delay: int
    :param backoff: backoff multiplier e.g. value of 2 will double the delay
        each retry
    :type backoff: int
    :param logger: logger to use. If None, retries happen silently
    :type logger: logging.Logger instance
    :returns: a decorator; the wrapped function returns whatever the
        decorated function returns, and lets the exception of the last
        attempt propagate once all tries are used up
    """
    def deco_retry(f):

        @wraps(f)
        def f_retry(*args, **kwargs):
            mtries, mdelay = tries, delay
            while mtries > 1:
                try:
                    return f(*args, **kwargs)
                # No "as"/"," binding: the exception object is not used, and
                # this form parses on both Python 2 and Python 3.
                except ExceptionToCheck:
                    if logger:
                        logger.warning('Retrying in %d seconds...' % mdelay)
                    time.sleep(mdelay)
                    mtries -= 1
                    mdelay *= backoff
            # Last attempt runs outside the try block so that a failure here
            # reaches the caller instead of being swallowed.
            return f(*args, **kwargs)

        return f_retry  # true decorator

    return deco_retry
60
61
class SIFTWeb:
    """Minimal client for the job submission form at ``__url__``.

    ``upload`` posts an input file and returns the HTML reply; the three
    ``get_*_data`` methods fetch the per-job result files and save them to
    disk, polling through the ``retry`` decorator until the file is served.
    """

    def __init__(self):
        # Each template takes the job id twice: directory name and file stem.
        self.full_download_url = 'http://provean.jcvi.org/serve_file.php?VAR=g%s/%s.result.tsv'
        self.condensed_download_url = 'http://provean.jcvi.org/serve_file.php?VAR=g%s/%s.result.one.tsv'
        self.summary_download_url = 'http://provean.jcvi.org/serve_file.php?VAR=g%s/%s.result.summary.tsv'
        self.url_dict = {'full': self.full_download_url,
                         'condensed': self.condensed_download_url,
                         'summary': self.summary_download_url}

    def upload(self, inputpath):
        """POST the contents of *inputpath* to ``__url__``.

        :param inputpath: path of the file whose raw bytes are sent as the
            ``CHR`` form field
        :returns: the response body as text
        """
        payload = {'table': 'human37_66'}
        # Context manager so the input file handle is closed after reading.
        with open(inputpath, 'rb') as handle:
            payload['CHR'] = handle.read()
        request = requests.post(__url__, data=payload)
        return request.text

    def _download(self, url_template, job_id, output_path):
        """Fetch one result file for *job_id* and write it to *output_path*.

        :param url_template: template with two ``%s`` slots, both filled
            with *job_id*
        :raises requests.HTTPError: when the body is exactly
            ``'No file exists'`` (presumably the result is not ready yet --
            verify against the service), which lets ``retry`` poll again
        """
        r = requests.request('GET', url_template % (job_id, job_id))
        if r.text == 'No file exists':
            raise requests.HTTPError()
        with open(output_path, 'wb') as f:
            # The file is opened in binary mode, so write the raw response
            # bytes; writing the decoded text fails on Python 3 and relies on
            # an implicit ASCII encode on Python 2.
            f.write(r.content)

    @retry(requests.exceptions.HTTPError)
    def get_full_data(self, job_id, full_output):
        """Save the ``.result.tsv`` file of *job_id* to *full_output*.

        Raises ``requests.HTTPError`` while the file is not served, so the
        ``retry`` decorator polls for it like the other two downloads.
        """
        self._download(self.full_download_url, job_id, full_output)

    @retry(requests.exceptions.HTTPError)
    def get_condensed_data(self, job_id, condensed_output):
        """Save the ``.result.one.tsv`` file of *job_id* to *condensed_output*."""
        self._download(self.condensed_download_url, job_id, condensed_output)

    @retry(requests.exceptions.HTTPError)
    def get_summary_data(self, job_id, summary_output):
        """Save the ``.result.summary.tsv`` file of *job_id* to *summary_output*."""
        self._download(self.summary_download_url, job_id, summary_output)
108
109
def main(params):
    """Upload the input file, extract the job id and download all results.

    :param params: command-line arguments without the program name; must
        contain ``--input``, ``--output1`` (full result), ``--output2``
        (condensed result) and ``--output3`` (summary result)

    Exits through ``stop_err`` when no job id can be read from the reply.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--input', type=str, required=True)
    parser.add_argument('--output1', type=str, required=True)
    parser.add_argument('--output2', type=str, required=True)
    parser.add_argument('--output3', type=str, required=True)
    args = parser.parse_args(params)

    sift_web = SIFTWeb()
    content = sift_web.upload(args.input)

    # The job id is read from the second <p> element of the reply: the text
    # after the last ':' with spaces and a trailing ').' removed.
    soup = BeautifulSoup(content)
    paragraphs = soup.findAll('p')
    if len(paragraphs) < 2 or paragraphs[1].string is None:
        # Unexpected reply (fewer than two <p> tags, or no single text
        # child): fail with a readable message rather than an
        # IndexError/AttributeError.
        stop_err('Could not find a job id in the server response')
    job_id = paragraphs[1].string.split(':')[-1].replace(' ', '').replace(').', '')
    if not job_id:
        stop_err('Could not find a job id in the server response')

    sift_web.get_full_data(job_id, args.output1)
    sift_web.get_condensed_data(job_id, args.output2)
    sift_web.get_summary_data(job_id, args.output3)


# Script entry point: forward the command-line arguments to main().
if __name__ == '__main__':
    main(sys.argv[1:])