comparison planemo/lib/python3.7/site-packages/boto/services/result.py @ 0:d30785e31577 draft

"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
author guerler
date Fri, 31 Jul 2020 00:18:57 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:d30785e31577
1 #!/usr/bin/env python
2 # Copyright (c) 2006,2007 Mitch Garnaat http://garnaat.org/
3 #
4 # Permission is hereby granted, free of charge, to any person obtaining a
5 # copy of this software and associated documentation files (the
6 # "Software"), to deal in the Software without restriction, including
7 # without limitation the rights to use, copy, modify, merge, publish, dis-
8 # tribute, sublicense, and/or sell copies of the Software, and to permit
9 # persons to whom the Software is furnished to do so, subject to the fol-
10 # lowing conditions:
11 #
12 # The above copyright notice and this permission notice shall be included
13 # in all copies or substantial portions of the Software.
14 #
15 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
16 # OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL-
17 # ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
18 # SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
19 # WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 # IN THE SOFTWARE.
22 import os
23 from datetime import datetime, timedelta
24 from boto.utils import parse_ts
25 import boto
26
class ResultProcessor(object):
    """Retrieve the results of a service batch and report timing statistics.

    Results are read from the first available source, in this order: the
    ``output_queue``, the ``output_domain``, or (as a last resort) the raw
    contents of the ``output_bucket``.  All three are looked up through
    ``sd.get_obj``.  Every processed record is appended to a CSV log file
    (``LogFileName``) inside the destination directory.
    """

    LogFileName = 'log.csv'

    def __init__(self, batch_name, sd, mimetype_files=None):
        """Set up counters and look up the output queue and domain.

        :param batch_name: Batch identifier; compared against the ``Batch``
            field of queue messages and used in the domain query.
        :param sd: Object exposing ``get_obj(name)``; used to look up
            ``output_queue``, ``output_domain`` and ``output_bucket``.
        :param mimetype_files: Accepted for interface compatibility; it is
            not used anywhere in this class.
        """
        self.sd = sd
        self.batch = batch_name
        self.log_fp = None
        self.num_files = 0
        self.total_time = 0
        # Sentinels chosen so the first recorded value always replaces them.
        self.min_time = timedelta.max
        self.max_time = timedelta.min
        self.earliest_time = datetime.max
        self.latest_time = datetime.min
        self.queue = self.sd.get_obj('output_queue')
        self.domain = self.sd.get_obj('output_domain')

    def calculate_stats(self, msg):
        """Fold the timing of one record into the running statistics.

        :param msg: Mapping with ``Service-Read`` and ``Service-Write``
            timestamp strings, parsed with ``boto.utils.parse_ts``.
        """
        start_time = parse_ts(msg['Service-Read'])
        end_time = parse_ts(msg['Service-Write'])
        elapsed_time = end_time - start_time
        if elapsed_time > self.max_time:
            self.max_time = elapsed_time
        if elapsed_time < self.min_time:
            self.min_time = elapsed_time
        # total_seconds() covers the whole duration; ``.seconds`` alone
        # silently drops the days component of the timedelta.
        self.total_time += int(elapsed_time.total_seconds())
        if start_time < self.earliest_time:
            self.earliest_time = start_time
        if end_time > self.latest_time:
            self.latest_time = end_time

    def log_message(self, msg, path):
        """Append one record to the CSV log file in ``path``.

        The log file is opened lazily (append mode) on the first call, at
        which point a header row of the sorted keys is written.

        :param msg: Mapping of string keys to string values.
        :param path: Directory containing the log file.
        """
        keys = sorted(msg.keys())
        if not self.log_fp:
            self.log_fp = open(os.path.join(path, self.LogFileName), 'a')
            self.log_fp.write(','.join(keys) + '\n')
        values = []
        for key in keys:
            value = msg[key]
            # Quote any value containing a comma, including a leading one
            # (``find(',') > 0`` missed a comma at index 0).
            if ',' in value:
                value = '"%s"' % value
            values.append(value)
        self.log_fp.write(','.join(values) + '\n')

    def process_record(self, record, path, get_file=True):
        """Log a record, update statistics and optionally fetch its outputs.

        :param record: Mapping with an ``OutputKey`` field (comma-separated
            output entries) and either an ``OutputBucket`` or a ``Bucket``
            field naming the S3 bucket.
        :param path: Directory that receives the log and downloaded files.
        :param get_file: When true, download each output into ``path``.
        """
        self.log_message(record, path)
        self.calculate_stats(record)
        outputs = record['OutputKey'].split(',')
        if 'OutputBucket' in record:
            bucket = boto.lookup('s3', record['OutputBucket'])
        else:
            bucket = boto.lookup('s3', record['Bucket'])
        for output in outputs:
            if get_file:
                # The key name is the part of the entry before the first ';'.
                key_name = output.split(';')[0]
                key = bucket.lookup(key_name)
                file_name = os.path.join(path, key_name)
                print('retrieving file: %s to %s' % (key_name, file_name))
                key.get_contents_to_filename(file_name)
            self.num_files += 1

    def get_results_from_queue(self, path, get_file=True, delete_msg=True):
        """Drain the output queue, processing messages of this batch.

        :param path: Directory that receives the log and downloaded files.
        :param get_file: Passed through to ``process_record``.
        :param delete_msg: When true, delete each message after reading it.
        """
        m = self.queue.read()
        while m:
            if 'Batch' in m and m['Batch'] == self.batch:
                self.process_record(m, path, get_file)
            # NOTE(review): this deletes every message read, including those
            # belonging to other batches -- confirm that this is intended.
            if delete_msg:
                self.queue.delete_message(m)
            m = self.queue.read()

    def get_results_from_domain(self, path, get_file=True):
        """Process every item of the output domain that matches this batch.

        :param path: Directory that receives the log and downloaded files.
        :param get_file: Passed through to ``process_record``.
        """
        rs = self.domain.query("['Batch'='%s']" % self.batch)
        for item in rs:
            self.process_record(item, path, get_file)

    def get_results_from_bucket(self, path):
        """Download every key of the output bucket into ``path``.

        No log entries or timing statistics are produced on this path; only
        ``num_files`` is updated.

        :param path: Directory that receives the downloaded files.
        """
        bucket = self.sd.get_obj('output_bucket')
        if bucket:
            print('No output queue or domain, just retrieving files from output_bucket')
            for key in bucket:
                # Use the key's name: joining a path with the key object
                # itself is a TypeError.
                file_name = os.path.join(path, key.name)
                print('retrieving file: %s to %s' % (key.name, file_name))
                key.get_contents_to_filename(file_name)
                # Was ``self.num_files + 1``, a no-op expression.
                self.num_files += 1

    def get_results(self, path, get_file=True, delete_msg=True):
        """Retrieve all results into ``path`` and print a summary.

        :param path: Destination directory; created if it does not exist.
        :param get_file: When true, download the output files of each record.
        :param delete_msg: When true, delete queue messages once read (only
            relevant when an output queue is available).
        """
        if not os.path.isdir(path):
            os.mkdir(path)
        try:
            if self.queue:
                self.get_results_from_queue(path, get_file, delete_msg)
            elif self.domain:
                self.get_results_from_domain(path, get_file)
            else:
                self.get_results_from_bucket(path)
        finally:
            if self.log_fp:
                self.log_fp.close()
                # Reset so that a later run reopens the log instead of
                # writing to a closed file.
                self.log_fp = None
        print('%d results successfully retrieved.' % self.num_files)
        # min_time <= max_time only holds once calculate_stats() has run;
        # the bucket-only path records no timings, so skip the report there.
        if self.num_files > 0 and self.min_time <= self.max_time:
            self.avg_time = float(self.total_time) / self.num_files
            print('Minimum Processing Time: %d' % self.min_time.total_seconds())
            print('Maximum Processing Time: %d' % self.max_time.total_seconds())
            print('Average Processing Time: %f' % self.avg_time)
            self.elapsed_time = self.latest_time - self.earliest_time
            elapsed_seconds = self.elapsed_time.total_seconds()
            print('Elapsed Time: %d' % elapsed_seconds)
            if elapsed_seconds > 0:
                tput = self.num_files / (elapsed_seconds / 60.0)
                print('Throughput: %f transactions / minute' % tput)
            else:
                # Avoid a ZeroDivisionError when everything finished within
                # the timestamp resolution.
                print('Throughput: n/a (elapsed time is zero)')
135