Mercurial > repos > lain > history_metadata_extractor
comparison history_metadata_extractor.py @ 0:426b0f85a311 draft
" master branch Updating"
author | lain |
---|---|
date | Tue, 19 Jul 2022 07:36:57 +0000 |
parents | |
children | c7f4f2ac38f2 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:426b0f85a311 |
---|---|
1 #!/usr/bin/env python | |
2 | |
3 ## **@AUTHOR**: Lain Pavot - lain.pavot@inrae.fr | |
4 ## **@DATE**: 22/06/2022 | |
5 | |
6 | |
7 import json | |
8 import os | |
9 import sys | |
10 | |
11 | |
def _read_static(*path_parts):
    """Read a bundled asset file, located relative to this script's directory."""
    with open(os.path.join(sys.path[0], *path_parts)) as handle:
        return handle.read()


# Concatenated stylesheet: app-specific rules first, then bundled bootstrap.
CSS_STYLES = "\n".join((
    _read_static("static", "app.css"),
    _read_static("vendor", "bootstrap.min.css"),
))

JAVASCRIPT = _read_static("static", "app.js")

# HTML templates filled with str.format while rendering the report.
PAGE_TEMPLATE = _read_static("static", "app.template.html")
TITLE_TEMPLATE = _read_static("static", "title.template.html")
TABLE_TEMPLATE = _read_static("static", "table.template.html")

# NOTE: the original code also read static/header_list.template.html into
# HEADER_LIST_TEMPLATE and then immediately overwrote it with the literal
# below, so that file read was dead code and has been removed.
HEADER_LIST_TEMPLATE = '\n'.join((
    "<thead>",
    " <tr>",
    "{header_list}",
    " </tr>",
    "</thead>",
))

HEADER_TEMPLATE = "<th scope=\"col\">{}</th>"
COLUMN_TEMPLATE = "<th scope=\"row\">{}</th>"

TABLE_LINE_LIST_TEMPLATE = '\n'.join((
    "<tr class=\"{classes}\">",
    "{table_lines}",
    "</tr>",
))
TABLE_LINE_TEMPLATE = "<td>{}</td>"
# Unit of indentation used when nesting generated HTML.
INDENT = " "


# Maps encoded dataset ids to their attribute dicts (filled by
# extract_dataset_attributes); stays empty when no dataset file is given.
HISTORY_CACHE = {}

def indent(text):
    """Return *text* shifted right by one INDENT level.

    Every line following a newline is prefixed with INDENT; the first line
    is prefixed too, unless the text starts with a newline.
    """
    shifted = text.replace("\n", f"\n{INDENT}")
    return shifted if text.startswith("\n") else INDENT + shifted
61 | |
def noempty(ls, as_list=True):
    """Drop falsy items from *ls*; return a list, or a generator if not as_list."""
    filtered = (item for item in ls if item)
    return list(filtered) if as_list else filtered
66 | |
def join_noempty(ls, sep=';'):
    """Join the truthy items of *ls* with *sep*."""
    return sep.join(item for item in ls if item)
69 | |
def extract_dataset_attributes(dataset_attrs):
    """Index every dataset by its encoded id in the module-level HISTORY_CACHE.

    The whole list is also stored under the "dataset_attrs" key.
    """
    for attrs in dataset_attrs:
        HISTORY_CACHE[attrs["encoded_id"]] = attrs
    HISTORY_CACHE["dataset_attrs"] = dataset_attrs
74 | |
def convert_to_html(jobs_attrs, dataset_attrs=None):
    """Render the complete HTML report page for *jobs_attrs*.

    When *dataset_attrs* is provided, it is first indexed into
    HISTORY_CACHE so dataset names/hids can be resolved while rendering.
    """
    if dataset_attrs:
        extract_dataset_attributes(dataset_attrs)
    title = indent(indent(get_title(jobs_attrs)))
    tables = indent(indent(get_table_list(jobs_attrs)))
    return PAGE_TEMPLATE.format(
        styles=CSS_STYLES.replace("\n<", "\n <"),
        javascript=JAVASCRIPT,
        title=title,
        table_list=tables,
    )
84 | |
def get_title(jobs_attrs):
    """Return the rendered report title, including the Galaxy version.

    Falls back to "Unknown version" when the version is missing, or when
    *jobs_attrs* is empty (the original raised IndexError on an empty list).
    """
    if jobs_attrs:
        galaxy_version = jobs_attrs[0]["galaxy_version"] or "Unknown version"
    else:
        galaxy_version = "Unknown version"
    return TITLE_TEMPLATE.format(galaxy_version=galaxy_version)
88 | |
def get_table_list(jobs_attrs):
    """Render one table per output dataset of each job, newline-joined.

    Dataset ids come from each job's "output_dataset_mapping" (a dict whose
    values are collections of dataset ids); jobs without a mapping get a
    {1: "unknown"} placeholder.  NOTE(review): each mapping value is itself
    iterated (innermost loop), so values are presumably lists of ids — with
    the placeholder this iterates the characters of "unknown"; confirm
    against a real Galaxy export.
    """
    return '\n'.join((
        convert_item_to_table(job_attr, dataset_id)
        for job_attr in jobs_attrs
        for dataset_id_set in sorted(list((
            job_attr["output_dataset_mapping"]
            or {1:"unknown"}
        ).values()))
        for dataset_id in sorted(dataset_id_set)
    ))
99 | |
def convert_item_to_table(job_attr, dataset_id):
    """Render one job/dataset pair as an HTML table.

    The table header shows "[hid] - tool - version"; it is styled green on
    success (exit code 0) and red otherwise, with an extra CSS class when
    the dataset was deleted from the history.
    """
    if HISTORY_CACHE:
        # Dataset file was provided: resolve the history id, or mark the
        # dataset as deleted when it is absent from the cache.
        history = HISTORY_CACHE.get(dataset_id, {})
        hid = history.get("hid", "DELETED")
    else:
        # No dataset file: the hid cannot be resolved.
        hid = "?"
    exit_code = job_attr.get("exit_code")
    # Original mixed job_attr["exit_code"] and .get(); use the fetched
    # value consistently (also avoids a KeyError when the key is absent).
    if exit_code == 0:
        status = f"Ok ({exit_code})"
        classes = "alert alert-success"
    else:
        status = f"Failed ({exit_code})"
        classes = "alert alert-danger"
    if hid == "DELETED":
        classes += " history_metadata_extractor_deleted"
    # Removed leftover debug print(job_attr) and the unused encoded_jid local.
    tool_name = job_attr["tool_id"] or "unknown"
    if tool_name.count("/") >= 4:
        # Toolshed ids look like "server/repos/owner/name/tool/version":
        # keep only the short tool name component.
        tool_name = job_attr["tool_id"].split("/")[-2]
    tool_name = tool_name + " - " + job_attr["tool_version"]
    tool_name = f"[{hid}] - {tool_name}"
    return TABLE_TEMPLATE.format(
        classes=classes,
        tool_name=tool_name,
        tool_output="",
        tool_status=status,
        table=convert_parameters_to_html(job_attr)
    )
129 | |
def convert_parameters_to_html(job_attr):
    """Render the job's parameter table (header row + one line per parameter)."""
    params = job_attr["params"]
    params_enrichment(job_attr, params)
    ignored = ("dbkey", "chromInfo", "__input_ext", "request_json")
    keys = [key for key in iter_parameter_keys(params) if key not in ignored]
    header = get_table_header(params, ["value", "name", "extension", "hid"])
    lines = get_table_lines(params, keys)
    return '\n'.join((indent(header), indent(lines)))
141 | |
def params_enrichment(job_attr, params):
    """Flatten upload-style parameters in place.

    When *params* carries a "request_json" payload alongside "files"
    (data-upload jobs with an encoded id), merge that payload into *params*
    and summarize each upload target's elements into the matching "files"
    entry: "hid", "name" and "extension" become ';'-joined strings.
    """
    # Removed leftover debug print(params).
    if (
        all(map(params.__contains__, ("request_json", "files")))
        and "encoded_id" in job_attr
    ):
        params.update(json.loads(params.pop("request_json")))
        for i, target in enumerate(params.pop("targets")):
            # "element" instead of "file": avoid shadowing the builtin.
            elements = target["elements"]
            params["files"][i]["hid"] = join_noempty(
                str(element["object_id"])
                for element in elements
            )
            params["files"][i]["name"] = join_noempty(
                str(element["name"])
                for element in elements
            )
            params["files"][i]["extension"] = join_noempty(
                str(element["ext"])
                for element in elements
            )
163 | |
def iter_parameter_keys(params):
    """Yield parameter names, flattening nested dicts to "outer.inner" form.

    File-like parameter dicts are yielded as a single key; the conditional
    bookkeeping key "__current_case__" is skipped at sub-levels.
    """
    for key, param in params.items():
        if param_is_file(param):
            yield key
        elif isinstance(param, dict):
            yield from (
                f"{key}.{subkey}"
                for subkey in iter_parameter_keys(param)
                if subkey != "__current_case__"
            )
        else:
            yield key
175 | |
def param_is_file(param):
    """True when *param* looks like a file in either export format."""
    if is_file_v1(param):
        return True
    return is_file_v2(param)
178 | |
def is_file_v1(param):
    """Detect the old-style file dict (has info/peek/name/extension keys)."""
    required = ("info", "peek", "name", "extension")
    return isinstance(param, dict) and all(key in param for key in required)
187 | |
def is_file_v2(param):
    """Detect the new-style file dict: {"values": [{"id": ..., "src": ...}]}.

    Unlike the original, an empty "values" list returns False instead of
    raising IndexError on param["values"][0].
    """
    return (
        isinstance(param, dict)
        and isinstance(param.get("values"), list)
        and bool(param["values"])  # guard: empty list is not a file
        and isinstance(param["values"][0], dict)
        and all(map(param["values"][0].__contains__, ("id", "src")))
    )
196 | |
def get_table_header(params, keys):
    """Render the <thead> section; *params* is accepted but currently unused."""
    cells = '\n'.join(HEADER_TEMPLATE.format(key) for key in [""] + keys)
    return HEADER_LIST_TEMPLATE.format(header_list=indent(indent(cells)))
201 | |
def get_table_lines(params, keys):
    """Render every parameter row and concatenate them."""
    rows = table_lines_iterator(params, keys)
    return ''.join(rows)
204 | |
def table_lines_iterator(params, param_names):
    """Yield one rendered <tr> per entry of *param_names*.

    Dotted names ("section.param") are resolved by walking into nested
    dicts.  Each row starts with the parameter name column, followed by
    the value/name/extension/hid columns extracted from the parameter.
    """
    keys = ("value", "name", "extension", "hid",)
    for param_name in param_names:
        classes = ""  # never set here; kept as a hook for the row template
        table_lines = []
        subparam = params
        # Descend into nested dicts for dotted parameter names.
        while '.' in param_name:
            subkey, param_name = param_name.split('.', 1)
            subparam = subparam[subkey]
        for key in keys:
            param = extract_param_info(key, subparam[param_name])
            table_lines.append(param)
        yield TABLE_LINE_LIST_TEMPLATE.format(
            classes=classes,
            table_lines=(
                indent(COLUMN_TEMPLATE.format(param_name) + '\n')
                + indent('\n'.join(map(
                    TABLE_LINE_TEMPLATE.format,
                    table_lines
                )))
            )
        )
227 | |
def extract_param_info(key, param):
    """Extract column *key* ("value", "name", "extension", "hid") from a parameter.

    Lists are handled element-wise and ';'-joined; unknown shapes yield "".
    """
    if key == "value":
        return extract_param_value(param)
    if isinstance(param, list):
        return join_noempty(extract_param_info(key, item) for item in param)
    if isinstance(param, dict) and key in param:
        return str(param[key])
    return ""
236 | |
def extract_param_value(param):
    """Best-effort string rendering of a parameter's value.

    Dicts are special-cased: conditional blocks ("__current_case__") are
    flattened to "key: value" pairs, upload parameters show their
    file_data/file_name, and v2 file dicts resolve their dataset ids
    through HISTORY_CACHE.  NOTE(review): that last branch also back-fills
    "name"/"hid"/"extension" onto *param* as a side effect (so later
    columns can be rendered); when several files are listed, the last one
    found in the cache wins.
    """
    if isinstance(param, dict):
        if "__current_case__" in param:
            return join_dict_key_values(param, ignore=("__current_case__", ))
        for acceptable_value in ("file_data", "file_name"):
            if acceptable_value in param:
                return f"{acceptable_value}: {param[acceptable_value]}"
        if "values" in param:
            ids = []
            for file_id in param["values"]:
                file_id = file_id["id"]
                if file_id in HISTORY_CACHE:
                    file_info = HISTORY_CACHE[file_id]
                    # In-place enrichment of the parameter dict (see note).
                    param["name"] = file_info["name"]
                    param["hid"] = file_info["hid"]
                    param["extension"] = file_info["extension"]
                ids.append(file_id)
            return join_noempty(ids)
    if isinstance(param, (str, int, float)):
        return str(param)
    if isinstance(param, (list, tuple)):
        return join_noempty(map(extract_param_value, param))
    # Fallback for anything else (None, bool, custom objects, ...).
    return str(param)
260 | |
def join_dict_key_values(dico, ignore=()):
    """Render *dico* as ';'-joined "key: value" pairs, skipping *ignore*d keys."""
    pairs = (f"{name}: {dico[name]}" for name in dico if name not in ignore)
    return ';'.join(pair for pair in pairs if pair)
267 | |
# Command-line entry point: read the Galaxy job (and optional dataset)
# export files and write the HTML report.
if __name__ == "__main__":
    # NOTE(review): optparse is deprecated in favor of argparse; the CLI
    # is kept as-is to preserve exact option behavior.
    import optparse
    parser = optparse.OptionParser()
    parser.add_option(
        "-j", "--jobs-attrs",
        dest="jobs_attrs",
        help="write report of FILE",
        metavar="FILE",
        default="jobs_attrs.txt"
    )
    parser.add_option(
        "-d", "--dataset-attrs",
        dest="dataset_attrs",
        help="extract additional info from this file",
        metavar="FILE",
        default=None,
    )
    parser.add_option(
        "-o", "--output",
        dest="output",
        help="write report to FILE",
        metavar="FILE",
        default="out.html"
    )
    parser.add_option(
        "-v", "--version",
        action="store_true",
        help="Show this script's version and exits",
    )

    (options, args) = parser.parse_args()

    if options.version:
        # The version lives in README.md on the line marked **@VERSION**:
        # print the first x.y.z found there and stop.
        import re

        with open(os.path.join(sys.path[0], "README.md")) as readme:
            for line in readme.readlines():
                if "**@VERSION**" in line:
                    print(re.search(r"\d+\.\d+\.\d+", line)[0])
                    sys.exit(0)

    with open(options.jobs_attrs) as j:
        jobs_attrs = json.load(j)

    if options.dataset_attrs is not None:
        with open(options.dataset_attrs) as ds:
            dataset_attrs = json.load(ds)
    else:
        dataset_attrs = {}

    # Keep only the attributes the report needs; missing keys become None.
    jobs_attrs = [{
        key: jobs_attr.get(key)
        for key in (
            "galaxy_version",
            "tool_id",
            "tool_version",
            "encoded_id",
            "params",
            "output_datasets",
            "exit_code",
            "output_dataset_mapping",
        )
    } for jobs_attr in jobs_attrs]
    # Sort jobs by their first output dataset id so the report follows the
    # history order (only when that information is present).
    if jobs_attrs and jobs_attrs[0].get("output_datasets"):
        jobs_attrs = sorted(
            jobs_attrs,
            key=lambda x:x["output_datasets"][0]
        )

    with open(options.output, "w") as o:
        o.write(convert_to_html(jobs_attrs, dataset_attrs=dataset_attrs))