comparison history_metadata_extractor.py @ 0:426b0f85a311 draft

" master branch Updating"
author lain
date Tue, 19 Jul 2022 07:36:57 +0000
parents
children c7f4f2ac38f2
comparison
equal deleted inserted replaced
-1:000000000000 0:426b0f85a311
1 #!/usr/bin/env python
2
3 ## **@AUTHOR**: Lain Pavot - lain.pavot@inrae.fr
4 ## **@DATE**: 22/06/2022
5
6
7 import json
8 import os
9 import sys
10
11
# Static assets are loaded once at import time, resolved relative to the
# script's own directory (sys.path[0] is the directory of the running script).
with open(os.path.join(sys.path[0], "static", "app.css")) as css:
    CSS_STYLES = css.read()

# Bootstrap is appended AFTER the app styles.
# NOTE(review): vendor CSS usually comes first so app rules win; confirm
# this order is intentional.
with open(os.path.join(sys.path[0], "vendor", "bootstrap.min.css")) as bootstrap:
    CSS_STYLES = f"{CSS_STYLES}\n{bootstrap.read()}"

with open(os.path.join(sys.path[0], "static", "app.js")) as js:
    JAVASCRIPT = js.read()

# Page skeleton; filled by convert_to_html().
with open(os.path.join(sys.path[0], "static", "app.template.html")) as template:
    PAGE_TEMPLATE = template.read()

# Title fragment; filled by get_title().
with open(os.path.join(sys.path[0], "static", "title.template.html")) as template:
    TITLE_TEMPLATE = template.read()

# Per-job table fragment; filled by convert_item_to_table().
with open(os.path.join(sys.path[0], "static", "table.template.html")) as template:
    TABLE_TEMPLATE = template.read()

with open(os.path.join(sys.path[0], "static", "header_list.template.html")) as template:
    HEADER_LIST_TEMPLATE = template.read()

# NOTE(review): this immediately overwrites the value just read from
# header_list.template.html -- the file read above is dead code. Confirm
# which version is wanted and delete the other.
HEADER_LIST_TEMPLATE = '\n'.join((
    "<thead>",
    " <tr>",
    "{header_list}",
    " </tr>",
    "</thead>",
))

# Single-cell templates; the {} placeholder is filled per cell.
HEADER_TEMPLATE = "<th scope=\"col\">{}</th>"
COLUMN_TEMPLATE = "<th scope=\"row\">{}</th>"

# One table row; filled by table_lines_iterator().
TABLE_LINE_LIST_TEMPLATE = '\n'.join((
    "<tr class=\"{classes}\">",
    "{table_lines}",
    "</tr>",
))
TABLE_LINE_TEMPLATE = "<td>{}</td>"

# Indentation unit used by indent().
# NOTE(review): shown as a single space in this extraction; the original
# may have used a wider indent -- confirm against the repository file.
INDENT = " "


# Maps encoded dataset id -> dataset attribute dict (plus the raw list
# under the "dataset_attrs" key); populated by extract_dataset_attributes().
HISTORY_CACHE = {}
55
def indent(text):
    """Return *text* with each line shifted right by one INDENT unit.

    When the text starts with a newline, that leading (empty) first line
    is left unprefixed; otherwise the first line is indented too.
    """
    shifted = text.replace("\n", f"\n{INDENT}")
    return shifted if text.startswith("\n") else INDENT + shifted
61
def noempty(ls, as_list=True):
    """Filter falsy items out of *ls*.

    Returns a list when *as_list* is true, otherwise a lazy generator.
    """
    kept = (item for item in ls if item)
    return list(kept) if as_list else kept
66
def join_noempty(ls, sep=';'):
    """Join the truthy items of *ls* with *sep*, skipping falsy ones."""
    return sep.join(item for item in ls if item)
69
def extract_dataset_attributes(dataset_attrs):
    """Index dataset attribute dicts into HISTORY_CACHE by encoded id.

    The raw list is also stored under the "dataset_attrs" key so callers
    can retrieve it wholesale.
    """
    HISTORY_CACHE.update(
        (attrs["encoded_id"], attrs) for attrs in dataset_attrs
    )
    HISTORY_CACHE["dataset_attrs"] = dataset_attrs
74
def convert_to_html(jobs_attrs, dataset_attrs=None):
    """Render the complete HTML report page for *jobs_attrs*.

    When *dataset_attrs* is provided, it is first indexed into the
    module-level history cache so the tables can show dataset details.
    """
    if dataset_attrs:
        extract_dataset_attributes(dataset_attrs)
    # Double indent: the fragments sit two levels deep in the page template.
    title = indent(indent(get_title(jobs_attrs)))
    tables = indent(indent(get_table_list(jobs_attrs)))
    styles = CSS_STYLES.replace("\n<", "\n <")
    return PAGE_TEMPLATE.format(
        styles=styles,
        javascript=JAVASCRIPT,
        title=title,
        table_list=tables,
    )
84
def get_title(jobs_attrs):
    """Build the page title from the first job's Galaxy version."""
    version = jobs_attrs[0]["galaxy_version"] or "Unknown version"
    return TITLE_TEMPLATE.format(galaxy_version=version)
88
def get_table_list(jobs_attrs):
    """Render one table per (job, output dataset id) pair, newline-joined.

    For each job, dataset ids come from the values of its
    "output_dataset_mapping"; a {1: "unknown"} placeholder is used when
    the mapping is absent or empty.
    NOTE(review): each mapping value is itself iterated and sorted, so it
    is presumably a collection of ids; with the "unknown" fallback this
    would iterate the string's characters -- confirm against real export
    data.
    """
    return '\n'.join((
        convert_item_to_table(job_attr, dataset_id)
        for job_attr in jobs_attrs
        for dataset_id_set in sorted(list((
            job_attr["output_dataset_mapping"]
            or {1: "unknown"}
        ).values()))
        for dataset_id in sorted(dataset_id_set)
    ))
99
def convert_item_to_table(job_attr, dataset_id):
    """Render one job/dataset pair as an HTML table section.

    The alert colour reflects the job's exit code; datasets missing from
    the history cache are flagged as deleted with an extra CSS class.
    """
    if HISTORY_CACHE:
        history = HISTORY_CACHE.get(dataset_id, {})
        hid = history.get("hid", "DELETED")
    else:
        # No dataset attributes were provided: history position unknown.
        hid = "?"
    exit_code = job_attr.get("exit_code")
    # Bugfix: reuse the value fetched with .get() above -- the original
    # re-read job_attr["exit_code"], which raises KeyError when the key
    # is absent. (A leftover debug print(job_attr) and the unused local
    # encoded_jid were also removed.)
    if exit_code == 0:
        status = f"Ok ({exit_code})"
        classes = "alert alert-success"
    else:
        status = f"Failed ({exit_code})"
        classes = "alert alert-danger"
    if hid == "DELETED":
        classes += " history_metadata_extractor_deleted"
    tool_name = job_attr["tool_id"] or "unknown"
    if tool_name.count("/") >= 4:
        # Toolshed ids look like server/repos/owner/name/tool/version:
        # keep only the human-readable tool segment.
        tool_name = job_attr["tool_id"].split("/")[-2]
    tool_name = tool_name + " - " + job_attr["tool_version"]
    tool_name = f"[{hid}] - {tool_name}"
    return TABLE_TEMPLATE.format(
        classes=classes,
        tool_name=tool_name,
        tool_output="",
        tool_status=status,
        table=convert_parameters_to_html(job_attr),
    )
129
def convert_parameters_to_html(job_attr):
    """Render the job's parameters as a table header plus body rows."""
    params = job_attr["params"]
    # Enrichment may flatten upload-style request_json into params.
    params_enrichment(job_attr, params)
    hidden = ("dbkey", "chromInfo", "__input_ext", "request_json")
    keys = [
        key for key in iter_parameter_keys(params)
        if key not in hidden
    ]
    header = get_table_header(params, ["value", "name", "extension", "hid"])
    body = get_table_lines(params, keys)
    return '\n'.join((indent(header), indent(body)))
141
def params_enrichment(job_attr, params):
    """Flatten upload-style parameters in place.

    For jobs carrying a serialized "request_json" alongside "files",
    decode it into *params* and summarize each target's elements into
    per-file "hid"/"name"/"extension" strings (';'-joined).
    Mutates *params*; returns None.
    """
    # Bugfix: removed a leftover debug print(params).
    if (
        "request_json" in params
        and "files" in params
        and "encoded_id" in job_attr
    ):
        params.update(json.loads(params.pop("request_json")))
        # Summary field in params["files"][i] -> key in each element dict.
        fields = (
            ("hid", "object_id"),
            ("name", "name"),
            ("extension", "ext"),
        )
        for i, target in enumerate(params.pop("targets")):
            # Renamed loop variable: "file" shadowed the builtin.
            elements = target["elements"]
            for field, source_key in fields:
                params["files"][i][field] = join_noempty(
                    str(element[source_key])
                    for element in elements
                )
163
def iter_parameter_keys(params):
    """Yield parameter names, descending into nested dicts as "a.b" paths.

    File-like parameters are yielded as a single key even though they
    are dicts; the bookkeeping key "__current_case__" is skipped at
    every nesting level.
    """
    for key, param in params.items():
        if param_is_file(param):
            yield key
            continue
        if not isinstance(param, dict):
            yield key
            continue
        for subkey in iter_parameter_keys(param):
            if subkey not in ("__current_case__", ):
                yield f"{key}.{subkey}"
175
def param_is_file(param):
    """True when *param* matches either known file representation."""
    if is_file_v1(param):
        return True
    return is_file_v2(param)
178
def is_file_v1(param):
    """Detect the legacy file representation.

    A v1 file is a dict carrying all of the "info"/"peek"/"name"/
    "extension" metadata fields.
    """
    if not isinstance(param, dict):
        return False
    required = ("info", "peek", "name", "extension")
    return all(field in param for field in required)
187
def is_file_v2(param):
    """Detect the newer file representation.

    A v2 file is a dict of the shape
    {"values": [{"id": ..., "src": ...}, ...]}.

    Bugfix: the original indexed param["values"][0] without checking the
    list was non-empty, raising IndexError on an empty "values" list;
    an empty list now simply reports False.
    """
    if not isinstance(param, dict):
        return False
    values = param.get("values")
    if not isinstance(values, list) or not values:
        return False
    first = values[0]
    if not isinstance(first, dict):
        return False
    return all(field in first for field in ("id", "src"))
196
def get_table_header(params, keys):
    """Render the <thead> row: a blank leading cell, then one per key.

    *params* is unused but kept for signature symmetry with
    get_table_lines().
    """
    cells = '\n'.join(HEADER_TEMPLATE.format(key) for key in [""] + keys)
    return HEADER_LIST_TEMPLATE.format(header_list=indent(indent(cells)))
201
def get_table_lines(params, keys):
    """Concatenate every rendered parameter row into one string."""
    rows = table_lines_iterator(params, keys)
    return ''.join(rows)
204
def table_lines_iterator(params, param_names):
    """Yield one rendered <tr> block per entry in *param_names*.

    Dotted names ("cond.input") are resolved by walking into nested
    dicts; the leaf key is then looked up in the innermost dict. Each
    row starts with the parameter-name cell, followed by its
    value/name/extension/hid cells from extract_param_info().
    """
    keys = ("value", "name", "extension", "hid",)
    for param_name in param_names:
        # NOTE(review): classes is never assigned anything else, so every
        # row renders with an empty class attribute -- confirm intended.
        classes = ""
        table_lines = []
        subparam = params
        # Walk down the dotted path; param_name ends up as the leaf key.
        while '.' in param_name:
            subkey, param_name = param_name.split('.', 1)
            subparam = subparam[subkey]
        for key in keys:
            param = extract_param_info(key, subparam[param_name])
            table_lines.append(param)
        yield TABLE_LINE_LIST_TEMPLATE.format(
            classes=classes,
            table_lines=(
                indent(COLUMN_TEMPLATE.format(param_name) + '\n')
                + indent('\n'.join(map(
                    TABLE_LINE_TEMPLATE.format,
                    table_lines
                )))
            )
        )
227
def extract_param_info(key, param):
    """Pull *key* out of a parameter, recursing into lists.

    The "value" pseudo-key delegates to extract_param_value(); anything
    unresolvable yields the empty string.
    """
    if key == "value":
        return extract_param_value(param)
    if isinstance(param, dict):
        if key in param:
            return str(param[key])
        return ""
    if isinstance(param, list):
        return join_noempty(extract_param_info(key, item) for item in param)
    return ""
236
def extract_param_value(param):
    """Best-effort human-readable value for a parameter.

    Dicts are summarized by shape: conditional params dump their
    key/value pairs, upload params show file_data/file_name, and
    "values"-style file params are resolved against HISTORY_CACHE.
    Lists/tuples recurse; scalars are stringified. A dict matching none
    of the shapes falls through to str(param).

    Side effect: for "values"-style params found in the cache, *param*
    is enriched in place with "name"/"hid"/"extension" so later
    extract_param_info() lookups find them.
    """
    if isinstance(param, dict):
        if "__current_case__" in param:
            # Conditional parameter: render its pairs minus bookkeeping.
            return join_dict_key_values(param, ignore=("__current_case__", ))
        for acceptable_value in ("file_data", "file_name"):
            if acceptable_value in param:
                return f"{acceptable_value}: {param[acceptable_value]}"
        if "values" in param:
            ids = []
            for file_id in param["values"]:
                file_id = file_id["id"]
                if file_id in HISTORY_CACHE:
                    # Copy cached dataset info onto the param (in-place
                    # enrichment; see docstring).
                    file_info = HISTORY_CACHE[file_id]
                    param["name"] = file_info["name"]
                    param["hid"] = file_info["hid"]
                    param["extension"] = file_info["extension"]
                ids.append(file_id)
            return join_noempty(ids)
    if isinstance(param, (str, int, float)):
        return str(param)
    if isinstance(param, (list, tuple)):
        return join_noempty(map(extract_param_value, param))
    return str(param)
260
def join_dict_key_values(dico, ignore=()):
    """Render *dico* as "key: value" pairs joined by ';'.

    Keys listed in *ignore* are skipped.
    """
    pairs = (
        f"{name}: {dico[name]}"
        for name in dico
        if name not in ignore
    )
    # Each pair contains ": " and is therefore never empty, so plain
    # join matches the original join_noempty(...) call exactly.
    return ';'.join(pairs)
267
if __name__ == "__main__":
    # NOTE(review): optparse has been deprecated since Python 3.2;
    # argparse is the modern replacement.
    import optparse
    parser = optparse.OptionParser()
    parser.add_option(
        "-j", "--jobs-attrs",
        dest="jobs_attrs",
        help="write report of FILE",
        metavar="FILE",
        default="jobs_attrs.txt"
    )
    parser.add_option(
        "-d", "--dataset-attrs",
        dest="dataset_attrs",
        help="extract additional info from this file",
        metavar="FILE",
        default=None,
    )
    parser.add_option(
        "-o", "--output",
        dest="output",
        help="write report to FILE",
        metavar="FILE",
        default="out.html"
    )
    parser.add_option(
        "-v", "--version",
        action="store_true",
        help="Show this script's version and exits",
    )

    (options, args) = parser.parse_args()

    if options.version:

        import re

        # Scrape the version out of the README's **@VERSION** line.
        # NOTE(review): re.search(...)[0] raises TypeError when the line
        # contains no x.y.z pattern; also the exit placement below is
        # reconstructed from a whitespace-mangled source -- confirm it
        # sits after the scan, not inside the loop.
        with open(os.path.join(sys.path[0], "README.md")) as readme:
            for line in readme.readlines():
                if "**@VERSION**" in line:
                    print(re.search(r"\d+\.\d+\.\d+", line)[0])
        sys.exit(0)

    with open(options.jobs_attrs) as j:
        jobs_attrs = json.load(j)

    if options.dataset_attrs is not None:
        with open(options.dataset_attrs) as ds:
            dataset_attrs = json.load(ds)
    else:
        dataset_attrs = {}

    # Keep only the keys the report needs; missing keys become None.
    jobs_attrs = [{
        key: jobs_attr.get(key)
        for key in (
            "galaxy_version",
            "tool_id",
            "tool_version",
            "encoded_id",
            "params",
            "output_datasets",
            "exit_code",
            "output_dataset_mapping",
        )
    } for jobs_attr in jobs_attrs]
    # Order jobs by their first output dataset when that info exists.
    if jobs_attrs and jobs_attrs[0].get("output_datasets"):
        jobs_attrs = sorted(
            jobs_attrs,
            key=lambda x: x["output_datasets"][0]
        )

    with open(options.output, "w") as o:
        o.write(convert_to_html(jobs_attrs, dataset_attrs=dataset_attrs))