Mercurial > repos > guerler > springsuite
comparison planemo/lib/python3.7/site-packages/galaxy/tool_util/parser/xml.py @ 1:56ad4e20f292 draft
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
author | guerler |
---|---|
date | Fri, 31 Jul 2020 00:32:28 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
0:d30785e31577 | 1:56ad4e20f292 |
---|---|
1 import logging | |
2 import re | |
3 import uuid | |
4 from collections import OrderedDict | |
5 from math import isinf | |
6 | |
7 import packaging.version | |
8 | |
9 from galaxy.tool_util.deps import requirements | |
10 from galaxy.tool_util.parser.util import ( | |
11 DEFAULT_DELTA, | |
12 DEFAULT_DELTA_FRAC | |
13 ) | |
14 from galaxy.util import ( | |
15 string_as_bool, | |
16 xml_text, | |
17 xml_to_string | |
18 ) | |
19 from .interface import ( | |
20 InputSource, | |
21 PageSource, | |
22 PagesSource, | |
23 TestCollectionDef, | |
24 TestCollectionOutputDef, | |
25 ToolSource, | |
26 ) | |
27 from .output_actions import ToolOutputActionGroup | |
28 from .output_collection_def import dataset_collector_descriptions_from_elem | |
29 from .output_objects import ( | |
30 ToolExpressionOutput, | |
31 ToolOutput, | |
32 ToolOutputCollection, | |
33 ToolOutputCollectionStructure | |
34 ) | |
35 from .stdio import ( | |
36 aggressive_error_checks, | |
37 error_on_exit_code, | |
38 StdioErrorLevel, | |
39 ToolStdioExitCode, | |
40 ToolStdioRegex, | |
41 ) | |
42 | |
43 | |
44 log = logging.getLogger(__name__) | |
45 | |
46 | |
class XmlToolSource(ToolSource):
    """ Responsible for parsing a tool from classic Galaxy representation.

    Wraps an ElementTree of a Galaxy tool XML file and exposes ``parse_*``
    accessors for each piece of the tool definition (command, outputs,
    stdio error handling, tests, ...).
    """

    def __init__(self, xml_tree, source_path=None, macro_paths=None):
        # xml_tree: parsed ElementTree of the tool XML.
        # source_path: where the XML was loaded from (may be None).
        # macro_paths: paths of macro files expanded into the tree.
        self.xml_tree = xml_tree
        self.root = xml_tree.getroot()
        self._source_path = source_path
        self._macro_paths = macro_paths or []
        # Tools at the default/oldest profile keep several legacy parsing
        # behaviors (interpreter attribute, no implicit exit-code checks, ...).
        self.legacy_defaults = self.parse_profile() == "16.01"

    def to_string(self):
        """Serialize the tool XML back to a string."""
        return xml_to_string(self.root)

    def parse_version(self):
        """Return the tool's ``version`` attribute, or None if absent."""
        return self.root.get("version", None)

    def parse_id(self):
        """Return the tool's ``id`` attribute."""
        return self.root.get("id")

    def parse_tool_module(self):
        """Return (module, class) from a <type> element, or None for the default tool class."""
        root = self.root
        if root.find("type") is not None:
            type_elem = root.find("type")
            module = type_elem.get('module', 'galaxy.tools')
            cls = type_elem.get('class')
            return module, cls

        return None

    def parse_action_module(self):
        """Return (module, class) from an <action> element, or None if absent."""
        root = self.root
        action_elem = root.find("action")
        if action_elem is not None:
            module = action_elem.get('module')
            cls = action_elem.get('class')
            return module, cls
        else:
            return None

    def parse_tool_type(self):
        """Return the root ``tool_type`` attribute if set (falls through to None)."""
        root = self.root
        if root.get('tool_type', None) is not None:
            return root.get('tool_type')

    def parse_name(self):
        """Return the tool's display ``name`` attribute."""
        return self.root.get("name")

    def parse_edam_operations(self):
        """Return the list of EDAM operation terms declared under <edam_operations>."""
        edam_ops = self.root.find("edam_operations")
        if edam_ops is None:
            return []
        return [edam_op.text for edam_op in edam_ops.findall("edam_operation")]

    def parse_edam_topics(self):
        """Return the list of EDAM topic terms declared under <edam_topics>."""
        edam_topics = self.root.find("edam_topics")
        if edam_topics is None:
            return []
        return [edam_topic.text for edam_topic in edam_topics.findall("edam_topic")]

    def parse_xrefs(self):
        """Return cross-references as dicts of value/reftype; <xref> tags without a type are skipped."""
        xrefs = self.root.find("xrefs")
        if xrefs is None:
            return []
        return [dict(value=xref.text.strip(), reftype=xref.attrib['type']) for xref in xrefs.findall("xref") if xref.get("type")]

    def parse_description(self):
        """Return the text of the <description> element ('' if absent)."""
        return xml_text(self.root, "description")

    def parse_is_multi_byte(self):
        return self._get_attribute_as_bool("is_multi_byte", self.default_is_multi_byte)

    def parse_display_interface(self, default):
        return self._get_attribute_as_bool("display_interface", default)

    def parse_require_login(self, default):
        return self._get_attribute_as_bool("require_login", default)

    def parse_request_param_translation_elem(self):
        """Return the raw <request_param_translation> element (or None)."""
        return self.root.find("request_param_translation")

    def parse_command(self):
        """Return the text of the <command> element, or None when missing/empty."""
        command_el = self._command_el
        return ((command_el is not None) and command_el.text) or None

    def parse_expression(self):
        """ Return the text of the tool's <expression> element, if any.

        Only the "ecma5.1" expression type is supported; any other declared
        type raises.
        """
        expression_el = self.root.find("expression")
        if expression_el is not None:
            expression_type = expression_el.get("type")
            if expression_type != "ecma5.1":
                raise Exception("Unknown expression type [%s] encountered" % expression_type)
            return expression_el.text
        return None

    def parse_environment_variables(self):
        """Return a list of environment variable definition dicts.

        Each <environment_variable> supplies either a template (its text) or
        an ``inject`` directive (currently only "api_key") — never both.
        """
        environment_variables_el = self.root.find("environment_variables")
        if environment_variables_el is None:
            return []

        environment_variables = []
        for environment_variable_el in environment_variables_el.findall("environment_variable"):
            template = environment_variable_el.text
            inject = environment_variable_el.get("inject")
            if inject:
                assert not template, "Cannot specify inject and environment variable template."
                assert inject in ["api_key"]
            if template:
                assert not inject, "Cannot specify inject and environment variable template."
            definition = {
                "name": environment_variable_el.get("name"),
                "template": template,
                "inject": inject,
                "strip": string_as_bool(environment_variable_el.get("strip", False)),
            }
            environment_variables.append(
                definition
            )
        return environment_variables

    def parse_home_target(self):
        """Return "job_home" or "shared_home" for $HOME handling.

        Profile >= 18.01 defaults to a per-job home; an explicit
        ``use_shared_home`` on <command> overrides the default either way.
        """
        target = "job_home" if self.parse_profile() >= "18.01" else "shared_home"
        command_el = self._command_el
        command_legacy = (command_el is not None) and command_el.get("use_shared_home", None)
        if command_legacy is not None:
            target = "shared_home" if string_as_bool(command_legacy) else "job_home"
        return target

    def parse_tmp_target(self):
        # Default to not touching TMPDIR et. al. but if job_tmp is set
        # in job_conf then do. This is a very conservative approach that shouldn't
        # break or modify any configurations by default.
        return "job_tmp_if_explicit"

    def parse_docker_env_pass_through(self):
        """Return env vars passed into docker containers (profile dependent)."""
        if self.parse_profile() < "18.01":
            return ["GALAXY_SLOTS"]
        else:
            # Pass home, etc...
            return super(XmlToolSource, self).parse_docker_env_pass_through()

    def parse_interpreter(self):
        """Return the legacy <command interpreter="..."> value, or None.

        The attribute is only honored for legacy (16.01) tools; otherwise it
        is ignored with a deprecation warning.
        """
        interpreter = None
        command_el = self._command_el
        if command_el is not None:
            interpreter = command_el.get("interpreter", None)
        if interpreter and not self.legacy_defaults:
            log.warning("Deprecated interpreter attribute on command element is now ignored.")
            interpreter = None
        return interpreter

    def parse_version_command(self):
        """Return the text of <version_command>, or None if absent."""
        version_cmd = self.root.find("version_command")
        if version_cmd is not None:
            return version_cmd.text
        else:
            return None

    def parse_version_command_interpreter(self):
        """Return the interpreter attribute of <version_command>, or None."""
        if self.parse_version_command() is not None:
            version_cmd = self.root.find("version_command")
            version_cmd_interpreter = version_cmd.get("interpreter", None)
            if version_cmd_interpreter:
                return version_cmd_interpreter
        return None

    def parse_parallelism(self):
        """Return a ParallelismInfo for <parallelism method="...">, or None."""
        parallelism = self.root.find("parallelism")
        parallelism_info = None
        if parallelism is not None and parallelism.get("method"):
            return ParallelismInfo(parallelism)
        return parallelism_info

    def parse_interactivetool(self):
        """Return a list of entry point dicts (port/url/name/requires_domain)
        parsed from <entry_points> for InteractiveTools."""
        interactivetool_el = self.root.find("entry_points")
        rtt = []
        if interactivetool_el is None:
            return rtt
        for ep_el in interactivetool_el.findall("entry_point"):
            port = ep_el.find("port")
            assert port is not None, ValueError('A port is required for InteractiveTools')
            port = port.text.strip()
            url = ep_el.find("url")
            if url is not None:
                url = url.text.strip()
            name = ep_el.get('name', None)
            if name:
                name = name.strip()
            requires_domain = string_as_bool(ep_el.attrib.get("requires_domain", False))
            rtt.append(dict(port=port, url=url, name=name, requires_domain=requires_domain))
        return rtt

    def parse_hidden(self):
        """Return the boolean value of the <hidden> element text, if present."""
        hidden = xml_text(self.root, "hidden")
        if hidden:
            hidden = string_as_bool(hidden)
        return hidden

    def parse_redirect_url_params_elem(self):
        """Return the raw <redirect_url_params> element (or None)."""
        return self.root.find("redirect_url_params")

    def parse_sanitize(self):
        return self._get_option_value("sanitize", True)

    def parse_refresh(self):
        return self._get_option_value("refresh", False)

    def _get_option_value(self, key, default):
        # Scan all <options> elements; first one defining `key` wins.
        root = self.root
        for option_elem in root.findall("options"):
            if key in option_elem.attrib:
                return string_as_bool(option_elem.get(key))
        return default

    @property
    def _command_el(self):
        # The (single) <command> element, or None.
        return self.root.find("command")

    def _get_attribute_as_bool(self, attribute, default, elem=None):
        # Read a boolean attribute from `elem` (the root by default).
        if elem is None:
            elem = self.root
        return string_as_bool(elem.get(attribute, default))

    def parse_requirements_and_containers(self):
        """Delegate <requirements>/<containers> parsing to the requirements module."""
        return requirements.parse_requirements_from_xml(self.root)

    def parse_input_pages(self):
        """Return a pages abstraction wrapping the <inputs> definition."""
        return XmlPagesSource(self.root)

    def parse_provided_metadata_style(self):
        """Return "legacy" or "default" metadata style.

        Explicit ``provided_metadata_style`` on <outputs> wins; otherwise the
        style is derived from the tool profile (pre-17.09 -> legacy).
        """
        style = None
        out_elem = self.root.find("outputs")
        if out_elem is not None and "provided_metadata_style" in out_elem.attrib:
            style = out_elem.attrib["provided_metadata_style"]

        if style is None:
            style = "legacy" if self.parse_profile() < "17.09" else "default"

        assert style in ["legacy", "default"]
        return style

    def parse_provided_metadata_file(self):
        """Return the metadata file name ("galaxy.json" unless overridden on <outputs>)."""
        provided_metadata_file = "galaxy.json"
        out_elem = self.root.find("outputs")
        if out_elem is not None and "provided_metadata_file" in out_elem.attrib:
            provided_metadata_file = out_elem.attrib["provided_metadata_file"]

        return provided_metadata_file

    def parse_outputs(self, tool):
        """Parse <outputs> into (outputs, output_collections).

        Both returned values are ordered dicts keyed by output name; datasets
        that belong to a collection are recorded on the collection rather
        than in the top-level outputs dict.
        """
        out_elem = self.root.find("outputs")
        outputs = OrderedDict()
        output_collections = OrderedDict()
        if out_elem is None:
            return outputs, output_collections

        # Holds dataset outputs as they are parsed; collection parsing moves
        # entries out of here and into the owning collection.
        data_dict = OrderedDict()

        def _parse(data_elem, **kwds):
            output_def = self._parse_output(data_elem, tool, **kwds)
            data_dict[output_def.name] = output_def
            return output_def

        for _ in out_elem.findall("data"):
            _parse(_)

        def _parse_expression(output_elem, **kwds):
            output_def = self._parse_expression_output(output_elem, tool, **kwds)
            output_def.filters = output_elem.findall('filter')
            data_dict[output_def.name] = output_def
            return output_def

        def _parse_collection(collection_elem):
            name = collection_elem.get("name")
            label = xml_text(collection_elem, "label")
            default_format = collection_elem.get("format", "data")
            collection_type = collection_elem.get("type", None)
            collection_type_source = collection_elem.get("type_source", None)
            collection_type_from_rules = collection_elem.get("type_from_rules", None)
            structured_like = collection_elem.get("structured_like", None)
            inherit_format = False
            inherit_metadata = False
            if structured_like:
                inherit_format = string_as_bool(collection_elem.get("inherit_format", None))
                inherit_metadata = string_as_bool(collection_elem.get("inherit_metadata", None))
            default_format_source = collection_elem.get("format_source", None)
            default_metadata_source = collection_elem.get("metadata_source", "")
            filters = collection_elem.findall('filter')

            dataset_collector_descriptions = None
            if collection_elem.find("discover_datasets") is not None:
                dataset_collector_descriptions = dataset_collector_descriptions_from_elem(collection_elem, legacy=False)
            structure = ToolOutputCollectionStructure(
                collection_type=collection_type,
                collection_type_source=collection_type_source,
                collection_type_from_rules=collection_type_from_rules,
                structured_like=structured_like,
                dataset_collector_descriptions=dataset_collector_descriptions,
            )
            output_collection = ToolOutputCollection(
                name,
                structure,
                label=label,
                filters=filters,
                default_format=default_format,
                inherit_format=inherit_format,
                inherit_metadata=inherit_metadata,
                default_format_source=default_format_source,
                default_metadata_source=default_metadata_source,
            )
            outputs[output_collection.name] = output_collection

            # Parse nested <data> with the collection's defaults...
            for data_elem in collection_elem.findall("data"):
                _parse(
                    data_elem,
                    default_format=default_format,
                    default_format_source=default_format_source,
                    default_metadata_source=default_metadata_source,
                )

            # ...then move those datasets from data_dict onto the collection.
            for data_elem in collection_elem.findall("data"):
                output_name = data_elem.get("name")
                data = data_dict[output_name]
                assert data
                del data_dict[output_name]
                output_collection.outputs[output_name] = data
            output_collections[name] = output_collection

        for out_child in out_elem:
            if out_child.tag == "data":
                _parse(out_child)
            elif out_child.tag == "collection":
                _parse_collection(out_child)
            elif out_child.tag == "output":
                # Generic <output type="..."> dispatches on its type attribute.
                output_type = out_child.get("type")
                if output_type == "data":
                    _parse(out_child)
                elif output_type == "collection":
                    out_child.attrib["type"] = out_child.get("collection_type")
                    out_child.attrib["type_source"] = out_child.get("collection_type_source")
                    _parse_collection(out_child)
                else:
                    _parse_expression(out_child)
            else:
                log.warning("Unknown output tag encountered [%s]" % out_child.tag)

        for output_def in data_dict.values():
            outputs[output_def.name] = output_def
        return outputs, output_collections

    def _parse_output(
        self,
        data_elem,
        tool,
        default_format="data",
        default_format_source=None,
        default_metadata_source="",
        expression_type=None,
    ):
        """Build a ToolOutput from a <data> element, applying collection defaults."""
        output = ToolOutput(data_elem.get("name"))
        output_format = data_elem.get("format", default_format)
        auto_format = string_as_bool(data_elem.get("auto_format", "false"))
        if auto_format and output_format != "data":
            raise ValueError("Setting format and auto_format is not supported at this time.")
        elif auto_format:
            # auto_format requests datatype sniffing of the result.
            output_format = "_sniff_"
        output.format = output_format
        output.change_format = data_elem.findall("change_format")
        output.format_source = data_elem.get("format_source", default_format_source)
        output.default_identifier_source = data_elem.get("default_identifier_source", 'None')
        output.metadata_source = data_elem.get("metadata_source", default_metadata_source)
        output.parent = data_elem.get("parent", None)
        output.label = xml_text(data_elem, "label")
        output.count = int(data_elem.get("count", 1))
        output.filters = data_elem.findall('filter')
        output.tool = tool
        output.from_work_dir = data_elem.get("from_work_dir", None)
        output.hidden = string_as_bool(data_elem.get("hidden", ""))
        output.actions = ToolOutputActionGroup(output, data_elem.find('actions'))
        output.dataset_collector_descriptions = dataset_collector_descriptions_from_elem(data_elem, legacy=self.legacy_defaults)
        return output

    def _parse_expression_output(self, output_elem, tool, **kwds):
        """Build a ToolExpressionOutput from an <output> expression element."""
        output_type = output_elem.get("type")
        from_expression = output_elem.get("from")
        output = ToolExpressionOutput(
            output_elem.get("name"),
            output_type,
            from_expression,
        )
        output.path = output_elem.get("value")
        output.label = xml_text(output_elem, "label")

        output.hidden = string_as_bool(output_elem.get("hidden", ""))
        output.actions = ToolOutputActionGroup(output, output_elem.find('actions'))
        output.dataset_collector_descriptions = []
        return output

    def parse_stdio(self):
        """
        parse error handling from command and stdio tag

        returns list of exit codes, list of regexes
        - exit_codes contain all non-zero exit codes (:-1 and 1:) if
          detect_errors is default (if not legacy), exit_code, or aggressive
        - the oom_exit_code if given and detect_errors is exit_code
        - exit codes and regexes from the stdio tag
          these are prepended to the list, i.e. are evaluated prior to regexes
          and exit codes derived from the properties of the command tag.
          thus more specific regexes of the same or more severe error level
          are triggered first.
        """

        command_el = self._command_el
        detect_errors = None
        if command_el is not None:
            detect_errors = command_el.get("detect_errors")

        if detect_errors and detect_errors != "default":
            if detect_errors == "exit_code":
                oom_exit_code = None
                if command_el is not None:
                    oom_exit_code = command_el.get("oom_exit_code", None)
                if oom_exit_code is not None:
                    # Validate that oom_exit_code parses as an integer.
                    int(oom_exit_code)
                exit_codes, regexes = error_on_exit_code(out_of_memory_exit_code=oom_exit_code)
            elif detect_errors == "aggressive":
                exit_codes, regexes = aggressive_error_checks()
            else:
                raise ValueError("Unknown detect_errors value encountered [%s]" % detect_errors)
        elif len(self.root.findall('stdio')) == 0 and not self.legacy_defaults:
            exit_codes, regexes = error_on_exit_code()
        else:
            exit_codes = []
            regexes = []

        if len(self.root.findall('stdio')) > 0:
            # Explicit <stdio> entries take precedence (prepended).
            parser = StdioParser(self.root)
            exit_codes = parser.stdio_exit_codes + exit_codes
            regexes = parser.stdio_regexes + regexes

        return exit_codes, regexes

    def parse_strict_shell(self):
        """Return whether the command should run under `set -e` semantics.

        Defaults to strict for profile >= 20.09; <command strict="..."> wins.
        """
        command_el = self._command_el
        if packaging.version.parse(self.parse_profile()) < packaging.version.parse('20.09'):
            default = "False"
        else:
            default = "True"
        if command_el is not None:
            return string_as_bool(command_el.get("strict", default))
        else:
            return string_as_bool(default)

    def parse_help(self):
        """Return the text of the <help> element, or None."""
        help_elem = self.root.find('help')
        return help_elem.text if help_elem is not None else None

    @property
    def macro_paths(self):
        return self._macro_paths

    @property
    def source_path(self):
        return self._source_path

    def parse_tests_to_dict(self):
        """Return {"tests": [...]} parsed from the <tests> element."""
        tests_elem = self.root.find("tests")
        tests = []
        rval = dict(
            tests=tests
        )

        if tests_elem is not None:
            for i, test_elem in enumerate(tests_elem.findall("test")):
                tests.append(_test_elem_to_dict(test_elem, i))

        return rval

    def parse_profile(self):
        # Pre-16.04 or default XML defaults
        # - Use standard error for error detection.
        # - Don't run shells with -e
        # - Auto-check for implicit multiple outputs.
        # - Auto-check for $param_file.
        # - Enable buggy interpreter attribute.
        return self.root.get("profile", "16.01")

    def parse_python_template_version(self):
        """Return the declared python_template_version as a parsed Version, or None."""
        python_template_version = self.root.get("python_template_version", None)
        if python_template_version is not None:
            python_template_version = packaging.version.parse(python_template_version)
        return python_template_version
541 | |
542 | |
543 def _test_elem_to_dict(test_elem, i): | |
544 rval = dict( | |
545 outputs=__parse_output_elems(test_elem), | |
546 output_collections=__parse_output_collection_elems(test_elem), | |
547 inputs=__parse_input_elems(test_elem, i), | |
548 expect_num_outputs=test_elem.get("expect_num_outputs"), | |
549 command=__parse_assert_list_from_elem(test_elem.find("assert_command")), | |
550 command_version=__parse_assert_list_from_elem(test_elem.find("assert_command_version")), | |
551 stdout=__parse_assert_list_from_elem(test_elem.find("assert_stdout")), | |
552 stderr=__parse_assert_list_from_elem(test_elem.find("assert_stderr")), | |
553 expect_exit_code=test_elem.get("expect_exit_code"), | |
554 expect_failure=string_as_bool(test_elem.get("expect_failure", False)), | |
555 maxseconds=test_elem.get("maxseconds", None), | |
556 ) | |
557 _copy_to_dict_if_present(test_elem, rval, ["num_outputs"]) | |
558 return rval | |
559 | |
560 | |
def __parse_input_elems(test_elem, i):
    """Flatten nested repeat/conditional/section inputs, then parse params."""
    __expand_input_elems(test_elem)
    return __parse_inputs_elems(test_elem, i)
564 | |
565 | |
def __parse_output_elems(test_elem):
    """Collect expected-output definitions from a test's <output> children."""
    return [
        dict(zip(("name", "value", "attributes"), __parse_output_elem(elem)))
        for elem in test_elem.findall("output")
    ]
572 | |
573 | |
def __parse_output_elem(output_elem):
    """Parse a single test <output> element into (name, file, attributes)."""
    remaining = dict(output_elem.attrib)
    name = remaining.pop('name', None)
    if name is None:
        raise Exception("Test output does not have a 'name'")

    expected_file, attributes = __parse_test_attributes(
        output_elem, remaining, parse_discovered_datasets=True
    )
    return name, expected_file, attributes
582 | |
583 | |
def __parse_command_elem(test_elem):
    """Return the assertion list built from a test's <command> element."""
    return __parse_assert_list_from_elem(test_elem.find("command"))
587 | |
588 | |
def __parse_output_collection_elems(test_elem):
    """Parse every <output_collection> element beneath a test element."""
    return [
        __parse_output_collection_elem(elem)
        for elem in test_elem.findall("output_collection")
    ]
595 | |
596 | |
def __parse_output_collection_elem(output_collection_elem):
    """Parse one <output_collection> test element into its dict form."""
    attributes = dict(output_collection_elem.attrib)
    name = attributes.pop('name', None)
    if name is None:
        raise Exception("Test output collection does not have a 'name'")
    element_tests = __parse_element_tests(output_collection_elem)
    return TestCollectionOutputDef(name, attributes, element_tests).to_dict()
604 | |
605 | |
def __parse_element_tests(parent_element):
    """Parse <element> children of a collection (or nested element) test.

    Returns a dict mapping each element's identifier (its ``name``
    attribute) to the parsed test attributes for that element.

    Raises:
        Exception: when an <element> lacks the required ``name`` attribute.
    """
    element_tests = {}
    for element in parent_element.findall("element"):
        element_attrib = dict(element.attrib)
        identifier = element_attrib.pop('name', None)
        if identifier is None:
            # Fixed error message: the old one said "Test primary dataset
            # does not have a 'identifier'" — copy-pasted from the
            # discovered-dataset branch and naming the wrong attribute.
            raise Exception("Test collection element does not have a 'name'")
        element_tests[identifier] = __parse_test_attributes(element, element_attrib, parse_elements=True)
    return element_tests
615 | |
616 | |
def __parse_test_attributes(output_elem, attrib, parse_elements=False, parse_discovered_datasets=False):
    """Parse comparison attributes for a test output (or nested element).

    ``attrib`` is mutated in place: consumed keys are popped, while a few
    (``delta_frac``, ``ftype``, ``md5``, ``checksum``) are read with get()
    and deliberately left behind for the caller.

    Returns (file, attributes) where ``file`` is the expected output file
    (or None) and ``attributes`` describes how to perform the comparison.
    Raises when the element defines nothing at all to check.
    """
    assert_list = __parse_assert_list(output_elem)

    # Allow either file or value to specify a target file to compare result with
    # file was traditionally used by outputs and value by extra files.
    # NOTE: both pops run eagerly, so 'value' is consumed even when 'file'
    # is present.
    file = attrib.pop('file', attrib.pop('value', None))

    # File no longer required if a list of assertions was present.
    attributes = {}
    # Method of comparison
    attributes['compare'] = attrib.pop('compare', 'diff').lower()
    # Number of lines to allow to vary in logs (for dates, etc)
    attributes['lines_diff'] = int(attrib.pop('lines_diff', '0'))
    # Allow a file size to vary if sim_size compare
    attributes['delta'] = int(attrib.pop('delta', DEFAULT_DELTA))
    attributes['delta_frac'] = float(attrib['delta_frac']) if 'delta_frac' in attrib else DEFAULT_DELTA_FRAC
    attributes['sort'] = string_as_bool(attrib.pop('sort', False))
    attributes['decompress'] = string_as_bool(attrib.pop('decompress', False))
    extra_files = []
    if 'ftype' in attrib:
        attributes['ftype'] = attrib['ftype']
    for extra in output_elem.findall('extra_files'):
        extra_files.append(__parse_extra_files_elem(extra))
    metadata = {}
    for metadata_elem in output_elem.findall('metadata'):
        metadata[metadata_elem.get('name')] = metadata_elem.get('value')
    md5sum = attrib.get("md5", None)
    checksum = attrib.get("checksum", None)
    element_tests = {}
    if parse_elements:
        element_tests = __parse_element_tests(output_elem)

    primary_datasets = {}
    if parse_discovered_datasets:
        # Discovered datasets are keyed by their 'designation' attribute.
        for primary_elem in (output_elem.findall("discovered_dataset") or []):
            primary_attrib = dict(primary_elem.attrib)
            designation = primary_attrib.pop('designation', None)
            if designation is None:
                raise Exception("Test primary dataset does not have a 'designation'")
            primary_datasets[designation] = __parse_test_attributes(primary_elem, primary_attrib)

    has_checksum = md5sum or checksum
    has_nested_tests = extra_files or element_tests or primary_datasets
    # At least one verification mechanism must be defined.
    if not (assert_list or file or metadata or has_checksum or has_nested_tests):
        raise Exception("Test output defines nothing to check (e.g. must have a 'file' check against, assertions to check, metadata or checksum tests, etc...)")
    attributes['assert_list'] = assert_list
    attributes['extra_files'] = extra_files
    attributes['metadata'] = metadata
    attributes['md5'] = md5sum
    attributes['checksum'] = checksum
    attributes['elements'] = element_tests
    attributes['primary_datasets'] = primary_datasets
    return file, attributes
670 | |
671 | |
def __parse_assert_list(output_elem):
    """Return assertion dicts from an output's <assert_contents> (or None)."""
    return __parse_assert_list_from_elem(output_elem.find("assert_contents"))
675 | |
676 | |
def __parse_assert_list_from_elem(assert_elem):
    """Convert an assertion container element into a list of assertion dicts.

    Returns None when ``assert_elem`` is None; otherwise every child element
    is converted recursively to the dictionary format used by the assertion
    checking code: {"tag", "attributes", "children"}.
    """
    if assert_elem is None:
        return None

    def convert_elem(elem):
        # Recursively convert an XML element (and its subtree) to a dict.
        return {
            "tag": elem.tag,
            "attributes": dict(elem.attrib),
            "children": [convert_elem(child) for child in elem],
        }

    return [convert_elem(child) for child in assert_elem]
694 | |
695 | |
def __parse_extra_files_elem(extra):
    """Parse an <extra_files> element describing a companion file/directory.

    Directories are compared member-by-member by basename, so a name is
    only mandatory for non-directory entries.
    """
    remaining = dict(extra.attrib)
    kind = remaining.pop('type', 'file')
    name = remaining.pop('name', None)
    assert kind == 'directory' or name is not None, \
        'extra_files type (%s) requires a name attribute' % kind
    value, attributes = __parse_test_attributes(extra, remaining)
    return {
        "value": value,
        "name": name,
        "type": kind,
        "attributes": attributes
    }
711 | |
712 | |
713 def __expand_input_elems(root_elem, prefix=""): | |
714 __append_prefix_to_params(root_elem, prefix) | |
715 | |
716 repeat_elems = root_elem.findall('repeat') | |
717 indices = {} | |
718 for repeat_elem in repeat_elems: | |
719 name = repeat_elem.get("name") | |
720 if name not in indices: | |
721 indices[name] = 0 | |
722 index = 0 | |
723 else: | |
724 index = indices[name] + 1 | |
725 indices[name] = index | |
726 | |
727 new_prefix = __prefix_join(prefix, name, index=index) | |
728 __expand_input_elems(repeat_elem, new_prefix) | |
729 __pull_up_params(root_elem, repeat_elem) | |
730 | |
731 cond_elems = root_elem.findall('conditional') | |
732 for cond_elem in cond_elems: | |
733 new_prefix = __prefix_join(prefix, cond_elem.get("name")) | |
734 __expand_input_elems(cond_elem, new_prefix) | |
735 __pull_up_params(root_elem, cond_elem) | |
736 | |
737 section_elems = root_elem.findall('section') | |
738 for section_elem in section_elems: | |
739 new_prefix = __prefix_join(prefix, section_elem.get("name")) | |
740 __expand_input_elems(section_elem, new_prefix) | |
741 __pull_up_params(root_elem, section_elem) | |
742 | |
743 | |
def __append_prefix_to_params(elem, prefix):
    """Rewrite each direct <param> child's name to include ``prefix``."""
    for param in elem.findall('param'):
        param.set("name", __prefix_join(prefix, param.get("name")))
747 | |
748 | |
def __pull_up_params(parent_elem, child_elem):
    """Attach each of ``child_elem``'s direct <param> children to ``parent_elem``."""
    params = child_elem.findall('param')
    for moved in params:
        parent_elem.append(moved)
752 | |
753 | |
def __prefix_join(prefix, name, index=None):
    """Join ``prefix`` and ``name`` with '|'; suffix ``name`` with _<index> when given."""
    if index is not None:
        name = "%s_%d" % (name, index)
    if not prefix:
        return name
    return "%s|%s" % (prefix, name)
757 | |
758 | |
def _copy_to_dict_if_present(elem, rval, attributes):
    """Copy listed XML attributes into ``rval`` when defined; returns ``rval``."""
    present = (name for name in attributes if name in elem.attrib)
    for name in present:
        rval[name] = elem.get(name)
    return rval
764 | |
765 | |
def __parse_inputs_elems(test_elem, i):
    """Parse every direct <param> child of the test into raw input dicts."""
    return [__parse_param_elem(param_elem, i) for param_elem in test_elem.findall("param")]
772 | |
773 | |
def __parse_param_elem(param_elem, i=0):
    """Parse a test <param> element into {"name", "value", "attributes"}.

    ``i`` is the test index, used to build a unique rename for composite
    datasets. Child elements (composite_data, metadata, edit_attributes,
    collection) are folded into the returned attributes dict.

    Raises:
        KeyError: when the element has no ``name`` attribute.
    """
    attrib = dict(param_elem.attrib)
    if 'values' in attrib:
        value = attrib['values'].split(',')
    elif 'value' in attrib:
        value = attrib['value']
    else:
        value = None
    # The original code aliased param_elem and guarded "if ... is not None",
    # which was always true — the dead check has been removed.
    # At this time, we can assume having children only occurs on
    # DataToolParameter test items but this could change and would cause
    # the below parsing to change based upon differences in children items.
    attrib['metadata'] = {}
    attrib['composite_data'] = []
    attrib['edit_attributes'] = []
    # Composite datasets need to be renamed uniquely
    composite_data_name = None
    for child in param_elem:
        if child.tag == 'composite_data':
            attrib['composite_data'].append(child.get("value"))
            if composite_data_name is None:
                # Generate a unique name; each test uses a
                # fresh history.
                composite_data_name = '_COMPOSITE_RENAMED_t%d_%s' \
                    % (i, uuid.uuid1().hex)
        elif child.tag == 'metadata':
            attrib['metadata'][child.get("name")] = child.get("value")
        elif child.tag == 'edit_attributes':
            attrib['edit_attributes'].append(child)
        elif child.tag == 'collection':
            attrib['collection'] = TestCollectionDef.from_xml(child, __parse_param_elem)
    if composite_data_name:
        # Composite datasets need implicit renaming;
        # inserted at front of list so explicit declarations
        # take precedence
        attrib['edit_attributes'].insert(0, {'type': 'name', 'value': composite_data_name})
    name = attrib.pop('name')
    return {
        "name": name,
        "value": value,
        "attributes": attrib
    }
819 | |
820 | |
class StdioParser(object):
    """Parses a tool XML root's <stdio> element(s) into exit-code rules
    (``stdio_exit_codes``) and stdout/stderr regex rules (``stdio_regexes``)
    used to classify a tool run's outcome.
    """

    def __init__(self, root):
        """Collect exit-code and regex rules from every <stdio> child of root."""
        # Always define the rule lists so attribute access is safe even if
        # parsing below raises.
        self.stdio_exit_codes = list()
        self.stdio_regexes = list()
        try:
            # We should have a single <stdio> element, but handle the case for
            # multiples.
            # For every stdio element, add all of the exit_code and regex
            # subelements that we find:
            for stdio_elem in root.findall('stdio'):
                self.parse_stdio_exit_codes(stdio_elem)
                self.parse_stdio_regexes(stdio_elem)
        except Exception:
            log.exception("Exception in parse_stdio!")

    def parse_stdio_exit_codes(self, stdio_elem):
        """
        Parse the tool's <stdio> element's <exit_code> subelements.
        This will add all of those elements, if any, to self.stdio_exit_codes.
        """
        try:
            # Look for all <exit_code> elements. Each exit_code element must
            # have a range/value.
            # Exit-code ranges have precedence over a single exit code.
            # So if there are value and range attributes, we use the range
            # attribute. If there is neither a range nor a value, then print
            # a warning and skip to the next.
            for exit_code_elem in stdio_elem.findall("exit_code"):
                exit_code = ToolStdioExitCode()
                # Each exit code has an optional description that can be
                # part of the "desc" or "description" attributes:
                exit_code.desc = exit_code_elem.get("desc")
                if exit_code.desc is None:
                    exit_code.desc = exit_code_elem.get("description")
                # Parse the error level:
                exit_code.error_level = (
                    self.parse_error_level(exit_code_elem.get("level")))
                # BUG FIX: the previous code used get("range", "") and
                # get("value", "") - the "" defaults meant the None checks
                # below could never fire, so the documented fallback to the
                # "value" attribute (and the missing-attribute warning) was
                # unreachable.
                code_range = exit_code_elem.get("range")
                if code_range is None:
                    code_range = exit_code_elem.get("value")
                if code_range is None:
                    log.warning("Tool stdio exit codes must have a range or value")
                    continue
                # Parse the range. We look for:
                #   :Y
                #   X:
                #   X:Y  - Split on the colon. We do not allow a colon
                #          without a beginning or end, though we could.
                # Also note that whitespace is eliminated.
                # TODO: Turn this into a single match - it should be
                # more efficient.
                code_range = re.sub(r"\s", "", code_range)
                code_ranges = re.split(r":", code_range)
                if len(code_ranges) == 2:
                    # Empty side of a colon means an open (infinite) bound.
                    if code_ranges[0] is None or '' == code_ranges[0]:
                        exit_code.range_start = float("-inf")
                    else:
                        exit_code.range_start = int(code_ranges[0])
                    if code_ranges[1] is None or '' == code_ranges[1]:
                        exit_code.range_end = float("inf")
                    else:
                        exit_code.range_end = int(code_ranges[1])
                # If we got more than one colon, then ignore the exit code.
                elif len(code_ranges) > 2:
                    log.warning("Invalid tool exit_code range %s - ignored"
                                % code_range)
                    continue
                # Else we have a singular value. If it's not an integer, then
                # we'll just write a log message and skip this exit_code.
                else:
                    try:
                        exit_code.range_start = int(code_range)
                    except Exception:
                        log.error(code_range)
                        log.warning("Invalid range start for tool's exit_code %s: exit_code ignored" % code_range)
                        continue
                    exit_code.range_end = exit_code.range_start
                # TODO: Check if we got ">", ">=", "<", or "<=":
                # Check that the range, regardless of how we got it,
                # isn't bogus. If we have two infinite values, then
                # the start must be -inf and the end must be +inf.
                # So at least warn about this situation:
                if isinf(exit_code.range_start) and isinf(exit_code.range_end):
                    log.warning("Tool exit_code range %s will match on all exit codes" % code_range)
                self.stdio_exit_codes.append(exit_code)
        except Exception:
            log.exception("Exception in parse_stdio_exit_codes!")

    def parse_stdio_regexes(self, stdio_elem):
        """
        Look in the tool's <stdio> elem for all <regex> subelements
        that define how to look for warnings and fatal errors in
        stdout and stderr. This will add all such regex elements
        to the Tool's stdio_regexes list.
        """
        try:
            # Look for every <regex> subelement. The regular expression
            # will have "match" and "source" (or "src") attributes.
            for regex_elem in stdio_elem.findall("regex"):
                regex = ToolStdioRegex()
                # Each regex has an optional description that can be
                # part of the "desc" or "description" attributes:
                regex.desc = regex_elem.get("desc")
                if regex.desc is None:
                    regex.desc = regex_elem.get("description")
                # Parse the error level
                regex.error_level = (
                    self.parse_error_level(regex_elem.get("level")))
                # BUG FIX: get("match", "") made the None check below
                # unreachable, so a <regex> without a 'match' attribute was
                # silently kept with an empty (match-everything) pattern
                # instead of being skipped with a warning.
                regex.match = regex_elem.get("match")
                if regex.match is None:
                    # TODO: Convert the offending XML element to a string
                    log.warning("Ignoring tool's stdio regex element %s - "
                                "the 'match' attribute must exist")
                    continue
                # Parse the output sources. We look for the "src", "source",
                # and "sources" attributes, in that order. If there is no
                # such source, then the source defaults to stderr & stdout.
                # Look for a comma and then look for "err", "error", "out",
                # and "output":
                output_srcs = regex_elem.get("src")
                if output_srcs is None:
                    output_srcs = regex_elem.get("source")
                if output_srcs is None:
                    output_srcs = regex_elem.get("sources")
                if output_srcs is None:
                    output_srcs = "output,error"
                output_srcs = re.sub(r"\s", "", output_srcs)
                src_list = re.split(r",", output_srcs)
                # Just put together anything to do with "out", including
                # "stdout", "output", etc. Repeat for "stderr", "error",
                # and anything to do with "err". If neither stdout nor
                # stderr were specified, then raise a warning and scan both.
                for src in src_list:
                    if re.search("both", src, re.IGNORECASE):
                        regex.stdout_match = True
                        regex.stderr_match = True
                    if re.search("out", src, re.IGNORECASE):
                        regex.stdout_match = True
                    if re.search("err", src, re.IGNORECASE):
                        regex.stderr_match = True
                if not regex.stdout_match and not regex.stderr_match:
                    # BUG FIX: StdioParser defines no 'id' attribute, so the
                    # old "self.id" raised AttributeError here, aborting the
                    # loop and dropping this and all remaining regexes.
                    # getattr keeps the warning and continues parsing.
                    log.warning("Tool id %s: unable to determine if tool "
                                "stream source scanning is output, error, "
                                "or both. Defaulting to use both." % getattr(self, "id", None))
                    regex.stdout_match = True
                    regex.stderr_match = True
                self.stdio_regexes.append(regex)
        except Exception:
            # BUG FIX: the old message wrongly named parse_stdio_exit_codes.
            log.exception("Exception in parse_stdio_regexes!")

    # TODO: This method doesn't have to be part of the Tool class.
    def parse_error_level(self, err_level):
        """
        Parses error level and returns error level enumeration. If
        unparsable, returns 'fatal'
        """
        return_level = StdioErrorLevel.FATAL
        try:
            if err_level:
                if re.search("log", err_level, re.IGNORECASE):
                    return_level = StdioErrorLevel.LOG
                elif re.search("qc", err_level, re.IGNORECASE):
                    return_level = StdioErrorLevel.QC
                elif re.search("warning", err_level, re.IGNORECASE):
                    return_level = StdioErrorLevel.WARNING
                elif re.search("fatal_oom", err_level, re.IGNORECASE):
                    return_level = StdioErrorLevel.FATAL_OOM
                elif re.search("fatal", err_level, re.IGNORECASE):
                    return_level = StdioErrorLevel.FATAL
                else:
                    # BUG FIX: StdioParser has no 'id' attribute (see
                    # parse_stdio_regexes); getattr avoids AttributeError.
                    log.debug("Tool %s: error level %s did not match log/warning/fatal" %
                              (getattr(self, "id", None), err_level))
        except Exception:
            log.exception("Exception in parse_error_level")
        return return_level
999 | |
1000 | |
class XmlPagesSource(PagesSource):
    """PagesSource backed by a tool XML root's <inputs> element.

    Each explicit <page> child becomes a page; with no <page> children the
    whole <inputs> element is treated as a single page.
    """

    def __init__(self, root):
        self.input_elem = root.find("inputs")
        sources = []
        if self.input_elem is not None:
            page_elems = self.input_elem.findall("page")
            if not page_elems:
                page_elems = [self.input_elem]
            sources = [XmlPageSource(page_elem) for page_elem in page_elems]
        super(XmlPagesSource, self).__init__(sources)

    @property
    def inputs_defined(self):
        # True when the tool XML actually declared an <inputs> section.
        return self.input_elem is not None
1015 | |
1016 | |
class XmlPageSource(PageSource):
    """PageSource backed by one page-level XML element (<page> or <inputs>)."""

    def __init__(self, parent_elem):
        self.parent_elem = parent_elem

    def parse_display(self):
        """Return the page's <display> element serialized to XML text, or None."""
        display_elem = self.parent_elem.find("display")
        if display_elem is None:
            return None
        return xml_to_string(display_elem)

    def parse_input_sources(self):
        # Lazily wrap every direct child element as an input source.
        return map(XmlInputSource, self.parent_elem)
1032 | |
1033 | |
class XmlInputSource(InputSource):
    """InputSource backed by a single XML element describing one tool input."""

    def __init__(self, input_elem):
        self.input_elem = input_elem
        # The element tag (e.g. 'param', 'conditional', 'repeat') is the
        # input type.
        self.input_type = input_elem.tag

    def parse_input_type(self):
        return self.input_type

    def elem(self):
        return self.input_elem

    def get(self, key, value=None):
        return self.input_elem.get(key, value)

    def get_bool(self, key, default):
        return string_as_bool(self.get(key, default))

    def parse_label(self):
        return xml_text(self.input_elem, "label")

    def parse_help(self):
        return xml_text(self.input_elem, "help")

    def parse_sanitizer_elem(self):
        return self.input_elem.find("sanitizer")

    def parse_validator_elems(self):
        return self.input_elem.findall("validator")

    def parse_dynamic_options_elem(self):
        """ Return a galaxy.tools.parameters.dynamic_options.DynamicOptions
        if appropriate.
        """
        return self.input_elem.find('options')

    def parse_static_options(self):
        # Each tuple is (display text, value, selected?); display text falls
        # back to the value when the option has no text.
        options = []
        for option_elem in self.input_elem.findall("option"):
            option_value = option_elem.get("value")
            is_selected = string_as_bool(option_elem.get("selected", False))
            options.append((option_elem.text or option_value, option_value, is_selected))
        return options

    def parse_optional(self, default=None):
        """ Return boolean indicating whether parameter is optional. """
        if self.get('type') == "data_column":
            # Allow specifing force_select for backward compat., but probably
            # should use optional going forward for consistency with other
            # parameters.
            attrib = self.input_elem.attrib
            if "force_select" in attrib:
                return not string_as_bool(attrib.get("force_select"))
            return string_as_bool(attrib.get("optional", False))

        if default is None:
            default = self.default_optional
        return self.get_bool("optional", default)

    def parse_conversion_tuples(self):
        # Each <conversion> pairs a commandline substitution name with its
        # target datatype extension.
        return [
            (conv_elem.get("name"), conv_elem.get("type"))
            for conv_elem in self.input_elem.findall("conversion")
        ]

    def parse_nested_inputs_source(self):
        return XmlPageSource(self.input_elem)

    def parse_test_input_source(self):
        test_elem = self.input_elem.find("param")
        assert test_elem is not None, "<conditional> must have a child <param>"
        return XmlInputSource(test_elem)

    def parse_when_input_sources(self):
        # One (value, page source) pair per <when> branch of a conditional.
        return [
            (when_elem.get("value"), XmlPageSource(when_elem))
            for when_elem in self.input_elem.findall("when")
        ]
1125 | |
1126 | |
class ParallelismInfo(object):
    """
    Stores the information (if any) for running multiple instances of the tool in parallel
    on the same set of inputs.
    """

    def __init__(self, tag):
        # ``tag`` may be either an XML element or a plain dict of its
        # attributes; both support .get() for the required 'method' key.
        self.method = tag.get('method')
        if isinstance(tag, dict):
            items = tag.items()
        else:
            items = tag.attrib.items()
        # Every attribute other than 'method' is passed through untouched as
        # a method-specific option.
        self.attributes = dict([item for item in items if item[0] != 'method'])
        if len(self.attributes) == 0:
            # legacy basic mode - provide compatible defaults
            self.attributes['split_size'] = 20
            self.attributes['split_mode'] = 'number_of_parts'