comparison planemo/lib/python3.7/site-packages/galaxy/tool_util/parser/xml.py @ 1:56ad4e20f292 draft

"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
author guerler
date Fri, 31 Jul 2020 00:32:28 -0400
0:d30785e31577 1:56ad4e20f292
1 import logging
2 import re
3 import uuid
4 from collections import OrderedDict
5 from math import isinf
6
7 import packaging.version
8
9 from galaxy.tool_util.deps import requirements
10 from galaxy.tool_util.parser.util import (
11 DEFAULT_DELTA,
12 DEFAULT_DELTA_FRAC
13 )
14 from galaxy.util import (
15 string_as_bool,
16 xml_text,
17 xml_to_string
18 )
19 from .interface import (
20 InputSource,
21 PageSource,
22 PagesSource,
23 TestCollectionDef,
24 TestCollectionOutputDef,
25 ToolSource,
26 )
27 from .output_actions import ToolOutputActionGroup
28 from .output_collection_def import dataset_collector_descriptions_from_elem
29 from .output_objects import (
30 ToolExpressionOutput,
31 ToolOutput,
32 ToolOutputCollection,
33 ToolOutputCollectionStructure
34 )
35 from .stdio import (
36 aggressive_error_checks,
37 error_on_exit_code,
38 StdioErrorLevel,
39 ToolStdioExitCode,
40 ToolStdioRegex,
41 )
42
43
44 log = logging.getLogger(__name__)
45
46
47 class XmlToolSource(ToolSource):
48 """ Responsible for parsing a tool from classic Galaxy representation.
49 """
50
51 def __init__(self, xml_tree, source_path=None, macro_paths=None):
52 self.xml_tree = xml_tree
53 self.root = xml_tree.getroot()
54 self._source_path = source_path
55 self._macro_paths = macro_paths or []
56 self.legacy_defaults = self.parse_profile() == "16.01"
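# Illustrative usage sketch (the file name is an example; galaxy.util.parse_xml
# is one way to obtain the ElementTree this class expects):
#
#   from galaxy.util import parse_xml
#   tool_source = XmlToolSource(parse_xml("tool.xml"), source_path="tool.xml")
#   tool_source.parse_id(), tool_source.parse_version(), tool_source.parse_profile()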
57
58 def to_string(self):
59 return xml_to_string(self.root)
60
61 def parse_version(self):
62 return self.root.get("version", None)
63
64 def parse_id(self):
65 return self.root.get("id")
66
67 def parse_tool_module(self):
68 root = self.root
69 if root.find("type") is not None:
70 type_elem = root.find("type")
71 module = type_elem.get('module', 'galaxy.tools')
72 cls = type_elem.get('class')
73 return module, cls
74
75 return None
76
77 def parse_action_module(self):
78 root = self.root
79 action_elem = root.find("action")
80 if action_elem is not None:
81 module = action_elem.get('module')
82 cls = action_elem.get('class')
83 return module, cls
84 else:
85 return None
86
87 def parse_tool_type(self):
88 root = self.root
89 if root.get('tool_type', None) is not None:
90 return root.get('tool_type')
91
92 def parse_name(self):
93 return self.root.get("name")
94
95 def parse_edam_operations(self):
96 edam_ops = self.root.find("edam_operations")
97 if edam_ops is None:
98 return []
99 return [edam_op.text for edam_op in edam_ops.findall("edam_operation")]
100
101 def parse_edam_topics(self):
102 edam_topics = self.root.find("edam_topics")
103 if edam_topics is None:
104 return []
105 return [edam_topic.text for edam_topic in edam_topics.findall("edam_topic")]
106
107 def parse_xrefs(self):
108 xrefs = self.root.find("xrefs")
109 if xrefs is None:
110 return []
111 return [dict(value=xref.text.strip(), reftype=xref.attrib['type']) for xref in xrefs.findall("xref") if xref.get("type")]
112
113 def parse_description(self):
114 return xml_text(self.root, "description")
115
116 def parse_is_multi_byte(self):
117 return self._get_attribute_as_bool("is_multi_byte", self.default_is_multi_byte)
118
119 def parse_display_interface(self, default):
120 return self._get_attribute_as_bool("display_interface", default)
121
122 def parse_require_login(self, default):
123 return self._get_attribute_as_bool("require_login", default)
124
125 def parse_request_param_translation_elem(self):
126 return self.root.find("request_param_translation")
127
128 def parse_command(self):
129 command_el = self._command_el
130 return ((command_el is not None) and command_el.text) or None
131
132 def parse_expression(self):
133 """ Return string containing command to run.
134 """
135 expression_el = self.root.find("expression")
136 if expression_el is not None:
137 expression_type = expression_el.get("type")
138 if expression_type != "ecma5.1":
139 raise Exception("Unknown expression type [%s] encountered" % expression_type)
140 return expression_el.text
141 return None
142
143 def parse_environment_variables(self):
144 environment_variables_el = self.root.find("environment_variables")
145 if environment_variables_el is None:
146 return []
147
148 environment_variables = []
149 for environment_variable_el in environment_variables_el.findall("environment_variable"):
150 template = environment_variable_el.text
151 inject = environment_variable_el.get("inject")
152 if inject:
153 assert not template, "Cannot specify inject and environment variable template."
154 assert inject in ["api_key"]
155 if template:
156 assert not inject, "Cannot specify inject and environment variable template."
157 definition = {
158 "name": environment_variable_el.get("name"),
159 "template": template,
160 "inject": inject,
161 "strip": string_as_bool(environment_variable_el.get("strip", False)),
162 }
163 environment_variables.append(
164 definition
165 )
166 return environment_variables
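# Illustrative sketch of the block this method consumes; the variable names and
# template are invented, the element/attribute names follow the code above:
#
#   <environment_variables>
#       <environment_variable name="SOME_PATH" strip="true">${input.ext}</environment_variable>
#       <environment_variable name="GALAXY_API_KEY" inject="api_key" />
#   </environment_variables>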
167
168 def parse_home_target(self):
169 target = "job_home" if self.parse_profile() >= "18.01" else "shared_home"
170 command_el = self._command_el
171 command_legacy = (command_el is not None) and command_el.get("use_shared_home", None)
172 if command_legacy is not None:
173 target = "shared_home" if string_as_bool(command_legacy) else "job_home"
174 return target
175
176 def parse_tmp_target(self):
177 # Default to not touching TMPDIR et al., but do so if job_tmp is set
178 # in job_conf. This is a very conservative approach that shouldn't
179 # break or modify any configurations by default.
180 return "job_tmp_if_explicit"
181
182 def parse_docker_env_pass_through(self):
183 if self.parse_profile() < "18.01":
184 return ["GALAXY_SLOTS"]
185 else:
186 # Pass home, etc...
187 return super(XmlToolSource, self).parse_docker_env_pass_through()
188
189 def parse_interpreter(self):
190 interpreter = None
191 command_el = self._command_el
192 if command_el is not None:
193 interpreter = command_el.get("interpreter", None)
194 if interpreter and not self.legacy_defaults:
195 log.warning("Deprecated interpreter attribute on command element is now ignored.")
196 interpreter = None
197 return interpreter
198
199 def parse_version_command(self):
200 version_cmd = self.root.find("version_command")
201 if version_cmd is not None:
202 return version_cmd.text
203 else:
204 return None
205
206 def parse_version_command_interpreter(self):
207 if self.parse_version_command() is not None:
208 version_cmd = self.root.find("version_command")
209 version_cmd_interpreter = version_cmd.get("interpreter", None)
210 if version_cmd_interpreter:
211 return version_cmd_interpreter
212 return None
213
214 def parse_parallelism(self):
215 parallelism = self.root.find("parallelism")
216 parallelism_info = None
217 if parallelism is not None and parallelism.get("method"):
218 return ParallelismInfo(parallelism)
219 return parallelism_info
220
221 def parse_interactivetool(self):
222 interactivetool_el = self.root.find("entry_points")
223 rtt = []
224 if interactivetool_el is None:
225 return rtt
226 for ep_el in interactivetool_el.findall("entry_point"):
227 port = ep_el.find("port")
228 assert port is not None, ValueError('A port is required for InteractiveTools')
229 port = port.text.strip()
230 url = ep_el.find("url")
231 if url is not None:
232 url = url.text.strip()
233 name = ep_el.get('name', None)
234 if name:
235 name = name.strip()
236 requires_domain = string_as_bool(ep_el.attrib.get("requires_domain", False))
237 rtt.append(dict(port=port, url=url, name=name, requires_domain=requires_domain))
238 return rtt
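# Illustrative sketch of an entry_points block as read above; the name, port
# and url values are invented:
#
#   <entry_points>
#       <entry_point name="Notebook" requires_domain="true">
#           <port>8888</port>
#           <url>ipython/notebook</url>
#       </entry_point>
#   </entry_points>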
239
240 def parse_hidden(self):
241 hidden = xml_text(self.root, "hidden")
242 if hidden:
243 hidden = string_as_bool(hidden)
244 return hidden
245
246 def parse_redirect_url_params_elem(self):
247 return self.root.find("redirect_url_params")
248
249 def parse_sanitize(self):
250 return self._get_option_value("sanitize", True)
251
252 def parse_refresh(self):
253 return self._get_option_value("refresh", False)
254
255 def _get_option_value(self, key, default):
256 root = self.root
257 for option_elem in root.findall("options"):
258 if key in option_elem.attrib:
259 return string_as_bool(option_elem.get(key))
260 return default
261
262 @property
263 def _command_el(self):
264 return self.root.find("command")
265
266 def _get_attribute_as_bool(self, attribute, default, elem=None):
267 if elem is None:
268 elem = self.root
269 return string_as_bool(elem.get(attribute, default))
270
271 def parse_requirements_and_containers(self):
272 return requirements.parse_requirements_from_xml(self.root)
273
274 def parse_input_pages(self):
275 return XmlPagesSource(self.root)
276
277 def parse_provided_metadata_style(self):
278 style = None
279 out_elem = self.root.find("outputs")
280 if out_elem is not None and "provided_metadata_style" in out_elem.attrib:
281 style = out_elem.attrib["provided_metadata_style"]
282
283 if style is None:
284 style = "legacy" if self.parse_profile() < "17.09" else "default"
285
286 assert style in ["legacy", "default"]
287 return style
288
289 def parse_provided_metadata_file(self):
290 provided_metadata_file = "galaxy.json"
291 out_elem = self.root.find("outputs")
292 if out_elem is not None and "provided_metadata_file" in out_elem.attrib:
293 provided_metadata_file = out_elem.attrib["provided_metadata_file"]
294
295 return provided_metadata_file
296
297 def parse_outputs(self, tool):
298 out_elem = self.root.find("outputs")
299 outputs = OrderedDict()
300 output_collections = OrderedDict()
301 if out_elem is None:
302 return outputs, output_collections
303
304 data_dict = OrderedDict()
305
306 def _parse(data_elem, **kwds):
307 output_def = self._parse_output(data_elem, tool, **kwds)
308 data_dict[output_def.name] = output_def
309 return output_def
310
311 for _ in out_elem.findall("data"):
312 _parse(_)
313
314 def _parse_expression(output_elem, **kwds):
315 output_def = self._parse_expression_output(output_elem, tool, **kwds)
316 output_def.filters = output_elem.findall('filter')
317 data_dict[output_def.name] = output_def
318 return output_def
319
320 def _parse_collection(collection_elem):
321 name = collection_elem.get("name")
322 label = xml_text(collection_elem, "label")
323 default_format = collection_elem.get("format", "data")
324 collection_type = collection_elem.get("type", None)
325 collection_type_source = collection_elem.get("type_source", None)
326 collection_type_from_rules = collection_elem.get("type_from_rules", None)
327 structured_like = collection_elem.get("structured_like", None)
328 inherit_format = False
329 inherit_metadata = False
330 if structured_like:
331 inherit_format = string_as_bool(collection_elem.get("inherit_format", None))
332 inherit_metadata = string_as_bool(collection_elem.get("inherit_metadata", None))
333 default_format_source = collection_elem.get("format_source", None)
334 default_metadata_source = collection_elem.get("metadata_source", "")
335 filters = collection_elem.findall('filter')
336
337 dataset_collector_descriptions = None
338 if collection_elem.find("discover_datasets") is not None:
339 dataset_collector_descriptions = dataset_collector_descriptions_from_elem(collection_elem, legacy=False)
340 structure = ToolOutputCollectionStructure(
341 collection_type=collection_type,
342 collection_type_source=collection_type_source,
343 collection_type_from_rules=collection_type_from_rules,
344 structured_like=structured_like,
345 dataset_collector_descriptions=dataset_collector_descriptions,
346 )
347 output_collection = ToolOutputCollection(
348 name,
349 structure,
350 label=label,
351 filters=filters,
352 default_format=default_format,
353 inherit_format=inherit_format,
354 inherit_metadata=inherit_metadata,
355 default_format_source=default_format_source,
356 default_metadata_source=default_metadata_source,
357 )
358 outputs[output_collection.name] = output_collection
359
360 for data_elem in collection_elem.findall("data"):
361 _parse(
362 data_elem,
363 default_format=default_format,
364 default_format_source=default_format_source,
365 default_metadata_source=default_metadata_source,
366 )
367
368 for data_elem in collection_elem.findall("data"):
369 output_name = data_elem.get("name")
370 data = data_dict[output_name]
371 assert data
372 del data_dict[output_name]
373 output_collection.outputs[output_name] = data
374 output_collections[name] = output_collection
375
376 for out_child in out_elem:
377 if out_child.tag == "data":
378 _parse(out_child)
379 elif out_child.tag == "collection":
380 _parse_collection(out_child)
381 elif out_child.tag == "output":
382 output_type = out_child.get("type")
383 if output_type == "data":
384 _parse(out_child)
385 elif output_type == "collection":
386 out_child.attrib["type"] = out_child.get("collection_type")
387 out_child.attrib["type_source"] = out_child.get("collection_type_source")
388 _parse_collection(out_child)
389 else:
390 _parse_expression(out_child)
391 else:
392 log.warning("Unknown output tag encountered [%s]" % out_child.tag)
393
394 for output_def in data_dict.values():
395 outputs[output_def.name] = output_def
396 return outputs, output_collections
397
398 def _parse_output(
399 self,
400 data_elem,
401 tool,
402 default_format="data",
403 default_format_source=None,
404 default_metadata_source="",
405 expression_type=None,
406 ):
407 output = ToolOutput(data_elem.get("name"))
408 output_format = data_elem.get("format", default_format)
409 auto_format = string_as_bool(data_elem.get("auto_format", "false"))
410 if auto_format and output_format != "data":
411 raise ValueError("Setting format and auto_format is not supported at this time.")
412 elif auto_format:
413 output_format = "_sniff_"
414 output.format = output_format
415 output.change_format = data_elem.findall("change_format")
416 output.format_source = data_elem.get("format_source", default_format_source)
417 output.default_identifier_source = data_elem.get("default_identifier_source", 'None')
418 output.metadata_source = data_elem.get("metadata_source", default_metadata_source)
419 output.parent = data_elem.get("parent", None)
420 output.label = xml_text(data_elem, "label")
421 output.count = int(data_elem.get("count", 1))
422 output.filters = data_elem.findall('filter')
423 output.tool = tool
424 output.from_work_dir = data_elem.get("from_work_dir", None)
425 output.hidden = string_as_bool(data_elem.get("hidden", ""))
426 output.actions = ToolOutputActionGroup(output, data_elem.find('actions'))
427 output.dataset_collector_descriptions = dataset_collector_descriptions_from_elem(data_elem, legacy=self.legacy_defaults)
428 return output
429
430 def _parse_expression_output(self, output_elem, tool, **kwds):
431 output_type = output_elem.get("type")
432 from_expression = output_elem.get("from")
433 output = ToolExpressionOutput(
434 output_elem.get("name"),
435 output_type,
436 from_expression,
437 )
438 output.path = output_elem.get("value")
439 output.label = xml_text(output_elem, "label")
440
441 output.hidden = string_as_bool(output_elem.get("hidden", ""))
442 output.actions = ToolOutputActionGroup(output, output_elem.find('actions'))
443 output.dataset_collector_descriptions = []
444 return output
445
446 def parse_stdio(self):
447 """
448 Parse error handling from the command and stdio tags.
449
450 Returns a list of exit codes and a list of regexes:
451 - exit_codes contains all non-zero exit codes (":-1" and "1:") if
452 detect_errors is default (and not legacy), exit_code, or aggressive
453 - plus the oom_exit_code, if given and detect_errors is exit_code
454 - plus exit codes and regexes from the stdio tag;
455 these are prepended to the list, i.e. they are evaluated before the regexes
456 and exit codes derived from the attributes of the command tag,
457 so more specific regexes of the same or a more severe error level
458 are triggered first.
459 """
460
461 command_el = self._command_el
462 detect_errors = None
463 if command_el is not None:
464 detect_errors = command_el.get("detect_errors")
465
466 if detect_errors and detect_errors != "default":
467 if detect_errors == "exit_code":
468 oom_exit_code = None
469 if command_el is not None:
470 oom_exit_code = command_el.get("oom_exit_code", None)
471 if oom_exit_code is not None:
472 int(oom_exit_code)
473 exit_codes, regexes = error_on_exit_code(out_of_memory_exit_code=oom_exit_code)
474 elif detect_errors == "aggressive":
475 exit_codes, regexes = aggressive_error_checks()
476 else:
477 raise ValueError("Unknown detect_errors value encountered [%s]" % detect_errors)
478 elif len(self.root.findall('stdio')) == 0 and not self.legacy_defaults:
479 exit_codes, regexes = error_on_exit_code()
480 else:
481 exit_codes = []
482 regexes = []
483
484 if len(self.root.findall('stdio')) > 0:
485 parser = StdioParser(self.root)
486 exit_codes = parser.stdio_exit_codes + exit_codes
487 regexes = parser.stdio_regexes + regexes
488
489 return exit_codes, regexes
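# Illustrative sketch of the command-tag attributes inspected above (the exit
# code value is an example): a tool opting into exit-code based error detection
# with a dedicated out-of-memory code might declare
#
#   <command detect_errors="exit_code" oom_exit_code="42"><![CDATA[ ... ]]></command>
#
# while non-legacy tools (profile other than the 16.01 default) without a
# <stdio> block fall back to the error_on_exit_code() defaults.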
490
491 def parse_strict_shell(self):
492 command_el = self._command_el
493 if packaging.version.parse(self.parse_profile()) < packaging.version.parse('20.09'):
494 default = "False"
495 else:
496 default = "True"
497 if command_el is not None:
498 return string_as_bool(command_el.get("strict", default))
499 else:
500 return string_as_bool(default)
501
502 def parse_help(self):
503 help_elem = self.root.find('help')
504 return help_elem.text if help_elem is not None else None
505
506 @property
507 def macro_paths(self):
508 return self._macro_paths
509
510 @property
511 def source_path(self):
512 return self._source_path
513
514 def parse_tests_to_dict(self):
515 tests_elem = self.root.find("tests")
516 tests = []
517 rval = dict(
518 tests=tests
519 )
520
521 if tests_elem is not None:
522 for i, test_elem in enumerate(tests_elem.findall("test")):
523 tests.append(_test_elem_to_dict(test_elem, i))
524
525 return rval
526
527 def parse_profile(self):
528 # Pre-16.04 or default XML defaults
529 # - Use standard error for error detection.
530 # - Don't run shells with -e
531 # - Auto-check for implicit multiple outputs.
532 # - Auto-check for $param_file.
533 # - Enable buggy interpreter attribute.
534 return self.root.get("profile", "16.01")
535
536 def parse_python_template_version(self):
537 python_template_version = self.root.get("python_template_version", None)
538 if python_template_version is not None:
539 python_template_version = packaging.version.parse(python_template_version)
540 return python_template_version
541
542
543 def _test_elem_to_dict(test_elem, i):
544 rval = dict(
545 outputs=__parse_output_elems(test_elem),
546 output_collections=__parse_output_collection_elems(test_elem),
547 inputs=__parse_input_elems(test_elem, i),
548 expect_num_outputs=test_elem.get("expect_num_outputs"),
549 command=__parse_assert_list_from_elem(test_elem.find("assert_command")),
550 command_version=__parse_assert_list_from_elem(test_elem.find("assert_command_version")),
551 stdout=__parse_assert_list_from_elem(test_elem.find("assert_stdout")),
552 stderr=__parse_assert_list_from_elem(test_elem.find("assert_stderr")),
553 expect_exit_code=test_elem.get("expect_exit_code"),
554 expect_failure=string_as_bool(test_elem.get("expect_failure", False)),
555 maxseconds=test_elem.get("maxseconds", None),
556 )
557 _copy_to_dict_if_present(test_elem, rval, ["num_outputs"])
558 return rval
559
560
561 def __parse_input_elems(test_elem, i):
562 __expand_input_elems(test_elem)
563 return __parse_inputs_elems(test_elem, i)
564
565
566 def __parse_output_elems(test_elem):
567 outputs = []
568 for output_elem in test_elem.findall("output"):
569 name, file, attributes = __parse_output_elem(output_elem)
570 outputs.append({"name": name, "value": file, "attributes": attributes})
571 return outputs
572
573
574 def __parse_output_elem(output_elem):
575 attrib = dict(output_elem.attrib)
576 name = attrib.pop('name', None)
577 if name is None:
578 raise Exception("Test output does not have a 'name'")
579
580 file, attributes = __parse_test_attributes(output_elem, attrib, parse_discovered_datasets=True)
581 return name, file, attributes
582
583
584 def __parse_command_elem(test_elem):
585 assert_elem = test_elem.find("command")
586 return __parse_assert_list_from_elem(assert_elem)
587
588
589 def __parse_output_collection_elems(test_elem):
590 output_collections = []
591 for output_collection_elem in test_elem.findall("output_collection"):
592 output_collection_def = __parse_output_collection_elem(output_collection_elem)
593 output_collections.append(output_collection_def)
594 return output_collections
595
596
597 def __parse_output_collection_elem(output_collection_elem):
598 attrib = dict(output_collection_elem.attrib)
599 name = attrib.pop('name', None)
600 if name is None:
601 raise Exception("Test output collection does not have a 'name'")
602 element_tests = __parse_element_tests(output_collection_elem)
603 return TestCollectionOutputDef(name, attrib, element_tests).to_dict()
604
605
606 def __parse_element_tests(parent_element):
607 element_tests = {}
608 for element in parent_element.findall("element"):
609 element_attrib = dict(element.attrib)
610 identifier = element_attrib.pop('name', None)
611 if identifier is None:
612 raise Exception("Test primary dataset does not have a 'identifier'")
613 element_tests[identifier] = __parse_test_attributes(element, element_attrib, parse_elements=True)
614 return element_tests
615
616
617 def __parse_test_attributes(output_elem, attrib, parse_elements=False, parse_discovered_datasets=False):
618 assert_list = __parse_assert_list(output_elem)
619
620 # Allow either file or value to specify a target file to compare result with
621 # file was traditionally used by outputs and value by extra files.
622 file = attrib.pop('file', attrib.pop('value', None))
623
624 # A file is no longer required if a list of assertions is present.
625 attributes = {}
626 # Method of comparison
627 attributes['compare'] = attrib.pop('compare', 'diff').lower()
628 # Number of lines to allow to vary in logs (for dates, etc)
629 attributes['lines_diff'] = int(attrib.pop('lines_diff', '0'))
630 # Allow a file size to vary if sim_size compare
631 attributes['delta'] = int(attrib.pop('delta', DEFAULT_DELTA))
632 attributes['delta_frac'] = float(attrib['delta_frac']) if 'delta_frac' in attrib else DEFAULT_DELTA_FRAC
633 attributes['sort'] = string_as_bool(attrib.pop('sort', False))
634 attributes['decompress'] = string_as_bool(attrib.pop('decompress', False))
635 extra_files = []
636 if 'ftype' in attrib:
637 attributes['ftype'] = attrib['ftype']
638 for extra in output_elem.findall('extra_files'):
639 extra_files.append(__parse_extra_files_elem(extra))
640 metadata = {}
641 for metadata_elem in output_elem.findall('metadata'):
642 metadata[metadata_elem.get('name')] = metadata_elem.get('value')
643 md5sum = attrib.get("md5", None)
644 checksum = attrib.get("checksum", None)
645 element_tests = {}
646 if parse_elements:
647 element_tests = __parse_element_tests(output_elem)
648
649 primary_datasets = {}
650 if parse_discovered_datasets:
651 for primary_elem in (output_elem.findall("discovered_dataset") or []):
652 primary_attrib = dict(primary_elem.attrib)
653 designation = primary_attrib.pop('designation', None)
654 if designation is None:
655 raise Exception("Test primary dataset does not have a 'designation'")
656 primary_datasets[designation] = __parse_test_attributes(primary_elem, primary_attrib)
657
658 has_checksum = md5sum or checksum
659 has_nested_tests = extra_files or element_tests or primary_datasets
660 if not (assert_list or file or metadata or has_checksum or has_nested_tests):
661 raise Exception("Test output defines nothing to check (e.g. must have a 'file' check against, assertions to check, metadata or checksum tests, etc...)")
662 attributes['assert_list'] = assert_list
663 attributes['extra_files'] = extra_files
664 attributes['metadata'] = metadata
665 attributes['md5'] = md5sum
666 attributes['checksum'] = checksum
667 attributes['elements'] = element_tests
668 attributes['primary_datasets'] = primary_datasets
669 return file, attributes
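# Illustrative sketch of a test output element carrying the attributes parsed
# above; the file name, values and assertion are invented:
#
#   <output name="out_file1" file="expected.tabular" compare="diff"
#           lines_diff="2" ftype="tabular" sort="true">
#       <assert_contents>
#           <has_line line="#chrom" />
#       </assert_contents>
#   </output>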
670
671
672 def __parse_assert_list(output_elem):
673 assert_elem = output_elem.find("assert_contents")
674 return __parse_assert_list_from_elem(assert_elem)
675
676
677 def __parse_assert_list_from_elem(assert_elem):
678 assert_list = None
679
680 def convert_elem(elem):
681 """ Converts and XML element to a dictionary format, used by assertion checking code. """
682 tag = elem.tag
683 attributes = dict(elem.attrib)
684 converted_children = []
685 for child_elem in elem:
686 converted_children.append(convert_elem(child_elem))
687 return {"tag": tag, "attributes": attributes, "children": converted_children}
688 if assert_elem is not None:
689 assert_list = []
690 for assert_child in list(assert_elem):
691 assert_list.append(convert_elem(assert_child))
692
693 return assert_list
694
695
696 def __parse_extra_files_elem(extra):
697 # File or directory; when a directory, compare basename
698 # by basename.
699 attrib = dict(extra.attrib)
700 extra_type = attrib.pop('type', 'file')
701 extra_name = attrib.pop('name', None)
702 assert extra_type == 'directory' or extra_name is not None, \
703 'extra_files type (%s) requires a name attribute' % extra_type
704 extra_value, extra_attributes = __parse_test_attributes(extra, attrib)
705 return {
706 "value": extra_value,
707 "name": extra_name,
708 "type": extra_type,
709 "attributes": extra_attributes
710 }
711
712
713 def __expand_input_elems(root_elem, prefix=""):
714 __append_prefix_to_params(root_elem, prefix)
715
716 repeat_elems = root_elem.findall('repeat')
717 indices = {}
718 for repeat_elem in repeat_elems:
719 name = repeat_elem.get("name")
720 if name not in indices:
721 indices[name] = 0
722 index = 0
723 else:
724 index = indices[name] + 1
725 indices[name] = index
726
727 new_prefix = __prefix_join(prefix, name, index=index)
728 __expand_input_elems(repeat_elem, new_prefix)
729 __pull_up_params(root_elem, repeat_elem)
730
731 cond_elems = root_elem.findall('conditional')
732 for cond_elem in cond_elems:
733 new_prefix = __prefix_join(prefix, cond_elem.get("name"))
734 __expand_input_elems(cond_elem, new_prefix)
735 __pull_up_params(root_elem, cond_elem)
736
737 section_elems = root_elem.findall('section')
738 for section_elem in section_elems:
739 new_prefix = __prefix_join(prefix, section_elem.get("name"))
740 __expand_input_elems(section_elem, new_prefix)
741 __pull_up_params(root_elem, section_elem)
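# Illustrative note on the flattening performed above (names invented): a test
# <param name="input"> nested in <repeat name="queries"> is pulled up to the
# test element and renamed "queries_0|input" (a second instance of the repeat
# would yield "queries_1|input"); conditionals and sections contribute their
# name without an index, e.g. "advanced|input".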
742
743
744 def __append_prefix_to_params(elem, prefix):
745 for param_elem in elem.findall('param'):
746 param_elem.set("name", __prefix_join(prefix, param_elem.get("name")))
747
748
749 def __pull_up_params(parent_elem, child_elem):
750 for param_elem in child_elem.findall('param'):
751 parent_elem.append(param_elem)
752
753
754 def __prefix_join(prefix, name, index=None):
755 name = name if index is None else "%s_%d" % (name, index)
756 return name if not prefix else "%s|%s" % (prefix, name)
757
758
759 def _copy_to_dict_if_present(elem, rval, attributes):
760 for attribute in attributes:
761 if attribute in elem.attrib:
762 rval[attribute] = elem.get(attribute)
763 return rval
764
765
766 def __parse_inputs_elems(test_elem, i):
767 raw_inputs = []
768 for param_elem in test_elem.findall("param"):
769 raw_inputs.append(__parse_param_elem(param_elem, i))
770
771 return raw_inputs
772
773
774 def __parse_param_elem(param_elem, i=0):
775 attrib = dict(param_elem.attrib)
776 if 'values' in attrib:
777 value = attrib['values'].split(',')
778 elif 'value' in attrib:
779 value = attrib['value']
780 else:
781 value = None
782 children_elem = param_elem
783 if children_elem is not None:
784 # At this time we can assume that only DataToolParameter test
785 # items have children, but this could change and would require
786 # the parsing below to distinguish between the kinds of child
787 # items (composite_data, metadata, edit_attributes, collection).
788 attrib['metadata'] = {}
789 attrib['composite_data'] = []
790 attrib['edit_attributes'] = []
791 # Composite datasets need to be renamed uniquely
792 composite_data_name = None
793 for child in children_elem:
794 if child.tag == 'composite_data':
795 file_name = child.get("value")
796 attrib['composite_data'].append(file_name)
797 if composite_data_name is None:
798 # Generate a unique name; each test uses a
799 # fresh history.
800 composite_data_name = '_COMPOSITE_RENAMED_t%d_%s' \
801 % (i, uuid.uuid1().hex)
802 elif child.tag == 'metadata':
803 attrib['metadata'][child.get("name")] = child.get("value")
804 elif child.tag == 'edit_attributes':
805 attrib['edit_attributes'].append(child)
806 elif child.tag == 'collection':
807 attrib['collection'] = TestCollectionDef.from_xml(child, __parse_param_elem)
808 if composite_data_name:
809 # Composite datasets need implicit renaming;
810 # inserted at front of list so explicit declarations
811 # take precedence
812 attrib['edit_attributes'].insert(0, {'type': 'name', 'value': composite_data_name})
813 name = attrib.pop('name')
814 return {
815 "name": name,
816 "value": value,
817 "attributes": attrib
818 }
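# Illustrative sketch of a test <param> with child elements handled above; the
# file, metadata and new name are invented:
#
#   <param name="input1" value="sequences.fasta" ftype="fasta">
#       <metadata name="dbkey" value="hg19" />
#       <edit_attributes type="name" value="renamed dataset" />
#   </param>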
819
820
821 class StdioParser(object):
822
823 def __init__(self, root):
824 try:
825 self.stdio_exit_codes = list()
826 self.stdio_regexes = list()
827
828 # We should have a single <stdio> element, but handle the case for
829 # multiples.
830 # For every stdio element, add all of the exit_code and regex
831 # subelements that we find:
832 for stdio_elem in (root.findall('stdio')):
833 self.parse_stdio_exit_codes(stdio_elem)
834 self.parse_stdio_regexes(stdio_elem)
835 except Exception:
836 log.exception("Exception in parse_stdio!")
837
838 def parse_stdio_exit_codes(self, stdio_elem):
839 """
840 Parse the tool's <stdio> element's <exit_code> subelements.
841 This will add all of those elements, if any, to self.stdio_exit_codes.
842 """
843 try:
844 # Look for all <exit_code> elements. Each exit_code element must
845 # have a range/value.
846 # Exit-code ranges have precedence over a single exit code.
847 # So if there are value and range attributes, we use the range
848 # attribute. If there is neither a range nor a value, then print
849 # a warning and skip to the next.
850 for exit_code_elem in (stdio_elem.findall("exit_code")):
851 exit_code = ToolStdioExitCode()
852 # Each exit code has an optional description that can be
853 # part of the "desc" or "description" attributes:
854 exit_code.desc = exit_code_elem.get("desc")
855 if exit_code.desc is None:
856 exit_code.desc = exit_code_elem.get("description")
857 # Parse the error level:
858 exit_code.error_level = (
859 self.parse_error_level(exit_code_elem.get("level")))
860 code_range = exit_code_elem.get("range")
861 if code_range is None:
862 code_range = exit_code_elem.get("value")
863 if code_range is None:
864 log.warning("Tool stdio exit codes must have a range or value")
865 continue
866 # Parse the range. We look for:
867 # :Y
868 # X:
869 # X:Y - Split on the colon. We do not allow a colon
870 # without a beginning or end, though we could.
871 # Also note that whitespace is eliminated.
872 # TODO: Turn this into a single match - it should be
873 # more efficient.
874 code_range = re.sub(r"\s", "", code_range)
875 code_ranges = re.split(r":", code_range)
876 if (len(code_ranges) == 2):
877 if (code_ranges[0] is None or '' == code_ranges[0]):
878 exit_code.range_start = float("-inf")
879 else:
880 exit_code.range_start = int(code_ranges[0])
881 if (code_ranges[1] is None or '' == code_ranges[1]):
882 exit_code.range_end = float("inf")
883 else:
884 exit_code.range_end = int(code_ranges[1])
885 # If we got more than one colon, then ignore the exit code.
886 elif (len(code_ranges) > 2):
887 log.warning("Invalid tool exit_code range %s - ignored"
888 % code_range)
889 continue
890 # Else we have a singular value. If it's not an integer, then
891 # we'll just write a log message and skip this exit_code.
892 else:
893 try:
894 exit_code.range_start = int(code_range)
895 except Exception:
896 log.error(code_range)
897 log.warning("Invalid range start for tool's exit_code %s: exit_code ignored" % code_range)
898 continue
899 exit_code.range_end = exit_code.range_start
900 # TODO: Check if we got ">", ">=", "<", or "<=":
901 # Check that the range, regardless of how we got it,
902 # isn't bogus. If we have two infinite values, then
903 # the start must be -inf and the end must be +inf.
904 # So at least warn about this situation:
905 if isinf(exit_code.range_start) and isinf(exit_code.range_end):
906 log.warning("Tool exit_code range %s will match on all exit codes" % code_range)
907 self.stdio_exit_codes.append(exit_code)
908 except Exception:
909 log.exception("Exception in parse_stdio_exit_codes!")
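# Illustrative sketch of exit_code elements using the range forms parsed above
# (":Y", "X:" and "X:Y"); the descriptions are invented:
#
#   <stdio>
#       <exit_code range="1:8" level="warning" description="transient problem" />
#       <exit_code range="9:" level="fatal" description="hard failure" />
#       <exit_code range=":-1" level="fatal_oom" />
#   </stdio>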
910
911 def parse_stdio_regexes(self, stdio_elem):
912 """
913 Look in the tool's <stdio> elem for all <regex> subelements
914 that define how to look for warnings and fatal errors in
915 stdout and stderr. This will add all such regex elements
916 to this parser's stdio_regexes list.
917 """
918 try:
919 # Look for every <regex> subelement. The regular expression
920 # will have "match" and "source" (or "src") attributes.
921 for regex_elem in (stdio_elem.findall("regex")):
922 # TODO: Fill in ToolStdioRegex
923 regex = ToolStdioRegex()
924 # Each regex has an optional description that can be
925 # part of the "desc" or "description" attributes:
926 regex.desc = regex_elem.get("desc")
927 if regex.desc is None:
928 regex.desc = regex_elem.get("description")
929 # Parse the error level
930 regex.error_level = (
931 self.parse_error_level(regex_elem.get("level")))
932 regex.match = regex_elem.get("match")
933 if regex.match is None:
934 # TODO: Convert the offending XML element to a string
935 log.warning("Ignoring tool's stdio regex element %s - "
936 "the 'match' attribute must exist")
937 continue
938 # Parse the output sources. We look for the "src", "source",
939 # and "sources" attributes, in that order. If there is no
940 # such source, then the source defaults to stderr & stdout.
941 # Look for a comma and then look for "err", "error", "out",
942 # and "output":
943 output_srcs = regex_elem.get("src")
944 if output_srcs is None:
945 output_srcs = regex_elem.get("source")
946 if output_srcs is None:
947 output_srcs = regex_elem.get("sources")
948 if output_srcs is None:
949 output_srcs = "output,error"
950 output_srcs = re.sub(r"\s", "", output_srcs)
951 src_list = re.split(r",", output_srcs)
952 # Just put together anything to do with "out", including
953 # "stdout", "output", etc. Repeat for "stderr", "error",
954 # and anything to do with "err". If neither stdout nor
955 # stderr were specified, then raise a warning and scan both.
956 for src in src_list:
957 if re.search("both", src, re.IGNORECASE):
958 regex.stdout_match = True
959 regex.stderr_match = True
960 if re.search("out", src, re.IGNORECASE):
961 regex.stdout_match = True
962 if re.search("err", src, re.IGNORECASE):
963 regex.stderr_match = True
964 if (not regex.stdout_match and not regex.stderr_match):
965 log.warning("Tool id %s: unable to determine if tool "
966 "stream source scanning is output, error, "
967 "or both. Defaulting to use both." % self.id)
968 regex.stdout_match = True
969 regex.stderr_match = True
970 self.stdio_regexes.append(regex)
971 except Exception:
972 log.exception("Exception in parse_stdio_regexes!")
973
974 # TODO: This method doesn't have to be part of this class; it could be a module-level helper.
975 def parse_error_level(self, err_level):
976 """
977 Parses error level and returns error level enumeration. If
978 unparsable, returns 'fatal'
979 """
980 return_level = StdioErrorLevel.FATAL
981 try:
982 if err_level:
983 if (re.search("log", err_level, re.IGNORECASE)):
984 return_level = StdioErrorLevel.LOG
985 elif (re.search("qc", err_level, re.IGNORECASE)):
986 return_level = StdioErrorLevel.QC
987 elif (re.search("warning", err_level, re.IGNORECASE)):
988 return_level = StdioErrorLevel.WARNING
989 elif (re.search("fatal_oom", err_level, re.IGNORECASE)):
990 return_level = StdioErrorLevel.FATAL_OOM
991 elif (re.search("fatal", err_level, re.IGNORECASE)):
992 return_level = StdioErrorLevel.FATAL
993 else:
994 log.debug("Tool %s: error level %s did not match log/warning/fatal" %
995 (self.id, err_level))
996 except Exception:
997 log.exception("Exception in parse_error_level")
998 return return_level
999
1000
1001 class XmlPagesSource(PagesSource):
1002
1003 def __init__(self, root):
1004 self.input_elem = root.find("inputs")
1005 page_sources = []
1006 if self.input_elem is not None:
1007 pages_elem = self.input_elem.findall("page")
1008 for page in (pages_elem or [self.input_elem]):
1009 page_sources.append(XmlPageSource(page))
1010 super(XmlPagesSource, self).__init__(page_sources)
1011
1012 @property
1013 def inputs_defined(self):
1014 return self.input_elem is not None
1015
1016
1017 class XmlPageSource(PageSource):
1018
1019 def __init__(self, parent_elem):
1020 self.parent_elem = parent_elem
1021
1022 def parse_display(self):
1023 display_elem = self.parent_elem.find("display")
1024 if display_elem is not None:
1025 display = xml_to_string(display_elem)
1026 else:
1027 display = None
1028 return display
1029
1030 def parse_input_sources(self):
1031 return map(XmlInputSource, self.parent_elem)
1032
1033
1034 class XmlInputSource(InputSource):
1035
1036 def __init__(self, input_elem):
1037 self.input_elem = input_elem
1038 self.input_type = self.input_elem.tag
1039
1040 def parse_input_type(self):
1041 return self.input_type
1042
1043 def elem(self):
1044 return self.input_elem
1045
1046 def get(self, key, value=None):
1047 return self.input_elem.get(key, value)
1048
1049 def get_bool(self, key, default):
1050 return string_as_bool(self.get(key, default))
1051
1052 def parse_label(self):
1053 return xml_text(self.input_elem, "label")
1054
1055 def parse_help(self):
1056 return xml_text(self.input_elem, "help")
1057
1058 def parse_sanitizer_elem(self):
1059 return self.input_elem.find("sanitizer")
1060
1061 def parse_validator_elems(self):
1062 return self.input_elem.findall("validator")
1063
1064 def parse_dynamic_options_elem(self):
1065 """ Return a galaxy.tools.parameters.dynamic_options.DynamicOptions
1066 if appropriate.
1067 """
1068 options_elem = self.input_elem.find('options')
1069 return options_elem
1070
1071 def parse_static_options(self):
1072 static_options = list()
1073 elem = self.input_elem
1074 for option in elem.findall("option"):
1075 value = option.get("value")
1076 selected = string_as_bool(option.get("selected", False))
1077 static_options.append((option.text or value, value, selected))
1078 return static_options
1079
1080 def parse_optional(self, default=None):
1081 """ Return boolean indicating whether parameter is optional. """
1082 elem = self.input_elem
1083 if self.get('type') == "data_column":
1084 # Allow specifying force_select for backward compat., but probably
1085 # should use optional going forward for consistency with other
1086 # parameters.
1087 if "force_select" in elem.attrib:
1088 force_select = string_as_bool(elem.get("force_select"))
1089 else:
1090 force_select = not string_as_bool(elem.get("optional", False))
1091 return not force_select
1092
1093 if default is None:
1094 default = self.default_optional
1095 return self.get_bool("optional", default)
1096
1097 def parse_conversion_tuples(self):
1098 elem = self.input_elem
1099 conversions = []
1100 for conv_elem in elem.findall("conversion"):
1101 name = conv_elem.get("name") # name for commandline substitution
1102 conv_extensions = conv_elem.get("type") # target datatype extension
1103 conversions.append((name, conv_extensions))
1104 return conversions
1105
1106 def parse_nested_inputs_source(self):
1107 elem = self.input_elem
1108 return XmlPageSource(elem)
1109
1110 def parse_test_input_source(self):
1111 elem = self.input_elem
1112 input_elem = elem.find("param")
1113 assert input_elem is not None, "<conditional> must have a child <param>"
1114 return XmlInputSource(input_elem)
1115
1116 def parse_when_input_sources(self):
1117 elem = self.input_elem
1118
1119 sources = []
1120 for case_elem in elem.findall("when"):
1121 value = case_elem.get("value")
1122 case_page_source = XmlPageSource(case_elem)
1123 sources.append((value, case_page_source))
1124 return sources
1125
1126
1127 class ParallelismInfo(object):
1128 """
1129 Stores the information (if any) for running multiple instances of the tool in parallel
1130 on the same set of inputs.
1131 """
1132
1133 def __init__(self, tag):
1134 self.method = tag.get('method')
1135 if isinstance(tag, dict):
1136 items = tag.items()
1137 else:
1138 items = tag.attrib.items()
1139 self.attributes = dict([item for item in items if item[0] != 'method'])
1140 if len(self.attributes) == 0:
1141 # legacy basic mode - provide compatible defaults
1142 self.attributes['split_size'] = 20
1143 self.attributes['split_mode'] = 'number_of_parts'