annotate env/lib/python3.7/site-packages/gxformat2/markdown_parse.py @ 0:26e78fe6e8c4 draft

"planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
author shellac
date Sat, 02 May 2020 07:14:21 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1 """Utilities for parsing "Galaxy Flavored Markdown".
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
2
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
3 See markdown_util.py for loading objects and interacting with the rest of Galaxy.
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
4 This file is meant to be relatively self contained and just used to "parse" and validate
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
5 Galaxy Markdown. Keeping things isolated to allow re-use of these utilities in other
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
6 projects (e.g. gxformat2).
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
7 """
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
8 import re
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
9
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
10 BLOCK_FENCE_START = re.compile(r'```.*')
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
11 BLOCK_FENCE_END = re.compile(r'```[\s]*')
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
12 GALAXY_FLAVORED_MARKDOWN_CONTAINER_LINE_PATTERN = re.compile(
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
13 r"```\s*galaxy\s*"
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
14 )
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
15 VALID_CONTAINER_END_PATTERN = re.compile(r"^```\s*$")
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
16 GALAXY_FLAVORED_MARKDOWN_CONTAINERS = [
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
17 "history_dataset_display",
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
18 "history_dataset_collection_display",
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
19 "history_dataset_as_image",
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
20 "history_dataset_peek",
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
21 "history_dataset_info",
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
22 "workflow_display",
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
23 "job_metrics",
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
24 "job_parameters",
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
25 "tool_stderr",
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
26 "tool_stdout",
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
27 ]
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
28 INVOCATION_SECTIONS = [
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
29 "invocation_inputs",
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
30 "invocation_outputs",
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
31 ]
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
32 ALL_CONTAINER_TYPES = GALAXY_FLAVORED_MARKDOWN_CONTAINERS + INVOCATION_SECTIONS
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
33 GALAXY_FLAVORED_MARKDOWN_CONTAINER_REGEX = "(%s)" % "|".join(ALL_CONTAINER_TYPES)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
34
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
35 ARG_VAL_REGEX = r'''[\w_\-]+|\"[^\"]+\"|\'[^\']+\''''
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
36 FUNCTION_ARG = r'\s*\w+\s*=\s*(?:%s)\s*' % ARG_VAL_REGEX
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
37 FUNCTION_CALL_LINE_TEMPLATE = r'\s*%s\s*\((?:' + FUNCTION_ARG + r')?\)\s*'
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
38 GALAXY_MARKDOWN_FUNCTION_CALL_LINE = re.compile(FUNCTION_CALL_LINE_TEMPLATE % GALAXY_FLAVORED_MARKDOWN_CONTAINER_REGEX)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
39 WHITE_SPACE_ONLY_PATTERN = re.compile(r"^[\s]+$")
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
40
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
41
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
42 def validate_galaxy_markdown(galaxy_markdown, internal=True):
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
43 """Validate the supplied markdown and throw an ValueError with reason if invalid."""
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
44 expecting_container_close_for = None
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
45 last_line_no = 0
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
46 function_calls = 0
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
47 for (line, fenced, open_fence, line_no) in _split_markdown_lines(galaxy_markdown):
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
48 last_line_no = line_no
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
49
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
50 def invalid_line(template, **kwd):
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
51 if "line" in kwd:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
52 kwd["line"] = line.rstrip("\r\n")
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
53 raise ValueError("Invalid line %d: %s" % (line_no + 1, template.format(**kwd)))
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
54
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
55 expecting_container_close = expecting_container_close_for is not None
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
56 if not fenced and expecting_container_close:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
57 invalid_line("[{line}] is not expected close line for [{expected_for}]", line=line, expected_for=expecting_container_close_for)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
58 continue
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
59 elif not fenced:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
60 continue
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
61 elif fenced and expecting_container_close and BLOCK_FENCE_END.match(line):
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
62 # reset
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
63 expecting_container_close_for = None
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
64 function_calls = 0
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
65 elif open_fence and GALAXY_FLAVORED_MARKDOWN_CONTAINER_LINE_PATTERN.match(line):
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
66 if expecting_container_close:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
67 if not VALID_CONTAINER_END_PATTERN.match(line):
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
68 invalid_line("Invalid command close line [{line}] for [{expected_for}]", line=line, expected_for=expecting_container_close_for)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
69 # else closing container and we're done
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
70 expecting_container_close_for = None
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
71 function_calls = 0
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
72 continue
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
73
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
74 expecting_container_close_for = line
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
75 continue
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
76 elif fenced and line and expecting_container_close_for:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
77 if GALAXY_MARKDOWN_FUNCTION_CALL_LINE.match(line):
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
78 function_calls += 1
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
79 if function_calls > 1:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
80 invalid_line("Only one Galaxy directive is allowed per fenced Galaxy block (```galaxy)")
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
81 continue
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
82 else:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
83 invalid_line("Invalid embedded Galaxy markup line [{line}]", line=line)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
84
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
85 # Markdown unrelated to Galaxy object containers.
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
86 continue
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
87
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
88 if expecting_container_close_for:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
89 template = "Invalid line %d: %s"
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
90 msg = template % (last_line_no, "close of block for [{expected_for}] expected".format(expected_for=expecting_container_close_for))
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
91 raise ValueError(msg)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
92
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
93
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
94 def _split_markdown_lines(markdown):
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
95 """Yield lines of a markdown document line-by-line keeping track of fencing.
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
96
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
97 'Fenced' lines are code-like block (e.g. between ```) that shouldn't contain
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
98 Markdown markup.
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
99 """
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
100 block_fenced = False
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
101 indent_fenced = False
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
102 for line_number, line in enumerate(markdown.splitlines(True)):
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
103 open_fence_this_iteration = False
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
104 indent_fenced = line.startswith(" ") or (indent_fenced and WHITE_SPACE_ONLY_PATTERN.match(line))
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
105 if not block_fenced:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
106 if BLOCK_FENCE_START.match(line):
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
107 open_fence_this_iteration = True
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
108 block_fenced = True
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
109 yield (line, block_fenced or indent_fenced, open_fence_this_iteration, line_number)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
110 if not open_fence_this_iteration and BLOCK_FENCE_END.match(line):
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
111 block_fenced = False
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
112
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
113
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
114 __all__ = (
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
115 'validate_galaxy_markdown',
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
116 'GALAXY_MARKDOWN_FUNCTION_CALL_LINE',
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
117 )