Mercurial > repos > shellac > guppy_basecaller
comparison env/lib/python3.7/site-packages/humanfriendly/usage.py @ 5:9b1c78e6ba9c draft default tip
"planemo upload commit 6c0a8142489327ece472c84e558c47da711a9142"
author | shellac |
---|---|
date | Mon, 01 Jun 2020 08:59:25 -0400 |
parents | 79f47841a781 |
children |
comparison
equal
deleted
inserted
replaced
4:79f47841a781 | 5:9b1c78e6ba9c |
---|---|
1 # Human friendly input/output in Python. | |
2 # | |
3 # Author: Peter Odding <peter@peterodding.com> | |
4 # Last Change: June 24, 2017 | |
5 # URL: https://humanfriendly.readthedocs.io | |
6 | |
7 """ | |
8 Parsing and reformatting of usage messages. | |
9 | |
10 The :mod:`~humanfriendly.usage` module parses and reformats usage messages: | |
11 | |
12 - The :func:`format_usage()` function takes a usage message and inserts ANSI | |
13 escape sequences that highlight items of special significance like command | |
14 line options, meta variables, etc. The resulting usage message is (intended | |
15 to be) easier to read on a terminal. | |
16 | |
17 - The :func:`render_usage()` function takes a usage message and rewrites it to | |
18 reStructuredText_ suitable for inclusion in the documentation of a Python | |
19 package. This provides a DRY_ solution to keeping a single authoritative | |
20 definition of the usage message while making it easily available in | |
21 documentation. As a cherry on the cake it's not just a pre-formatted dump of | |
22 the usage message but a nicely formatted reStructuredText_ fragment. | |
23 | |
24 - The remaining functions in this module support the two functions above. | |
25 | |
26 Usage messages in general are free format of course, however the functions in | |
27 this module assume a certain structure from usage messages in order to | |
28 successfully parse and reformat them, refer to :func:`parse_usage()` for | |
29 details. | |
30 | |
31 .. _DRY: https://en.wikipedia.org/wiki/Don%27t_repeat_yourself | |
32 .. _reStructuredText: https://en.wikipedia.org/wiki/ReStructuredText | |
33 """ | |
34 | |
35 # Standard library modules. | |
36 import csv | |
37 import functools | |
38 import logging | |
39 import re | |
40 | |
41 # Standard library module or external dependency (see setup.py). | |
42 from importlib import import_module | |
43 | |
44 # Modules included in our package. | |
45 from humanfriendly.compat import StringIO | |
46 from humanfriendly.text import dedent, split_paragraphs, trim_empty_lines | |
47 | |
# Public identifiers that require documentation.
__all__ = (
    'find_meta_variables',
    'format_usage',
    'import_module',  # previously exported (backwards compatibility)
    'inject_usage',
    'parse_usage',
    'render_usage',
    'USAGE_MARKER',
)

# Prefix that format_usage() and render_paragraph() test for (using
# str.startswith()) to recognize the "Usage: ..." line of a usage message.
USAGE_MARKER = "Usage:"
"""The string that starts the first line of a usage message."""

# Paragraph that parse_usage() treats as the last paragraph of the
# introduction; everything after it is parsed as documentation of options.
START_OF_OPTIONS_MARKER = "Supported options:"
"""The string that marks the start of the documented command line options."""
64 | |
# Compiled regular expression used to tokenize usage messages.
# find_meta_variables() scans usage messages with this pattern and
# replace_special_tokens() uses it to substitute the matched tokens. The
# pattern is compiled with re.VERBOSE, so the whitespace and the comments
# inside the literal are ignored by the regular expression engine.
USAGE_PATTERN = re.compile(r'''
    # Make sure whatever we're matching isn't preceded by a non-whitespace
    # character.
    (?<!\S)
    (
        # A short command line option or a long command line option
        # (possibly including a meta variable for a value).
        (-\w|--\w+(-\w+)*(=\S+)?)
        # Or ...
        |
        # An environment variable.
        \$[A-Za-z_][A-Za-z0-9_]*
        # Or ...
        |
        # Might be a meta variable (usage() will figure it out).
        [A-Z][A-Z0-9_]+
    )
''', re.VERBOSE)

# Compiled regular expression used to recognize options.
# This is the option alternative of USAGE_PATTERN, anchored so that it only
# matches a complete token. parse_usage() applies it to every comma separated
# token of a paragraph to decide whether that paragraph starts the
# documentation of another command line option.
OPTION_PATTERN = re.compile(r'^(-\w|--\w+(-\w+)*(=\S+)?)$')

# Initialize a logger for this module.
logger = logging.getLogger(__name__)
90 | |
91 | |
def format_usage(usage_text):
    """
    Add ANSI highlighting to the special items in a usage message.

    :param usage_text: The usage message to process (a string).
    :returns: The usage message with special items highlighted (a string).

    Surrounding whitespace is stripped from the usage message, after which
    the message is processed line by line. The following items are
    highlighted, all of them in the color defined by
    :data:`.HIGHLIGHT_COLOR`:

    - The initial line of the form "Usage: ..." (the line as a whole)
    - Short and long command line options
    - Environment variables
    - Meta variables (see :func:`find_meta_variables()`)
    """
    # Imported here instead of at module level to avoid circular import
    # errors caused by the interdependencies between the
    # humanfriendly.terminal and humanfriendly.usage modules.
    from humanfriendly.terminal import ansi_wrap, HIGHLIGHT_COLOR

    def highlight(fragment):
        return ansi_wrap(fragment, color=HIGHLIGHT_COLOR)

    known_variables = find_meta_variables(usage_text)
    # Line endings are preserved while splitting so that the processed lines
    # can simply be concatenated again.
    return ''.join(
        # The "Usage: ..." line is highlighted as a whole, on other lines
        # only options, meta variables and environment variables are.
        highlight(line) if line.startswith(USAGE_MARKER)
        else replace_special_tokens(line, known_variables, highlight)
        for line in usage_text.strip().splitlines(True)
    )
125 | |
126 | |
def find_meta_variables(usage_text):
    """
    Collect the meta variables used in the given usage message.

    :param usage_text: The usage message to parse (a string).
    :returns: A list of strings with the meta variables found in the usage
              message (without duplicates, in no particular order).

    By convention a command line option that requires an argument is
    written as ``--option=ARG``; the text ``ARG`` in this example is the
    meta variable. Only tokens that start with a dash and contain a
    non-empty value after the first ``=`` contribute a meta variable.
    """
    # For every option token take whatever follows the first equals sign
    # (an empty string when the option doesn't take a value).
    option_values = (
        match.group(0).partition('=')[2]
        for match in USAGE_PATTERN.finditer(usage_text)
        if match.group(0).startswith('-')
    )
    # A set is used to discard duplicates and empty values are skipped.
    return list({value for value in option_values if value})
147 | |
148 | |
def parse_usage(text):
    """
    Split a usage message into its introduction and its documented options.

    :param text: The usage message to parse (a string).
    :returns: A tuple of two lists:

              1. A list of strings with the paragraphs of the usage
                 message's "introduction": every paragraph up to and
                 including the ``Supported options:`` paragraph (or all
                 paragraphs when that marker isn't present).

              2. A flat list of strings that alternates between a line
                 listing a supported command line option and the
                 description of that option: item zero is an option line,
                 item one is its description, item two is the next option
                 line, etc. The description is an empty string when an
                 option line isn't followed by descriptive paragraphs.

    Usage messages are free format, however :func:`parse_usage()` assumes
    the following structure in order to parse them:

    - The usage message starts with a line ``Usage: ...`` that shows a
      symbolic representation of the way the program is to be invoked.

    - After some free form text a paragraph ``Supported options:`` precedes
      the documentation of the supported command line options.

    - The command line options are documented as follows::

        -v, --verbose

          Make more noise.

      So all of the variants of a command line option are listed together
      (separated by a comma and a space) in a paragraph of their own,
      followed by one or more paragraphs describing the option.

    The usage message of the `humanfriendly` package (defined in the
    :mod:`humanfriendly.cli` module) is an example of a message that
    follows these conventions.
    """
    def lists_options(paragraph):
        # Split on a comma followed by whitespace (instead of on a bare
        # comma) so that option values whose label contains commas don't
        # trip up the parsing.
        tokens = [t.strip() for t in re.split(r',\s', paragraph) if t and not t.isspace()]
        return all(OPTION_PATTERN.match(t) for t in tokens)

    # Split the raw usage message into paragraphs.
    paragraphs = split_paragraphs(text)
    total = len(paragraphs)
    # The introduction ends with the marker paragraph; without a marker the
    # whole usage message is considered to be the introduction.
    try:
        position = paragraphs.index(START_OF_OPTIONS_MARKER) + 1
    except ValueError:
        position = total
    introduction = paragraphs[:position]
    logger.debug("Parsed introduction: %s", introduction)
    # Walk through the remaining paragraphs: the first one is always taken
    # as an option line, followed by the paragraphs of its description.
    documented_options = []
    while position < total:
        documented_options.append(dedent(paragraphs[position]))
        position += 1
        # The description runs until the next paragraph that consists
        # solely of command line options (or until the end of the message).
        description_start = position
        while position < total and not lists_options(paragraphs[position]):
            position += 1
        # Join the description's paragraphs back together so that their
        # common leading indentation can be removed.
        description = paragraphs[description_start:position]
        documented_options.append(dedent('\n\n'.join(description)))
    logger.debug("Parsed options: %s", documented_options)
    return introduction, documented_options
232 | |
233 | |
def render_usage(text):
    """
    Convert a command line program's usage message to reStructuredText_.

    :param text: The plain text usage message (a string).
    :returns: The usage message rendered to reStructuredText_ (a string).

    The paragraphs of the introduction are rendered one by one using
    :func:`render_paragraph()`. When the usage message documents command
    line options these are rendered as a ``csv-table`` directive with one
    row per option (the option variants in the first column, the
    description in the second column).
    """
    meta_variables = find_meta_variables(text)
    introduction, options = parse_usage(text)

    def render(paragraph):
        return render_paragraph(paragraph, meta_variables)

    output = list(map(render, introduction))
    if options:
        # NOTE(review): the leading whitespace inside these literals (and in
        # the row prefix below) is reproduced as seen in the reviewed copy of
        # this file, where runs of whitespace may have been collapsed -
        # confirm against the upstream source before applying.
        directive = [
            '.. csv-table::',
            ' :header: Option, Description',
            ' :widths: 30, 70',
            '',
        ]
        output.append('\n'.join(directive))
        csv_buffer = StringIO()
        csv_writer = csv.writer(csv_buffer)
        # parse_usage() returns a flat list in which option lines (even
        # positions) alternate with their descriptions (odd positions).
        for variants, description in zip(options[0::2], options[1::2]):
            option_cell = render(variants)
            description_cell = '\n\n'.join(map(render, split_paragraphs(description)))
            csv_writer.writerow([option_cell, description_cell.rstrip()])
        # Indent the CSV rows so they become the content of the directive.
        table_rows = [' %s' % row for row in csv_buffer.getvalue().splitlines()]
        output.append('\n'.join(table_rows))
    logger.debug("Rendered output: %s", output)
    return '\n\n'.join(map(trim_empty_lines, output))
264 | |
265 | |
def inject_usage(module_name):
    """
    Inject a rendered usage message into a reStructuredText_ file using cog_.

    :param module_name: The name of the module whose ``__doc__`` attribute is
                        the source of the usage message (a string).

    This function imports the named module, renders its docstring using
    :func:`render_usage()` and passes the result to ``cog.out()``. To use it
    you add a fragment like the following to your ``*.rst`` file::

        .. [[[cog
        .. from humanfriendly.usage import inject_usage
        .. inject_usage('humanfriendly.cli')
        .. ]]]
        .. [[[end]]]

    The lines in the fragment above are single line reStructuredText_
    comments that are not copied to the output, they just tell cog_ where
    the rendered usage message should be injected. Once these lines are
    present in your ``*.rst`` file the rendered usage message is created or
    refreshed by running cog_:

    .. code-block:: sh

       $ cog.py -r README.rst

    .. _cog: http://nedbatchelder.com/code/cog/
    """
    # cog is only needed by this function, which is why it's imported here
    # instead of at the top of the module.
    import cog
    source_module = import_module(module_name)
    rendered_usage = render_usage(source_module.__doc__)
    # Surround the rendered usage message with empty lines.
    cog.out("\n" + rendered_usage + "\n\n")
301 | |
302 | |
def render_paragraph(paragraph, meta_variables):
    """
    Render a single paragraph of a usage message to reStructuredText.

    :param paragraph: The paragraph to render (a string, may be empty).
    :param meta_variables: The meta variables of the usage message (a
                           collection of strings, see
                           :func:`find_meta_variables()`).
    :returns: The rendered paragraph (a string).

    The following transformations are applied (the first one that matches
    wins):

    - A paragraph that starts with :data:`USAGE_MARKER` is rendered as a
      bold marker followed by the remainder of the paragraph in backticks.
    - The :data:`START_OF_OPTIONS_MARKER` paragraph is rendered in bold.
    - A paragraph that looks like a shell transcript (it starts with a
      ``$`` prompt) is rendered as a ``code-block`` directive.
    - Any other paragraph that isn't indented gets its UNIX style quoting
      and asterisks neutralized and its options, environment variables and
      meta variables wrapped in double backticks.
    - Indented paragraphs are returned unchanged.
    """
    # Reformat the "Usage:" line to highlight "Usage:" in bold and show the
    # remainder of the line as pre-formatted text.
    if paragraph.startswith(USAGE_MARKER):
        tokens = paragraph.split()
        return "**%s** `%s`" % (tokens[0], ' '.join(tokens[1:]))
    # Reformat the "Supported options:" line to highlight it in bold.
    if paragraph == START_OF_OPTIONS_MARKER:
        return "**%s**" % paragraph
    # Check whether the paragraph is indented. A slice is used instead of an
    # index so that an empty paragraph is treated as "not indented" instead
    # of raising IndexError.
    is_indented = paragraph[:1].isspace()
    # Reformat shell transcripts into code blocks.
    if re.match(r'^\s*\$\s+\S', paragraph):
        lines = paragraph.splitlines()
        if not is_indented:
            # If the paragraph isn't already indented we'll indent it now.
            # NOTE(review): the indentation inside this literal is reproduced
            # as seen in the reviewed copy of this file, where runs of
            # whitespace may have been collapsed - confirm against upstream.
            lines = [' %s' % line for line in lines]
        lines.insert(0, '.. code-block:: sh')
        lines.insert(1, '')
        return "\n".join(lines)
    # The following reformatting applies only to paragraphs which are not
    # indented. Yes this is a hack - for now we assume that indented
    # paragraphs are code blocks, even though this assumption can be wrong.
    if not is_indented:
        # Change UNIX style `quoting' so it doesn't trip up DocUtils.
        paragraph = re.sub("`(.+?)'", r'"\1"', paragraph)
        # Escape asterisks.
        paragraph = paragraph.replace('*', r'\*')
        # Reformat inline tokens.
        paragraph = replace_special_tokens(
            paragraph, meta_variables,
            lambda token: '``%s``' % token,
        )
    return paragraph
337 | |
338 | |
def replace_special_tokens(text, meta_variables, replace_fn):
    """
    Apply a replacement function to the special tokens in a piece of text.

    :param text: The text to process (a string).
    :param meta_variables: The known meta variables (a collection of strings).
    :param replace_fn: A function that takes a token (a string) and returns
                       its replacement (a string).
    :returns: The text with the special tokens replaced (a string).

    Every match of :data:`USAGE_PATTERN` is handed to
    :func:`replace_tokens_callback()` which decides whether the token is
    actually replaced.
    """
    def substitute(match):
        return replace_tokens_callback(
            match,
            meta_variables=meta_variables,
            replace_fn=replace_fn,
        )

    return USAGE_PATTERN.sub(substitute, text)
345 | |
346 | |
def replace_tokens_callback(match, meta_variables, replace_fn):
    """
    Decide whether a matched token should be replaced and if so, replace it.

    :param match: A regular expression match object whose complete match is
                  the token to consider.
    :param meta_variables: The known meta variables (a collection of strings).
    :param replace_fn: A function that takes a token (a string) and returns
                       its replacement (a string).
    :returns: The original token or its replacement (a string).

    A token written like a meta variable (an uppercase letter followed by
    one or more uppercase letters, digits or underscores) is left alone
    unless it is one of the known meta variables. All other tokens are
    passed through `replace_fn`.
    """
    token = match.group(0)
    written_like_meta_variable = re.match('^[A-Z][A-Z0-9_]+$', token)
    if written_like_meta_variable and token not in meta_variables:
        # Just an uppercase word, not a meta variable of this usage message.
        return token
    return replace_fn(token)