comparison env/lib/python3.7/site-packages/docutils/parsers/rst/states.py @ 0:26e78fe6e8c4 draft

"planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
author shellac
date Sat, 02 May 2020 07:14:21 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:26e78fe6e8c4
1 # $Id: states.py 8359 2019-08-26 16:45:33Z milde $
2 # Author: David Goodger <goodger@python.org>
3 # Copyright: This module has been placed in the public domain.
4
5 """
6 This is the ``docutils.parsers.rst.states`` module, the core of
7 the reStructuredText parser. It defines the following:
8
9 :Classes:
10 - `RSTStateMachine`: reStructuredText parser's entry point.
11 - `NestedStateMachine`: recursive StateMachine.
12 - `RSTState`: reStructuredText State superclass.
13 - `Inliner`: For parsing inline markup.
14 - `Body`: Generic classifier of the first line of a block.
15 - `SpecializedBody`: Superclass for compound element members.
16 - `BulletList`: Second and subsequent bullet_list list_items
17 - `DefinitionList`: Second+ definition_list_items.
18 - `EnumeratedList`: Second+ enumerated_list list_items.
19 - `FieldList`: Second+ fields.
20 - `OptionList`: Second+ option_list_items.
21 - `RFC2822List`: Second+ RFC2822-style fields.
22 - `ExtensionOptions`: Parses directive option fields.
23 - `Explicit`: Second+ explicit markup constructs.
24 - `SubstitutionDef`: For embedded directives in substitution definitions.
25 - `Text`: Classifier of second line of a text block.
26 - `SpecializedText`: Superclass for continuation lines of Text-variants.
27 - `Definition`: Second line of potential definition_list_item.
28 - `Line`: Second line of overlined section title or transition marker.
29 - `Struct`: An auxiliary collection class.
30
31 :Exception classes:
32 - `MarkupError`
33 - `ParserError`
34 - `MarkupMismatch`
35
36 :Functions:
37 - `escape2null()`: Return a string, escape-backslashes converted to nulls.
38 - `unescape()`: Return a string, nulls removed or restored to backslashes.
39
40 :Attributes:
41 - `state_classes`: set of State classes used with `RSTStateMachine`.
42
43 Parser Overview
44 ===============
45
46 The reStructuredText parser is implemented as a recursive state machine,
47 examining its input one line at a time. To understand how the parser works,
48 please first become familiar with the `docutils.statemachine` module. In the
49 description below, references are made to classes defined in this module;
50 please see the individual classes for details.
51
52 Parsing proceeds as follows:
53
54 1. The state machine examines each line of input, checking each of the
55 transition patterns of the state `Body`, in order, looking for a match.
56 The implicit transitions (blank lines and indentation) are checked before
57 any others. The 'text' transition is a catch-all (matches anything).
58
59 2. The method associated with the matched transition pattern is called.
60
61 A. Some transition methods are self-contained, appending elements to the
62 document tree (`Body.doctest` parses a doctest block). The parser's
63 current line index is advanced to the end of the element, and parsing
64 continues with step 1.
65
66 B. Other transition methods trigger the creation of a nested state machine,
67 whose job is to parse a compound construct ('indent' does a block quote,
68 'bullet' does a bullet list, 'overline' does a section [first checking
69 for a valid section header], etc.).
70
71 - In the case of lists and explicit markup, a one-off state machine is
72 created and run to parse contents of the first item.
73
74 - A new state machine is created and its initial state is set to the
75 appropriate specialized state (`BulletList` in the case of the
76 'bullet' transition; see `SpecializedBody` for more detail). This
77 state machine is run to parse the compound element (or series of
78 explicit markup elements), and returns as soon as a non-member element
79 is encountered. For example, the `BulletList` state machine ends as
80 soon as it encounters an element which is not a list item of that
81 bullet list. The optional omission of inter-element blank lines is
82 enabled by this nested state machine.
83
84 - The current line index is advanced to the end of the elements parsed,
85 and parsing continues with step 1.
86
87 C. The result of the 'text' transition depends on the next line of text.
88 The current state is changed to `Text`, under which the second line is
89 examined. If the second line is:
90
91 - Indented: The element is a definition list item, and parsing proceeds
92 similarly to step 2.B, using the `DefinitionList` state.
93
94 - A line of uniform punctuation characters: The element is a section
95 header; again, parsing proceeds as in step 2.B, and `Body` is still
96 used.
97
98 - Anything else: The element is a paragraph, which is examined for
99 inline markup and appended to the parent element. Processing
100 continues with step 1.
101 """
102
103 __docformat__ = 'reStructuredText'
104
105
106 import sys
107 import re
108 from types import FunctionType, MethodType
109
110 from docutils import nodes, statemachine, utils
111 from docutils import ApplicationError, DataError
112 from docutils.statemachine import StateMachineWS, StateWS
113 from docutils.nodes import fully_normalize_name as normalize_name
114 from docutils.nodes import whitespace_normalize_name
115 import docutils.parsers.rst
116 from docutils.parsers.rst import directives, languages, tableparser, roles
117 from docutils.parsers.rst.languages import en as _fallback_language_module
118 from docutils.utils import escape2null, unescape, column_width
119 from docutils.utils import punctuation_chars, roman, urischemes
120 from docutils.utils import split_escaped_whitespace
121
# Exception types used by the parser.  None of them adds anything to its
# base class; they exist only so callers can tell the cases apart.

class MarkupError(DataError):
    pass


class UnknownInterpretedRoleError(DataError):
    pass


class InterpretedRoleNotImplementedError(DataError):
    pass


class ParserError(ApplicationError):
    pass


class MarkupMismatch(Exception):
    pass
127
128
class Struct(object):

    """Plain attribute holder: every keyword argument becomes an attribute."""

    def __init__(self, **keywordargs):
        # Write straight into the instance dictionary so that any keyword
        # name is accepted as an attribute name.
        instance_attrs = self.__dict__
        for attr_name, attr_value in keywordargs.items():
            instance_attrs[attr_name] = attr_value
135
136
class RSTStateMachine(StateMachineWS):

    """
    Top-level StateMachine of the reStructuredText parser.

    Parsing is started by calling the `run()` method.
    """

    def run(self, input_lines, document, input_offset=0, match_titles=True,
            inliner=None):
        """
        Parse `input_lines`, modifying the `document` node in place.

        Extends `StateMachineWS.run()`: the parse-global data (`self.memo`)
        is set up first, then the StateMachine is run.  If no `inliner` is
        passed in, a fresh `Inliner` is created; either way it is
        initialized from ``document.settings``.
        """
        self.language = languages.get_language(
            document.settings.language_code)
        self.match_titles = match_titles
        if inliner is None:
            inliner = Inliner()
        inliner.init_customizations(document.settings)
        # Data shared by every state and nested state machine of this run.
        memo_fields = {
            'document': document,
            'reporter': document.reporter,
            'language': self.language,
            'title_styles': [],
            'section_level': 0,
            'section_bubble_up_kludge': False,
            'inliner': inliner,
        }
        self.memo = Struct(**memo_fields)
        self.document = document
        self.attach_observer(document.note_source)
        self.reporter = self.memo.reporter
        self.node = document
        results = StateMachineWS.run(self, input_lines, input_offset,
                                     input_source=document['source'])
        assert results == [], 'RSTStateMachine.run() results should be empty!'
        # remove unneeded references
        self.node = None
        self.memo = None
174
175
class NestedStateMachine(StateMachineWS):

    """
    StateMachine started from inside another StateMachine's run, used for
    parsing nested document structures.
    """

    def run(self, input_lines, input_offset, memo, node, match_titles=True):
        """
        Parse `input_lines`, attaching the result below `node`.

        Extends `StateMachineWS.run()`: the document-wide data is taken
        from `memo` before the StateMachine is run.  The (empty) result
        list of the underlying run is returned.
        """
        document = memo.document
        self.match_titles = match_titles
        self.memo = memo
        self.document = document
        self.attach_observer(document.note_source)
        self.reporter = memo.reporter
        self.language = memo.language
        self.node = node
        results = StateMachineWS.run(self, input_lines, input_offset)
        assert results == [], ('NestedStateMachine.run() results should be '
                               'empty!')
        return results
200
201
class RSTState(StateWS):

    """
    Common base class of the reStructuredText parser states.

    Holds the helper methods shared by all State subclasses.
    """

    nested_sm = NestedStateMachine
    # Finished default nested state machines, kept for reuse by
    # `nested_parse()`.  Class attribute, hence shared by all states.
    nested_sm_cache = []

    def __init__(self, state_machine, debug=False):
        self.nested_sm_kwargs = dict(state_classes=state_classes,
                                     initial_state='Body')
        StateWS.__init__(self, state_machine, debug)

    def runtime_init(self):
        """Copy the parse-wide objects from the state machine's memo."""
        StateWS.runtime_init(self)
        machine = self.state_machine
        memo = machine.memo
        self.memo = memo
        self.reporter = memo.reporter
        self.inliner = memo.inliner
        self.document = memo.document
        self.parent = machine.node
        # enable the reporter to determine source and source-line
        if not hasattr(self.reporter, 'get_source_and_line'):
            self.reporter.get_source_and_line = machine.get_source_and_line

    def goto_line(self, abs_line_offset):
        """
        Jump to input line `abs_line_offset`; a jump past the end of the
        input (`EOFError`) is silently ignored.
        """
        try:
            self.state_machine.goto_line(abs_line_offset)
        except EOFError:
            pass

    def no_match(self, context, transitions):
        """
        Override `StateWS.no_match` to generate a system message.

        This code should never be run.
        """
        details = (self.__class__.__name__, transitions, context,
                   self.state_machine.line)
        self.reporter.severe(
              'Internal error: no transition pattern match. State: "%s"; '
              'transitions: %s; context: %s; current line: %r.'
              % details)
        return context, None, []

    def bof(self, context):
        """Called at beginning of file."""
        return [], []

    def nested_parse(self, block, input_offset, node, match_titles=False,
                     state_machine_class=None, state_machine_kwargs=None):
        """
        Run a new StateMachine, rooted at `node`, over the input `block`
        and return the absolute line offset it stopped at.

        When neither `state_machine_class` nor `state_machine_kwargs` is
        given, state machines are taken from and returned to
        `nested_sm_cache`; otherwise a one-off machine is created and
        unlinked after use.
        """
        # Only the all-defaults configuration may use the shared cache.
        all_defaults = (state_machine_class is None
                        and state_machine_kwargs is None)
        if state_machine_class is None:
            state_machine_class = self.nested_sm
        if state_machine_kwargs is None:
            state_machine_kwargs = self.nested_sm_kwargs
        length_before = len(block)

        state_machine = None
        if all_defaults and self.nested_sm_cache:
            state_machine = self.nested_sm_cache.pop()
        if not state_machine:
            state_machine = state_machine_class(debug=self.debug,
                                                **state_machine_kwargs)
        state_machine.run(block, input_offset, memo=self.memo,
                          node=node, match_titles=match_titles)
        if all_defaults:
            self.nested_sm_cache.append(state_machine)
        else:
            state_machine.unlink()
        new_offset = state_machine.abs_line_offset()
        # No `block.parent` implies disconnected -- lines aren't in sync:
        length_change = len(block) - length_before
        if block.parent and length_change != 0:
            # Adjustment for block if modified in nested parse:
            self.state_machine.next_line(length_change)
        return new_offset

    def nested_list_parse(self, block, input_offset, node, initial_state,
                          blank_finish,
                          blank_finish_state=None,
                          extra_settings={},
                          match_titles=False,
                          state_machine_class=None,
                          state_machine_kwargs=None):
        """
        Run a new StateMachine, rooted at `node` and starting in
        `initial_state`, over the input `block`.  Also keep track of
        optional intermediate blank lines and the required final one.

        Return a tuple: the absolute line offset reached and the final
        `blank_finish` value of the `blank_finish_state` state.
        """
        if state_machine_class is None:
            state_machine_class = self.nested_sm
        if state_machine_kwargs is None:
            # Copy, because the initial state is overridden below.
            state_machine_kwargs = self.nested_sm_kwargs.copy()
        state_machine_kwargs['initial_state'] = initial_state
        state_machine = state_machine_class(debug=self.debug,
                                            **state_machine_kwargs)
        if blank_finish_state is None:
            blank_finish_state = initial_state
        states = state_machine.states
        states[blank_finish_state].blank_finish = blank_finish
        for key, value in extra_settings.items():
            setattr(states[initial_state], key, value)
        state_machine.run(block, input_offset, memo=self.memo,
                          node=node, match_titles=match_titles)
        blank_finish = state_machine.states[blank_finish_state].blank_finish
        state_machine.unlink()
        return state_machine.abs_line_offset(), blank_finish

    def section(self, title, source, style, lineno, messages):
        """Create a new subsection, provided the title style is valid."""
        if self.check_subsection(source, style, lineno):
            self.new_subsection(title, lineno, messages)

    def check_subsection(self, source, style, lineno):
        """
        Check for a valid subsection header.  Return 1 (true) or None (false).

        When a new section is reached that isn't a subsection of the current
        section, back up the line count (use ``previous_line(-x)``), then
        ``raise EOFError``.  The current StateMachine will finish, then the
        calling StateMachine can re-examine the title.  This will work its way
        back up the calling chain until the correct section level is reached.

        @@@ Alternative: Evaluate the title, store the title info & level, and
        back up the chain until that level is reached.  Store in memo? Or
        return in results?

        :Exception: `EOFError` when a sibling or supersection encountered.
        """
        memo = self.memo
        title_styles = memo.title_styles
        mylevel = memo.section_level
        if style not in title_styles:           # new title style
            if len(title_styles) == memo.section_level:  # new subsection
                title_styles.append(style)
                return 1
            # not at lowest level
            self.parent += self.title_inconsistent(source, lineno)
            return None
        # existing title style
        level = title_styles.index(style) + 1
        if level <= mylevel:                    # sibling or supersection
            memo.section_level = level          # bubble up to parent section
            if len(style) == 2:
                memo.section_bubble_up_kludge = True
            # back up 2 lines for underline title, 3 for overline title
            self.state_machine.previous_line(len(style) + 1)
            raise EOFError                      # let parent section re-evaluate
        if level == mylevel + 1:                # immediate subsection
            return 1
        # invalid subsection
        self.parent += self.title_inconsistent(source, lineno)
        return None

    def title_inconsistent(self, sourcetext, lineno):
        """Return a severe system message quoting the offending title."""
        return self.reporter.severe(
            'Title level inconsistent:', nodes.literal_block('', sourcetext),
            line=lineno)

    def new_subsection(self, title, lineno, messages):
        """Append new subsection to document tree. On return, check level."""
        memo = self.memo
        mylevel = memo.section_level
        memo.section_level += 1
        section_node = nodes.section()
        self.parent += section_node
        textnodes, title_messages = self.inline_text(title, lineno)
        titlenode = nodes.title(title, '', *textnodes)
        section_node['names'].append(normalize_name(titlenode.astext()))
        section_node += titlenode
        section_node += messages
        section_node += title_messages
        self.document.note_implicit_target(section_node, section_node)
        # The section body starts on the line after the current one.
        machine = self.state_machine
        offset = machine.line_offset + 1
        absoffset = machine.abs_line_offset() + 1
        newabsoffset = self.nested_parse(
              machine.input_lines[offset:], input_offset=absoffset,
              node=section_node, match_titles=True)
        self.goto_line(newabsoffset)
        if memo.section_level <= mylevel:       # can't handle next section?
            raise EOFError                      # bubble up to supersection
        # reset section_level; next pass will detect it properly
        memo.section_level = mylevel

    def paragraph(self, lines, lineno):
        """
        Return a list (paragraph & messages) & a boolean: literal_block next?
        """
        data = '\n'.join(lines).rstrip()
        text = data
        literalnext = 0
        if re.search(r'(?<!\\)(\\\\)*::$', data):
            if len(data) == 2:
                # The paragraph consists of nothing but "::".
                return [], 1
            literalnext = 1
            if data[-3] in ' \n':
                # "::" preceded by whitespace: drop it entirely.
                text = data[:-3].rstrip()
            else:
                # "text::" -- keep a single colon.
                text = data[:-1]
        textnodes, messages = self.inline_text(text, lineno)
        p = nodes.paragraph(data, '', *textnodes)
        p.source, p.line = self.state_machine.get_source_and_line(lineno)
        return [p] + messages, literalnext

    def inline_text(self, text, lineno):
        """
        Return 2 lists: nodes (text and inline elements), and system_messages.
        """
        inline_nodes, messages = self.inliner.parse(text, lineno,
                                                    self.memo, self.parent)
        return inline_nodes, messages

    def unindent_warning(self, node_name):
        """Return a warning about a missing blank line after `node_name`."""
        # the actual problem is one line below the current line
        lineno = self.state_machine.abs_line_number()+1
        message = ('%s ends without a blank line; '
                   'unexpected unindent.' % node_name)
        return self.reporter.warning(message, line=lineno)
437
438
def build_regexp(definition, compile=True):
    """
    Build and return a regular expression from `definition`; the result
    is a compiled pattern unless `compile` is false, in which case the
    pattern string is returned.

    :Parameter: `definition`: a 4-tuple (group name, prefix, suffix, parts),
        where "parts" is a list of regular expressions and/or regular
        expression definitions to be joined into an or-group.
    """
    name, prefix, suffix, parts = definition
    # Nested definitions (tuples) are expanded recursively to strings.
    alternatives = [build_regexp(part, None) if isinstance(part, tuple)
                    else part
                    for part in parts]
    regexp = '%s(?P<%s>%s)%s' % (prefix, name, '|'.join(alternatives),
                                 suffix)
    if not compile:
        return regexp
    return re.compile(regexp, re.UNICODE)
460
461
462 class Inliner(object):
463
464 """
465 Parse inline markup; call the `parse()` method.
466 """
467
468 def __init__(self):
469 self.implicit_dispatch = []
470 """List of (pattern, bound method) tuples, used by
471 `self.implicit_inline`."""
472
def init_customizations(self, settings):
    """
    Compile the inline markup patterns for the given `settings`.

    Sets `self.start_string_prefix`, `self.end_string_suffix`,
    `self.parts` and `self.patterns`, and appends the implicit markup
    recognizers to `self.implicit_dispatch`: standalone URIs always,
    PEP and RFC references only if ``settings.pep_references`` /
    ``settings.rfc_references`` are true.

    A true ``settings.character_level_inline_markup`` (optional, default
    false) selects the relaxed start-string prefix / end-string suffix.
    """
    # lookahead and look-behind expressions for inline markup rules
    if getattr(settings, 'character_level_inline_markup', False):
        start_string_prefix = u'(^|(?<!\x00))'
        end_string_suffix = u''
    else:
        start_string_prefix = (u'(^|(?<=\\s|[%s%s]))' %
                               (punctuation_chars.openers,
                                punctuation_chars.delimiters))
        end_string_suffix = (u'($|(?=\\s|[\x00%s%s%s]))' %
                             (punctuation_chars.closing_delimiters,
                              punctuation_chars.delimiters,
                              punctuation_chars.closers))
    # Substitution values for the "%(name)s" placeholders used below:
    # the local variables plus the class-level pattern fragments.
    args = locals().copy()
    # `vars()` of a class only lists attributes defined directly on that
    # class.  Walk the whole MRO (base classes first, so that subclasses
    # override) to keep the fragments available when this method runs on
    # a subclass that does not redefine them.
    for klass in reversed(self.__class__.__mro__):
        args.update(vars(klass))

    parts = ('initial_inline', start_string_prefix, '',
             [('start', '', self.non_whitespace_after,  # simple start-strings
               [r'\*\*',                # strong
                r'\*(?!\*)',            # emphasis but not strong
                r'``',                  # literal
                r'_`',                  # inline internal target
                r'\|(?!\|)']            # substitution reference
               ),
              ('whole', '', end_string_suffix,  # whole constructs
               [  # reference name & end-string
                r'(?P<refname>%s)(?P<refend>__?)' % self.simplename,
                ('footnotelabel', r'\[', r'(?P<fnend>\]_)',
                 [r'[0-9]+',               # manually numbered
                  r'\#(%s)?' % self.simplename,  # auto-numbered (w/ label?)
                  r'\*',                   # auto-symbol
                  r'(?P<citationlabel>%s)' % self.simplename]  # citation reference
                 )
                ]
               ),
              ('backquote',             # interpreted text or phrase reference
               '(?P<role>(:%s:)?)' % self.simplename,  # optional role
               self.non_whitespace_after,
               ['`(?!`)']               # but not literal
               )
              ]
             )
    self.start_string_prefix = start_string_prefix
    self.end_string_suffix = end_string_suffix
    self.parts = parts

    self.patterns = Struct(
        initial=build_regexp(parts),
        emphasis=re.compile(self.non_whitespace_escape_before
                            + r'(\*)' + end_string_suffix, re.UNICODE),
        strong=re.compile(self.non_whitespace_escape_before
                          + r'(\*\*)' + end_string_suffix, re.UNICODE),
        interpreted_or_phrase_ref=re.compile(
            r"""
            %(non_unescaped_whitespace_escape_before)s
            (
              `
              (?P<suffix>
                (?P<role>:%(simplename)s:)?
                (?P<refend>__?)?
              )
            )
            %(end_string_suffix)s
            """ % args, re.VERBOSE | re.UNICODE),
        embedded_link=re.compile(
            r"""
            (
              (?:[ \n]+|^)            # spaces or beginning of line/string
              <                       # open bracket
              %(non_whitespace_after)s
              (([^<>]|\x00[<>])+)     # anything but unescaped angle brackets
              %(non_whitespace_escape_before)s
              >                       # close bracket
            )
            $                         # end of string
            """ % args, re.VERBOSE | re.UNICODE),
        literal=re.compile(self.non_whitespace_before + '(``)'
                           + end_string_suffix, re.UNICODE),
        target=re.compile(self.non_whitespace_escape_before
                          + r'(`)' + end_string_suffix, re.UNICODE),
        substitution_ref=re.compile(self.non_whitespace_escape_before
                                    + r'(\|_{0,2})'
                                    + end_string_suffix, re.UNICODE),
        email=re.compile(self.email_pattern % args + '$',
                         re.VERBOSE | re.UNICODE),
        uri=re.compile(
            (r"""
            %(start_string_prefix)s
            (?P<whole>
              (?P<absolute>           # absolute URI
                (?P<scheme>             # scheme (http, ftp, mailto)
                  [a-zA-Z][a-zA-Z0-9.+-]*
                )
                :
                (
                  (                       # either:
                    (//?)?                  # hierarchical URI
                    %(uric)s*               # URI characters
                    %(uri_end)s             # final URI char
                  )
                  (                       # optional query
                    \?%(uric)s*
                    %(uri_end)s
                  )?
                  (                       # optional fragment
                    \#%(uric)s*
                    %(uri_end)s
                  )?
                )
              )
            |                       # *OR*
              (?P<email>              # email address
                """ + self.email_pattern + r"""
              )
            )
            %(end_string_suffix)s
            """) % args, re.VERBOSE | re.UNICODE),
        pep=re.compile(
            r"""
            %(start_string_prefix)s
            (
              (pep-(?P<pepnum1>\d+)(.txt)?) # reference to source file
            |
              (PEP\s+(?P<pepnum2>\d+))      # reference by name
            )
            %(end_string_suffix)s""" % args, re.VERBOSE | re.UNICODE),
        rfc=re.compile(
            r"""
            %(start_string_prefix)s
            (RFC(-|\s+)?(?P<rfcnum>\d+))
            %(end_string_suffix)s""" % args, re.VERBOSE | re.UNICODE))

    # Recognizers for implicit markup, tried in this order.
    self.implicit_dispatch.append((self.patterns.uri,
                                   self.standalone_uri))
    if settings.pep_references:
        self.implicit_dispatch.append((self.patterns.pep,
                                       self.pep_reference))
    if settings.rfc_references:
        self.implicit_dispatch.append((self.patterns.rfc,
                                       self.rfc_reference))
613
def parse(self, text, lineno, memo, parent):
    # Needs to be refactored for nested inline markup.
    # Add nested_parse() method?
    """
    Return 2 lists: nodes (text and inline elements), and system_messages.

    `self.patterns.initial` matches start-strings (emphasis, strong,
    interpreted, phrase reference, literal, substitution reference, and
    inline target) as well as complete constructs (simple reference,
    footnote reference).  Each candidate it finds is handed to the
    method registered for it in `self.dispatch`, which validates the
    candidate and looks for the matching end-string where applicable.
    Text not consumed by explicit markup is finally searched for
    implicit inline markup (e.g. standalone URIs).
    """
    self.reporter = memo.reporter
    self.document = memo.document
    self.language = memo.language
    self.parent = parent
    find_candidate = self.patterns.initial.search
    handlers = self.dispatch
    remaining = escape2null(text)
    processed = []
    pending_text = []       # text seen since the last recognized inline
    messages = []
    while remaining:
        match = find_candidate(remaining)
        if not match:
            break
        groups = match.groupdict()
        key = (groups['start'] or groups['backquote']
               or groups['refend'] or groups['fnend'])
        handler = handlers[key]
        before, inlines, remaining, sysmessages = handler(self, match,
                                                          lineno)
        pending_text.append(before)
        messages += sysmessages
        if inlines:
            # Flush the plain text collected so far, then the new nodes.
            processed += self.implicit_inline(''.join(pending_text),
                                              lineno)
            processed += inlines
            pending_text = []
    remaining = ''.join(pending_text) + remaining
    if remaining:
        processed += self.implicit_inline(remaining, lineno)
    return processed, messages
661
# Building blocks for inline markup recognition
# ---------------------------------------------
# These fragments are combined into patterns by init_customizations().

# Look-behind: previous character is not whitespace.
non_whitespace_before = r'(?<!\s)'
# Look-behind: previous character is neither whitespace nor \x00.
non_whitespace_escape_before = r'(?<![\s\x00])'
# Look-behind: previous character is not whitespace or \x00, unless that
# character is itself preceded by \x00.
non_unescaped_whitespace_escape_before = r'(?<!(?<!\x00)[\s\x00])'
# Look-ahead: next character is not whitespace.
non_whitespace_after = r'(?!\s)'
# Runs of alphanumerics joined by single [-._+:] characters
# (never two of those in a row):
simplename = r'(?:(?!_)\w)+(?:[-._+:](?:(?!_)\w)+)*'
# Characters allowed in a URI (see RFC 2396 & RFC 2732);
# the trailing \x00 permits backslash escapes inside URIs:
uric = r"""[-_.!~*'()[\];/:@&=+$,%a-zA-Z0-9\x00]"""
# Delimiter that ends a URI without being part of it:
uri_end_delim = r"""[>]"""
# Allowed final URI character; like uric but without punctuation:
urilast = r"""[_~*/=+a-zA-Z0-9]"""
# End of a URI: either a 'urilast' character, or a 'uric' character
# that is followed by a 'uri_end_delim':
uri_end = r"""(?:%(urilast)s|%(uric)s(?=%(uri_end_delim)s))""" % {
    'urilast': urilast,
    'uric': uric,
    'uri_end_delim': uri_end_delim,
}
# Characters allowed in the name and host parts of an email address:
emailc = r"""[-_!~*'{|}/#?^`&=+$%a-zA-Z0-9\x00]"""
# Template (verbose regular expression syntax); 'emailc' and 'uri_end'
# are filled in when the patterns are compiled.
email_pattern = r"""
%(emailc)s+(?:\.%(emailc)s+)* # name
(?<!\x00)@ # at
%(emailc)s+(?:\.%(emailc)s*)* # host
%(uri_end)s # final URI char
"""
688
def quoted_start(self, match):
    """Return whether the matched inline markup start-string must be
    ignored because it is 'quoted'.

    'Quoted' here means that the characters directly before and after
    the match form a matching pair of opening/closing delimiters (not
    necessarily quotes).  A match that ends the text is also reported
    as true; a match that begins the text never is.
    """
    text = match.string
    start, end = match.start(), match.end()
    if start == 0:              # start-string at beginning of text
        return False
    if end >= len(text):        # start-string at end of text
        return True     # not "quoted" but no markup start-string either
    return punctuation_chars.match_chars(text[start - 1], text[end])
706
def inline_obj(self, match, lineno, end_pattern, nodeclass,
               restore_backslashes=False):
    """
    Build the `nodeclass` node for the start-string found by `match`.

    Return a 5-tuple: text before the markup, list of new nodes, text
    after the markup, list of system messages, and the matched
    end-string ('' if the start-string was quoted or has no end-string).
    """
    string = match.string
    matchstart, matchend = match.span('start')
    if self.quoted_start(match):
        return (string[:matchend], [], string[matchend:], [], '')
    tail = string[matchend:]
    endmatch = end_pattern.search(tail)
    if endmatch and endmatch.start(1):  # 1 or more chars
        text = tail[:endmatch.start(1)]
        if restore_backslashes:
            text = unescape(text, True)
        textend = matchend + endmatch.end(1)
        rawsource = unescape(string[matchstart:textend], True)
        return (string[:matchstart], [nodeclass(rawsource, text)],
                string[textend:], [], endmatch.group(1))
    # No usable end-string: flag the start-string as problematic.
    msg = self.reporter.warning(
          'Inline %s start-string without end-string.'
          % nodeclass.__name__, line=lineno)
    text = unescape(string[matchstart:matchend], True)
    prb = self.problematic(text, text, msg)
    return string[:matchstart], [prb], string[matchend:], [msg], ''
730
def problematic(self, text, rawsource, message):
    """
    Return a `problematic` node for `text` that refers to the system
    `message`; the message gets a back-reference to the new node.
    """
    message_id = self.document.set_id(message, self.parent)
    prb_node = nodes.problematic(rawsource, text, refid=message_id)
    message.add_backref(self.document.set_id(prb_node))
    return prb_node
737
def emphasis(self, match, lineno):
    """Handle an emphasis start-string; the end-string is not returned."""
    result = self.inline_obj(match, lineno, self.patterns.emphasis,
                             nodes.emphasis)
    before, inlines, remaining, sysmessages, _endstring = result
    return before, inlines, remaining, sysmessages
742
def strong(self, match, lineno):
    """Handle a strong start-string; the end-string is not returned."""
    result = self.inline_obj(match, lineno, self.patterns.strong,
                             nodes.strong)
    before, inlines, remaining, sysmessages, _endstring = result
    return before, inlines, remaining, sysmessages
747
def interpreted_or_phrase_ref(self, match, lineno):
    """
    Handle a single-backquote start-string: interpreted text (with an
    optional role before or after it) or a phrase reference.

    Return a 4-tuple: text before, list of nodes, text after, and list
    of system messages.
    """
    end_pattern = self.patterns.interpreted_or_phrase_ref
    string = match.string
    matchstart, matchend = match.span('backquote')
    rolestart = match.start('role')
    role = match.group('role')
    position = ''
    if role:
        role = role[1:-1]       # strip the enclosing colons
        position = 'prefix'
    elif self.quoted_start(match):
        return (string[:matchend], [], string[matchend:], [])
    endmatch = end_pattern.search(string[matchend:])
    if not (endmatch and endmatch.start(1)):    # need 1 or more chars
        msg = self.reporter.warning(
              'Inline interpreted text or phrase reference start-string '
              'without end-string.', line=lineno)
        text = unescape(string[matchstart:matchend], True)
        prb = self.problematic(text, text, msg)
        return string[:matchstart], [prb], string[matchend:], [msg]
    textend = matchend + endmatch.end()
    if endmatch.group('role'):
        if role:
            msg = self.reporter.warning(
                'Multiple roles in interpreted text (both '
                'prefix and suffix present; only one allowed).',
                line=lineno)
            text = unescape(string[rolestart:textend], True)
            prb = self.problematic(text, text, msg)
            return string[:rolestart], [prb], string[textend:], [msg]
        role = endmatch.group('suffix')[1:-1]
        position = 'suffix'
    escaped = endmatch.string[:endmatch.start(1)]
    rawsource = unescape(string[matchstart:textend], True)
    if rawsource[-1:] != '_':
        # No reference suffix: this is interpreted text.
        rawsource = unescape(string[rolestart:textend], True)
        nodelist, messages = self.interpreted(rawsource, escaped, role,
                                              lineno)
        return (string[:rolestart], nodelist,
                string[textend:], messages)
    if role:
        msg = self.reporter.warning(
              'Mismatch: both interpreted text role %s and '
              'reference suffix.' % position, line=lineno)
        text = unescape(string[rolestart:textend], True)
        prb = self.problematic(text, text, msg)
        return string[:rolestart], [prb], string[textend:], [msg]
    return self.phrase_ref(string[:matchstart], string[textend:],
                           rawsource, escaped)
799
def phrase_ref(self, before, after, rawsource, escaped, text=None):
    """
    Build the reference node (plus a target node for an embedded
    ``<URI>`` or ``<alias_>``) for the phrase reference `escaped`.

    Return a 4-tuple: `before`, list of nodes, `after`, and an empty
    list of system messages.
    """
    # `text` is ignored (since 0.16)
    match = self.patterns.embedded_link.search(escaped)
    if not match:
        text = escaped
        unescaped = unescape(text)
        target = None
        rawtext = unescape(escaped, True)
    else:                       # embedded <URI> or <alias_>
        text = escaped[:match.start(0)]
        unescaped = unescape(text)
        rawtext = unescape(text, True)
        aliastext = match.group(2)
        rawaliastext = unescape(aliastext, True)
        underscore_escaped = rawaliastext.endswith(r'\_')
        if aliastext.endswith('_') and not (underscore_escaped
                                    or self.patterns.uri.match(aliastext)):
            aliastype = 'name'
            alias = normalize_name(unescape(aliastext[:-1]))
            target = nodes.target(match.group(1), refname=alias)
            target.indirect_reference_name = whitespace_normalize_name(
                                                unescape(aliastext[:-1]))
        else:
            aliastype = 'uri'
            # remove unescaped whitespace
            alias_parts = split_escaped_whitespace(match.group(2))
            alias = ' '.join(''.join(part.split())
                             for part in alias_parts)
            alias = self.adjust_uri(unescape(alias))
            if alias.endswith(r'\_'):
                alias = alias[:-2] + '_'
            target = nodes.target(match.group(1), refuri=alias)
            target.referenced = 1
        if not aliastext:
            raise ApplicationError('problem with embedded link: %r'
                                   % aliastext)
        if not text:
            # Nothing but the embedded link: show the alias itself.
            text = alias
            unescaped = unescape(text)
            rawtext = rawaliastext

    refname = normalize_name(unescaped)
    reference = nodes.reference(rawsource, text,
                                name=whitespace_normalize_name(unescaped))
    reference[0].rawsource = rawtext

    node_list = [reference]

    is_anonymous = rawsource[-2:] == '__'
    if not target:
        if is_anonymous:
            reference['anonymous'] = 1
        else:
            reference['refname'] = refname
            self.document.note_refname(reference)
    elif is_anonymous:
        if aliastype == 'name':
            reference['refname'] = alias
            self.document.note_refname(reference)
            # self.document.note_indirect_target(target) # required?
        elif aliastype == 'uri':
            reference['refuri'] = alias
        else:
            reference['anonymous'] = 1
    else:
        target['names'].append(refname)
        if aliastype == 'name':
            reference['refname'] = alias
            self.document.note_indirect_target(target)
            self.document.note_refname(reference)
        else:
            reference['refuri'] = alias
            self.document.note_explicit_target(target, self.parent)
        # target.note_referenced_by(name=refname)
        node_list.append(target)
    return before, node_list, after, []
873
874
def adjust_uri(self, uri):
    """
    Return `uri`, prefixed with "mailto:" if it looks like an e-mail address.

    A URI counts as an e-mail address when `self.patterns.email` matches at
    its start; any other URI is returned unchanged.
    """
    if self.patterns.email.match(uri):
        return 'mailto:' + uri
    return uri
881
def interpreted(self, rawsource, text, role, lineno):
    """
    Dispatch interpreted text to the function registered for `role`.

    The role function is looked up with `roles.role()`.  If one is found,
    return the nodes it produces plus all system messages (lookup messages
    first, then the role function's own).  Otherwise report an
    "Unknown interpreted text role" error and return `rawsource` wrapped by
    `self.problematic()`, together with the lookup messages and the error.
    """
    role_fn, messages = roles.role(role, self.language, lineno,
                                   self.reporter)
    if not role_fn:
        msg = self.reporter.error(
            'Unknown interpreted text role "%s".' % role,
            line=lineno)
        problem = self.problematic(rawsource, rawsource, msg)
        return ([problem], messages + [msg])
    # Named `role_nodes` so the module-level `nodes` name is not hidden.
    role_nodes, role_messages = role_fn(role, rawsource, text, lineno, self)
    return role_nodes, messages + role_messages
894
def literal(self, match, lineno):
    """
    Handle an inline literal start-string match.

    Delegates to `self.inline_obj()` with `self.patterns.literal` as the
    end pattern and `nodes.literal` as the node class, asking for
    backslashes to be restored.  The end-string reported by `inline_obj()`
    is dropped; the remaining four values are returned unchanged.
    """
    result = self.inline_obj(
        match, lineno, self.patterns.literal, nodes.literal,
        restore_backslashes=True)
    before, inlines, remaining, sysmessages, _endstring = result
    return before, inlines, remaining, sysmessages
900
def inline_internal_target(self, match, lineno):
    """
    Handle an inline internal target start-string match.

    Delegates to `self.inline_obj()`.  If the first resulting node is a
    `nodes.target`, its normalized text is added to the target's names and
    the target is registered with the document as an explicit target.
    Return `(before, inlines, remaining, sysmessages)`.
    """
    before, inlines, remaining, sysmessages, _endstring = self.inline_obj(
        match, lineno, self.patterns.target, nodes.target)
    first = inlines[0] if inlines else None
    if isinstance(first, nodes.target):
        assert len(inlines) == 1
        first['names'].append(normalize_name(first.astext()))
        self.document.note_explicit_target(first, self.parent)
    return before, inlines, remaining, sysmessages
911
def substitution_reference(self, match, lineno):
    """
    Handle a substitution reference start-string match.

    Delegates to `self.inline_obj()`.  A single resulting
    `nodes.substitution_reference` is registered with the document.  If the
    end-string finishes with "_", the substitution reference is additionally
    wrapped in a `nodes.reference`: anonymous when the end-string finishes
    with "__", otherwise a named reference using the normalized
    substitution text.
    Return `(before, inlines, remaining, sysmessages)`.
    """
    before, inlines, remaining, sysmessages, endstring = self.inline_obj(
        match, lineno, self.patterns.substitution_ref,
        nodes.substitution_reference)
    subref_node = inlines[0] if len(inlines) == 1 else None
    if (subref_node is not None
            and isinstance(subref_node, nodes.substitution_reference)):
        subref_text = subref_node.astext()
        self.document.note_substitution_ref(subref_node, subref_text)
        if endstring[-1:] == '_':
            # also a hyperlink reference: wrap the substitution reference
            wrapper = nodes.reference(
                '|%s%s' % (subref_text, endstring), '')
            if endstring[-2:] == '__':
                wrapper['anonymous'] = 1
            else:
                wrapper['refname'] = normalize_name(subref_text)
                self.document.note_refname(wrapper)
            wrapper += subref_node
            inlines = [wrapper]
    return before, inlines, remaining, sysmessages
932
def footnote_reference(self, match, lineno):
    """
    Handles `nodes.footnote_reference` and `nodes.citation_reference`
    elements.

    The label is taken from the match's "footnotelabel" group.  A match
    with a "citationlabel" group produces a citation reference.  Otherwise
    a footnote reference is produced: labels starting with "#" are
    auto-numbered, the label "*" is an auto-symbol footnote, and anything
    else is a manually labelled footnote.
    Return `(before, [refnode], remaining, [])`.
    """
    label = match.group('footnotelabel')
    refname = normalize_name(label)
    source = match.string
    before = source[:match.start('whole')]
    remaining = source[match.end('whole'):]
    rawsource = '[%s]_' % label
    if match.group('citationlabel'):
        refnode = nodes.citation_reference(rawsource, refname=refname)
        refnode += nodes.Text(label)
        self.document.note_citation_ref(refnode)
        return (before, [refnode], remaining, [])
    refnode = nodes.footnote_reference(rawsource)
    if refname[0] == '#':
        # auto-numbered; the rest of the label (if any) is the name
        refname = refname[1:]
        refnode['auto'] = 1
        self.document.note_autofootnote_ref(refnode)
    elif refname == '*':
        # auto-symbol footnotes have no name
        refname = ''
        refnode['auto'] = '*'
        self.document.note_symbol_footnote_ref(refnode)
    else:
        refnode += nodes.Text(label)
    if refname:
        refnode['refname'] = refname
        self.document.note_footnote_ref(refnode)
    if utils.get_trim_footnote_ref_space(self.document.settings):
        before = before.rstrip()
    return (before, [refnode], remaining, [])
967
def reference(self, match, lineno, anonymous=False):
    """
    Handle a simple (non-phrase) hyperlink reference match.

    Build a `nodes.reference` from the match's "refname" and "refend"
    groups.  If `anonymous` is true the node is only flagged as anonymous;
    otherwise it gets a normalized "refname" and is registered with the
    document.
    Return `(text before, [reference node], text after, [])`.
    """
    referencename = match.group('refname')
    refname = normalize_name(referencename)
    rawsource = referencename + match.group('refend')
    referencenode = nodes.reference(
        rawsource, referencename,
        name=whitespace_normalize_name(referencename))
    referencenode[0].rawsource = referencename
    if anonymous:
        referencenode['anonymous'] = 1
    else:
        referencenode['refname'] = refname
        self.document.note_refname(referencenode)
    source = match.string
    before = source[:match.start('whole')]
    after = source[match.end('whole'):]
    return (before, [referencenode], after, [])
984
def anonymous_reference(self, match, lineno):
    """Handle an anonymous reference: `self.reference()` with `anonymous` set."""
    result = self.reference(match, lineno, anonymous=1)
    return result
987
988 def standalone_uri(self, match, lineno):
989 if (not match.group('scheme')
990 or match.group('scheme').lower() in urischemes.schemes):
991 if match.group('email'):
992 addscheme = 'mailto:'
993 else:
994 addscheme = ''
995 text = match.group('whole')
996 refuri = addscheme + unescape(text)
997 reference = nodes.reference(unescape(text, True), text,
998 refuri=refuri)
999 return [reference]
1000 else: # not a valid scheme
1001 raise MarkupMismatch
1002
def pep_reference(self, match, lineno):
    """
    Turn a PEP reference match into a reference node.

    Text starting with "pep-" takes its number from the "pepnum1" group,
    text starting with "PEP" from the "pepnum2" group; anything else raises
    `MarkupMismatch`.  The target URI is built from the `pep_base_url` and
    `pep_file_url_template` document settings.
    """
    text = match.group(0)
    if text.startswith('pep-'):
        number_group = 'pepnum1'
    elif text.startswith('PEP'):
        number_group = 'pepnum2'
    else:
        raise MarkupMismatch
    pepnum = int(unescape(match.group(number_group)))
    settings = self.document.settings
    ref = settings.pep_base_url + settings.pep_file_url_template % pepnum
    return [nodes.reference(unescape(text, True), text, refuri=ref)]
1014
# File name template for an RFC; combined with the `rfc_base_url` setting.
rfc_url = 'rfc%d.html'

def rfc_reference(self, match, lineno):
    """
    Turn an RFC reference match into a reference node.

    Raise `MarkupMismatch` unless the matched text starts with "RFC".  The
    target URI is the `rfc_base_url` document setting followed by
    `self.rfc_url` filled in with the number from the "rfcnum" group.
    """
    text = match.group(0)
    if not text.startswith('RFC'):
        raise MarkupMismatch
    rfcnum = int(unescape(match.group('rfcnum')))
    ref = self.document.settings.rfc_base_url + self.rfc_url % rfcnum
    return [nodes.reference(unescape(text, True), text, refuri=ref)]
1025
def implicit_inline(self, text, lineno):
    """
    Check each of the patterns in `self.implicit_dispatch` for a match,
    and dispatch to the stored method for the pattern.  Recursively check
    the text before and after the match.  Return a list of `nodes.Text`
    and inline element nodes.

    A handler (or a recursive call) that raises `MarkupMismatch` rejects
    the match; the next pattern is then tried on the same text.  Empty
    `text` yields an empty list.
    """
    if not text:
        return []
    for pattern, method in self.implicit_dispatch:
        match = pattern.search(text)
        if not match:
            continue
        try:
            # Must recurse on strings before *and* after the match;
            # there may be multiple patterns.
            leading = self.implicit_inline(text[:match.start()], lineno)
            matched = method(match, lineno)
            trailing = self.implicit_inline(text[match.end():], lineno)
            return leading + matched + trailing
        except MarkupMismatch:
            pass
    # no implicit markup recognized: plain text
    return [nodes.Text(text, unescape(text, True))]
1047
# Maps each inline-markup start-string to the handler for that construct.
# Each handler visible in this file takes `(self, match, lineno)` and
# returns `(before, inlines, remaining, sysmessages)`.
# NOTE(review): the values are the plain function objects as named here,
# not bound methods; presumably the caller passes the instance explicitly
# -- confirm at the call site.
dispatch = {'*': emphasis,
            '**': strong,
            '`': interpreted_or_phrase_ref,
            '``': literal,
            '_`': inline_internal_target,
            ']_': footnote_reference,
            '|': substitution_reference,
            '_': reference,
            '__': anonymous_reference}
1057
1058
def _loweralpha_to_int(s, _zero=(ord('a')-1)):
    """Return the ordinal of character `s` counted from 'a' ('a' -> 1)."""
    code_point = ord(s)
    return code_point - _zero
1061
def _upperalpha_to_int(s, _zero=(ord('A')-1)):
    """Return the ordinal of character `s` counted from 'A' ('A' -> 1)."""
    code_point = ord(s)
    return code_point - _zero
1064
def _lowerroman_to_int(s):
    """Convert a lowercase Roman numeral: upper-case it for `roman.fromRoman()`."""
    uppercased = s.upper()
    return roman.fromRoman(uppercased)
1067
1068
class Body(RSTState):

    """
    Generic classifier of the first line of a block.
    """

    double_width_pad_char = tableparser.TableParser.double_width_pad_char
    """Padding character for East Asian double-width text."""

    enum = Struct()
    """Enumerated list parsing information."""

    # One entry per enumerator format.  `prefix`/`suffix` are the literal
    # characters around the enumerator; `start`/`end` are the slice bounds
    # used by `parse_enumerator()` to cut the bare enumerator text out of
    # the matched marker.
    enum.formatinfo = {
          'parens': Struct(prefix='(', suffix=')', start=1, end=-1),
          'rparen': Struct(prefix='', suffix=')', start=0, end=-1),
          'period': Struct(prefix='', suffix='.', start=0, end=-1)}
    enum.formats = enum.formatinfo.keys()
    # `parse_enumerator()` falls back to trying these in the order listed.
    enum.sequences = ['arabic', 'loweralpha', 'upperalpha',
                      'lowerroman', 'upperroman'] # ORDERED!
    # Regular expression fragment for the enumerator text of each sequence.
    enum.sequencepats = {'arabic': '[0-9]+',
                         'loweralpha': '[a-z]',
                         'upperalpha': '[A-Z]',
                         'lowerroman': '[ivxlcdm]+',
                         'upperroman': '[IVXLCDM]+',}
    # Function converting enumerator text to its ordinal, per sequence.
    enum.converters = {'arabic': int,
                       'loweralpha': _loweralpha_to_int,
                       'upperalpha': _upperalpha_to_int,
                       'lowerroman': _lowerroman_to_int,
                       'upperroman': roman.fromRoman}

    # Compiled `sequencepats`, each anchored with a trailing '$'.
    enum.sequenceregexps = {}
    for sequence in enum.sequences:
        enum.sequenceregexps[sequence] = re.compile(
              enum.sequencepats[sequence] + '$', re.UNICODE)

    grid_table_top_pat = re.compile(r'\+-[-+]+-\+ *$')
    """Matches the top (& bottom) of a full table)."""

    simple_table_top_pat = re.compile('=+( +=+)+ *$')
    """Matches the top of a simple table."""

    simple_table_border_pat = re.compile('=+[ =]*$')
    """Matches the bottom & header bottom of a simple table."""

    pats = {}
    """Fragments of patterns used by transitions."""

    pats['nonalphanum7bit'] = '[!-/:-@[-`{-~]'
    pats['alpha'] = '[a-zA-Z]'
    pats['alphanum'] = '[a-zA-Z0-9]'
    pats['alphanumplus'] = '[a-zA-Z0-9_-]'
    # Any enumerator text: one of the sequence fragments, or '#'.
    pats['enum'] = ('(%(arabic)s|%(loweralpha)s|%(upperalpha)s|%(lowerroman)s'
                    '|%(upperroman)s|#)' % enum.sequencepats)
    pats['optname'] = '%(alphanum)s%(alphanumplus)s*' % pats
    # @@@ Loosen up the pattern? Allow Unicode?
    pats['optarg'] = '(%(alpha)s%(alphanumplus)s*|<[^<>]+>)' % pats
    pats['shortopt'] = r'(-|\+)%(alphanum)s( ?%(optarg)s)?' % pats
    pats['longopt'] = r'(--|/)%(optname)s([ =]%(optarg)s)?' % pats
    pats['option'] = r'(%(shortopt)s|%(longopt)s)' % pats

    # One named group per enumerator format (named after the format), made
    # of the escaped prefix, the enumerator pattern and the escaped suffix.
    # `parse_enumerator()` inspects these groups to find the format matched.
    for format in enum.formats:
        pats[format] = '(?P<%s>%s%s%s)' % (
              format, re.escape(enum.formatinfo[format].prefix),
              pats['enum'], re.escape(enum.formatinfo[format].suffix))

    # Pattern for each transition; the keys are the transition names that
    # are listed again in `initial_transitions` below.
    patterns = {
          'bullet': u'[-+*\u2022\u2023\u2043]( +|$)',
          'enumerator': r'(%(parens)s|%(rparen)s|%(period)s)( +|$)' % pats,
          'field_marker': r':(?![: ])([^:\\]|\\.|:(?!([ `]|$)))*(?<! ):( +|$)',
          'option_marker': r'%(option)s(, %(option)s)*( +| ?$)' % pats,
          'doctest': r'>>>( +|$)',
          'line_block': r'\|( +|$)',
          'grid_table_top': grid_table_top_pat,
          'simple_table_top': simple_table_top_pat,
          'explicit_markup': r'\.\.( +|$)',
          'anonymous': r'__( +|$)',
          'line': r'(%(nonalphanum7bit)s)\1* *$' % pats,
          'text': r''}
    # NOTE(review): presumably the state machine tries these transitions in
    # the order given -- confirm against the `statemachine` module.
    initial_transitions = (
          'bullet',
          'enumerator',
          'field_marker',
          'option_marker',
          'doctest',
          'line_block',
          'grid_table_top',
          'simple_table_top',
          'explicit_markup',
          'anonymous',
          'line',
          'text')
1160
def indent(self, match, context, next_state):
    """
    Block quote.

    Fetch the indented block from the state machine, parse it with
    `self.block_quote()` and add the resulting elements to `self.parent`.
    A warning is added if the block does not end with a blank line.
    Return `(context, next_state, [])`.
    """
    (indented, _indent, line_offset,
     blank_finish) = self.state_machine.get_indented()
    elements = self.block_quote(indented, line_offset)
    self.parent += elements
    if not blank_finish:
        self.parent += self.unindent_warning('Block quote')
    return context, next_state, []
1170
def block_quote(self, indented, line_offset):
    """
    Parse `indented` into one or more block quotes.

    The lines are repeatedly split with `self.split_attribution()`: each
    chunk becomes a `nodes.block_quote`, followed by its attribution if one
    was found.  Blank lines between chunks are skipped.
    Return the list of block quote nodes and system messages.
    """
    elements = []
    while indented:
        (quote_lines, attribution_lines, attribution_offset,
         indented, new_line_offset) = self.split_attribution(
             indented, line_offset)
        blockquote = nodes.block_quote()
        self.nested_parse(quote_lines, line_offset, blockquote)
        elements.append(blockquote)
        if attribution_lines:
            attribution, messages = self.parse_attribution(
                attribution_lines, attribution_offset)
            blockquote += attribution
            elements += messages
        line_offset = new_line_offset
        # skip blank lines before the next block quote
        while indented and not indented[0]:
            indented = indented[1:]
            line_offset += 1
    return elements
1192
# U+2014 is an em-dash:
attribution_pattern = re.compile(u'(---?(?!-)|\u2014) *(?=[^ \\n])',
                                 re.UNICODE)

def split_attribution(self, indented, line_offset):
    """
    Check for a block quote attribution and split it off:

    * First line after a blank line must begin with a dash ("--", "---",
      em-dash; matches `self.attribution_pattern`).
    * Every line after that must have consistent indentation.
    * Attributions must be preceded by block quote content.

    Return a tuple of: (block quote content lines, attribution lines,
    attribution offset, remaining indented lines, line offset of the
    remaining lines).  The attribution offset is the index of the
    attribution's first line within `indented`.

    If no attribution is found, return `(indented, None, None, None, None)`.
    """
    blank = None                # index of the most recent blank line
    nonblank_seen = False
    for i, raw_line in enumerate(indented):
        line = raw_line.rstrip()
        if not line:
            blank = i
            continue
        if nonblank_seen and blank == i - 1:    # last line blank
            match = self.attribution_pattern.match(line)
            if match:
                attribution_end, indent = self.check_attribution(
                    indented, i)
                if attribution_end:
                    a_lines = indented[i:attribution_end]
                    # strip the dash marker from the first line and the
                    # common indent from the continuation lines
                    a_lines.trim_left(match.end(), end=1)
                    a_lines.trim_left(indent, start=1)
                    return (indented[:i], a_lines,
                            i, indented[attribution_end:],
                            line_offset + attribution_end)
        nonblank_seen = True
    return (indented, None, None, None, None)
1231
def check_attribution(self, indented, attribution_start):
    """
    Check attribution shape.

    `attribution_start` is the index in `indented` of the attribution's
    first line.  The attribution extends up to the next blank line (or the
    end of `indented`); all of its continuation lines must share the same
    indentation.

    Return the index past the end of the attribution, and the indent
    (0 if there are no continuation lines).  Return `(None, None)` if the
    continuation lines are inconsistently indented.
    """
    indent = None
    for i in range(attribution_start + 1, len(indented)):
        line = indented[i].rstrip()
        if not line:
            # a blank line ends the attribution
            return i, (indent or 0)
        line_indent = len(line) - len(line.lstrip())
        if indent is None:
            indent = line_indent
        elif line_indent != indent:
            return None, None       # bad shape; not an attribution
    # The attribution runs to the end of `indented`.  This also covers an
    # attribution without continuation lines, where the loop body never
    # runs; adding 1 to a counter there would overshoot the end.
    return len(indented), (indent or 0)
1251
def parse_attribution(self, indented, line_offset):
    """
    Build a `nodes.attribution` from the attribution lines `indented`.

    The lines are joined, stripped of trailing whitespace and parsed as
    inline text.  The line number used is the state machine's absolute line
    number plus `line_offset`.
    Return `(attribution node, system messages)`.
    """
    text = '\n'.join(indented).rstrip()
    lineno = self.state_machine.abs_line_number() + line_offset
    textnodes, messages = self.inline_text(text, lineno)
    attribution = nodes.attribution(text, '', *textnodes)
    source_and_line = self.state_machine.get_source_and_line(lineno)
    attribution.source, attribution.line = source_and_line
    return attribution, messages
1259
def bullet(self, match, context, next_state):
    """
    Bullet list item.

    Create a `nodes.bullet_list` in `self.parent`, parse the first item,
    then parse the following lines in the 'BulletList' state.  A warning
    is added if the list does not end with a blank line.
    Return `([], next_state, [])`.
    """
    bulletlist = nodes.bullet_list()
    source_and_line = self.state_machine.get_source_and_line()
    bulletlist.source, bulletlist.line = source_and_line
    self.parent += bulletlist
    bulletlist['bullet'] = match.string[0]
    first_item, blank_finish = self.list_item(match.end())
    bulletlist += first_item
    next_offset = self.state_machine.line_offset + 1    # next line
    new_line_offset, blank_finish = self.nested_list_parse(
        self.state_machine.input_lines[next_offset:],
        input_offset=self.state_machine.abs_line_offset() + 1,
        node=bulletlist, initial_state='BulletList',
        blank_finish=blank_finish)
    self.goto_line(new_line_offset)
    if not blank_finish:
        self.parent += self.unindent_warning('Bullet list')
    return [], next_state, []
1279
def list_item(self, indent):
    """
    Parse one list item whose marker ends at column `indent`.

    If text follows the marker on the current line, the item's indentation
    is known to be `indent`; otherwise it is taken from the first indented
    line.  Return `(list_item node, blank_finish)`.
    """
    state_machine = self.state_machine
    if state_machine.line[indent:]:
        indented, line_offset, blank_finish = (
            state_machine.get_known_indented(indent))
    else:
        (indented, indent, line_offset,
         blank_finish) = state_machine.get_first_known_indented(indent)
    item = nodes.list_item('\n'.join(indented))
    if indented:
        self.nested_parse(indented, input_offset=line_offset, node=item)
    return item, blank_finish
1292
def enumerator(self, match, context, next_state):
    """
    Enumerated List Item

    Raise `statemachine.TransitionCorrection('text')` if the line is not
    really an enumerated list item.  Otherwise create a
    `nodes.enumerated_list` in `self.parent`, parse the first item, then
    parse the following lines in the 'EnumeratedList' state.  An info
    message is added if the list does not start at ordinal 1, and a
    warning if it does not end with a blank line.
    Return `([], next_state, [])`.
    """
    fmt, sequence, text, ordinal = self.parse_enumerator(match)
    if not self.is_enumerated_list_item(ordinal, sequence, fmt):
        raise statemachine.TransitionCorrection('text')
    enumlist = nodes.enumerated_list()
    self.parent += enumlist
    # auto-enumerated lists are treated as arabic
    enumlist['enumtype'] = 'arabic' if sequence == '#' else sequence
    enumlist['prefix'] = self.enum.formatinfo[fmt].prefix
    enumlist['suffix'] = self.enum.formatinfo[fmt].suffix
    if ordinal != 1:
        enumlist['start'] = ordinal
        msg = self.reporter.info(
            'Enumerated list start value not ordinal-1: "%s" (ordinal %s)'
            % (text, ordinal))
        self.parent += msg
    first_item, blank_finish = self.list_item(match.end())
    enumlist += first_item
    next_offset = self.state_machine.line_offset + 1    # next line
    settings = {'lastordinal': ordinal,
                'format': fmt,
                'auto': sequence == '#'}
    newline_offset, blank_finish = self.nested_list_parse(
        self.state_machine.input_lines[next_offset:],
        input_offset=self.state_machine.abs_line_offset() + 1,
        node=enumlist, initial_state='EnumeratedList',
        blank_finish=blank_finish,
        extra_settings=settings)
    self.goto_line(newline_offset)
    if not blank_finish:
        self.parent += self.unindent_warning('Enumerated list')
    return [], next_state, []
1327
def parse_enumerator(self, match, expected_sequence=None):
    """
    Analyze an enumerator and return the results.

    :Return:
        - the enumerator format ('period', 'parens', or 'rparen'),
        - the sequence used ('arabic', 'loweralpha', 'upperroman', etc.),
        - the text of the enumerator, stripped of formatting, and
        - the ordinal value of the enumerator ('a' -> 1, 'ii' -> 2, etc.;
          ``None`` is returned for invalid enumerator text).

    :Exception: `ParserError` if no format or sequence matches, or if
        `expected_sequence` is not a known sequence name.

    The enumerator format has already been determined by the regular
    expression match. If `expected_sequence` is given, that sequence is
    tried first. If not, we check for Roman numeral 1. This way,
    single-character Roman numerals (which are also alphabetical) can be
    matched. If no sequence has been matched, all sequences are checked in
    order.
    """
    groupdict = match.groupdict()
    sequence = ''
    for fmt in self.enum.formats:
        if groupdict[fmt]:      # was this the format matched?
            break               # yes; keep `fmt`
    else:                       # shouldn't happen
        raise ParserError('enumerator format not matched')
    formatinfo = self.enum.formatinfo[fmt]
    # strip the format's prefix & suffix characters
    text = groupdict[fmt][formatinfo.start:formatinfo.end]
    if text == '#':
        sequence = '#'
    elif expected_sequence:
        try:
            expected_regexp = self.enum.sequenceregexps[expected_sequence]
        except KeyError:        # shouldn't happen
            # Report the name that was looked up; `sequence` is still
            # empty at this point.
            raise ParserError('unknown enumerator sequence: %s'
                              % expected_sequence)
        if expected_regexp.match(text):
            sequence = expected_sequence
    elif text == 'i':
        sequence = 'lowerroman'
    elif text == 'I':
        sequence = 'upperroman'
    if not sequence:
        for sequence in self.enum.sequences:
            if self.enum.sequenceregexps[sequence].match(text):
                break
        else:                   # shouldn't happen
            raise ParserError('enumerator sequence not matched')
    if sequence == '#':
        ordinal = 1
    else:
        try:
            ordinal = self.enum.converters[sequence](text)
        except roman.InvalidRomanNumeralError:
            ordinal = None
    return fmt, sequence, text, ordinal
1382
def is_enumerated_list_item(self, ordinal, sequence, format):
    """
    Check validity based on the ordinal value and the second line.

    Return true (1) if the ordinal is valid and the second line is blank,
    indented, or starts with the next enumerator or an auto-enumerator;
    also when there is no second line.  Return ``None`` otherwise.
    """
    if ordinal is None:
        return None
    state_machine = self.state_machine
    # Peek at the following line, then step back again.
    try:
        next_line = state_machine.next_line()
    except EOFError:              # end of input lines
        state_machine.previous_line()
        return 1
    else:
        state_machine.previous_line()
    if not next_line[:1].strip(): # blank or indented
        return 1
    result = self.make_enumerator(ordinal + 1, sequence, format)
    if not result:
        return None
    next_enumerator, auto_enumerator = result
    try:
        if next_line.startswith(next_enumerator):
            return 1
        if next_line.startswith(auto_enumerator):
            return 1
    except TypeError:
        pass
    return None
1411
def make_enumerator(self, ordinal, sequence, format):
    """
    Construct and return the next enumerated list item marker, and an
    auto-enumerator ("#" instead of the regular enumerator).

    Both markers consist of the prefix and suffix of `format` (looked up
    in `self.enum.formatinfo`) around the enumerator, plus a trailing
    space.

    Return ``None`` for invalid (out of range) ordinals.
    """
    if sequence in ('#', 'arabic'):
        # these enumerators have no letter case
        enumerator = '#' if sequence == '#' else str(ordinal)
    else:
        if sequence.endswith('alpha'):
            if ordinal > 26:
                return None
            enumerator = chr(ord('a') + ordinal - 1)
        elif sequence.endswith('roman'):
            try:
                enumerator = roman.toRoman(ordinal)
            except roman.RomanError:
                return None
        else:                       # shouldn't happen
            raise ParserError('unknown enumerator sequence: "%s"'
                              % sequence)
        if sequence.startswith('lower'):
            enumerator = enumerator.lower()
        elif sequence.startswith('upper'):
            enumerator = enumerator.upper()
        else:                       # shouldn't happen
            raise ParserError('unknown enumerator sequence: "%s"'
                              % sequence)
    info = self.enum.formatinfo[format]
    next_enumerator = info.prefix + enumerator + info.suffix + ' '
    auto_enumerator = info.prefix + '#' + info.suffix + ' '
    return next_enumerator, auto_enumerator
1448
def field_marker(self, match, context, next_state):
    """
    Field list item.

    Create a `nodes.field_list` in `self.parent`, parse the first field,
    then parse the following lines in the 'FieldList' state.  A warning is
    added if the list does not end with a blank line.
    Return `([], next_state, [])`.
    """
    field_list = nodes.field_list()
    self.parent += field_list
    first_field, blank_finish = self.field(match)
    field_list += first_field
    next_offset = self.state_machine.line_offset + 1    # next line
    newline_offset, blank_finish = self.nested_list_parse(
        self.state_machine.input_lines[next_offset:],
        input_offset=self.state_machine.abs_line_offset() + 1,
        node=field_list, initial_state='FieldList',
        blank_finish=blank_finish)
    self.goto_line(newline_offset)
    if not blank_finish:
        self.parent += self.unindent_warning('Field list')
    return [], next_state, []
1465
def field(self, match):
    """
    Parse one field (name and body) starting at the current line.

    The field name is parsed as inline text; system messages from that
    are placed at the start of the field body.  The body lines are parsed
    with `self.parse_field_body()`.
    Return `(field node, blank_finish)`.
    """
    name = self.parse_field_marker(match)
    # Read the position before fetching the body lines from the state
    # machine.
    src, srcline = self.state_machine.get_source_and_line()
    lineno = self.state_machine.abs_line_number()
    (indented, _indent, line_offset,
     blank_finish) = self.state_machine.get_first_known_indented(match.end())
    field_node = nodes.field()
    field_node.source, field_node.line = src, srcline
    name_nodes, name_messages = self.inline_text(name, lineno)
    field_node += nodes.field_name(name, '', *name_nodes)
    body = nodes.field_body('\n'.join(indented), *name_messages)
    field_node += body
    if indented:
        self.parse_field_body(indented, line_offset, body)
    return field_node, blank_finish
1482
def parse_field_marker(self, match):
    """
    Extract & return field name from a field marker match.

    The name is the matched text without its leading ':' and without
    everything from the last ':' onwards.
    """
    after_colon = match.group()[1:]     # strip off leading ':'
    closing = after_colon.rfind(':')
    return after_colon[:closing]        # strip off trailing ':' etc.
1488
def parse_field_body(self, indented, offset, node):
    """Parse the field body lines `indented` into `node` (nested parse)."""
    self.nested_parse(indented, input_offset=offset, node=node)
    return None
1491
def option_marker(self, match, context, next_state):
    """
    Option list item.

    Parse the first option list item; on success add a `nodes.option_list`
    to `self.parent` and parse the following lines in the 'OptionList'
    state.  If the marker turns out to be invalid (`MarkupError`), report
    an error and treat the indented text as a block quote instead.  A
    warning is added if the construct does not end with a blank line.
    Return `([], next_state, [])`.
    """
    optionlist = nodes.option_list()
    source_and_line = self.state_machine.get_source_and_line()
    optionlist.source, optionlist.line = source_and_line
    try:
        first_item, blank_finish = self.option_list_item(match)
    except MarkupError as error:
        # This shouldn't happen; pattern won't match.
        msg = self.reporter.error(u'Invalid option list marker: %s' %
                                  error)
        self.parent += msg
        (indented, _indent, line_offset, blank_finish
         ) = self.state_machine.get_first_known_indented(match.end())
        elements = self.block_quote(indented, line_offset)
        self.parent += elements
        if not blank_finish:
            self.parent += self.unindent_warning('Option list')
        return [], next_state, []
    self.parent += optionlist
    optionlist += first_item
    next_offset = self.state_machine.line_offset + 1    # next line
    newline_offset, blank_finish = self.nested_list_parse(
        self.state_machine.input_lines[next_offset:],
        input_offset=self.state_machine.abs_line_offset() + 1,
        node=optionlist, initial_state='OptionList',
        blank_finish=blank_finish)
    self.goto_line(newline_offset)
    if not blank_finish:
        self.parent += self.unindent_warning('Option list')
    return [], next_state, []
1522
def option_list_item(self, match):
    """
    Parse one option list item (option group and description).

    If no description text follows the option marker, this is not an
    option list item: go back to the starting line and raise
    `statemachine.TransitionCorrection('text')`.
    Return `(option_list_item node, blank_finish)`.
    """
    start_offset = self.state_machine.abs_line_offset()
    options = self.parse_option_marker(match)
    (indented, _indent, line_offset, blank_finish
     ) = self.state_machine.get_first_known_indented(match.end())
    if not indented:                # not an option list item
        self.goto_line(start_offset)
        raise statemachine.TransitionCorrection('text')
    option_group = nodes.option_group('', *options)
    description = nodes.description('\n'.join(indented))
    item = nodes.option_list_item('', option_group, description)
    # `indented` is known to be non-empty here (see the check above)
    self.nested_parse(indented, input_offset=line_offset,
                      node=description)
    return item, blank_finish
1539
def parse_option_marker(self, match):
    """
    Return a list of `node.option` and `node.option_argument` objects,
    parsed from an option marker match.

    The matched text is split at ", " into option strings.  Each one is
    split into an option and an optional argument; the recognized forms
    are "--opt=value", "-ovalue"/"+ovalue", "-o value" and
    "-o <value1 value2>".

    :Exception: `MarkupError` for invalid option markers.
    """
    optlist = []
    for optionstring in match.group().rstrip().split(', '):
        tokens = optionstring.split()
        delimiter = ' '
        head = tokens[0]
        head_parts = head.split('=', 1)
        if len(head_parts) > 1:
            # "--opt=value" form
            tokens[:1] = head_parts
            delimiter = '='
        elif (len(head) > 2
              and ((head.startswith('-') and not head.startswith('--'))
                   or head.startswith('+'))):
            # "-ovalue" form
            tokens[:1] = [head[:2], head[2:]]
            delimiter = ''
        if len(tokens) > 1 and (tokens[1].startswith('<')
                                and tokens[-1].endswith('>')):
            # "-o <value1 value2>" form; join all values into one token
            tokens[1:] = [' '.join(tokens[1:])]
        if not 0 < len(tokens) <= 2:
            raise MarkupError(
                'wrong number of option tokens (=%s), should be 1 or 2: '
                '"%s"' % (len(tokens), optionstring))
        option = nodes.option(optionstring)
        option += nodes.option_string(tokens[0], tokens[0])
        if len(tokens) > 1:
            option += nodes.option_argument(tokens[1], tokens[1],
                                            delimiter=delimiter)
        optlist.append(option)
    return optlist
1580
def doctest(self, match, context, next_state):
    """
    Doctest block.

    The current text block is added to `self.parent` as a
    `nodes.doctest_block`.  Return `([], next_state, [])`.
    """
    text_block = self.state_machine.get_text_block()
    data = '\n'.join(text_block)
    # TODO: prepend class value ['pycon'] (Python Console)
    # parse with `directives.body.CodeBlock` (returns literal-block
    # with class "code" and syntax highlight markup).
    self.parent += nodes.doctest_block(data, data)
    return [], next_state, []
1588
def line_block(self, match, context, next_state):
    """
    First line of a line block.

    Create a `nodes.line_block` in `self.parent` and parse the first line.
    Unless that line ends with a blank line, the following lines are parsed
    in the 'LineBlock' state.  A warning is added if the block does not
    end with a blank line.  Finally the lines are nested according to
    their indentation.
    Return `([], next_state, [])`.
    """
    block = nodes.line_block()
    self.parent += block
    lineno = self.state_machine.abs_line_number()
    first_line, messages, blank_finish = self.line_block_line(match, lineno)
    block += first_line
    self.parent += messages
    if not blank_finish:
        next_offset = self.state_machine.line_offset + 1    # next line
        new_line_offset, blank_finish = self.nested_list_parse(
            self.state_machine.input_lines[next_offset:],
            input_offset=self.state_machine.abs_line_offset() + 1,
            node=block, initial_state='LineBlock',
            blank_finish=0)
        self.goto_line(new_line_offset)
    if not blank_finish:
        self.parent += self.reporter.warning(
            'Line block ends without a blank line.',
            line=lineno+1)
    if len(block):
        first = block[0]
        if first.indent is None:
            first.indent = 0
        self.nest_line_block_lines(block)
    return [], next_state, []
1614
def line_block_line(self, match, lineno):
    """
    Return one line element of a line_block.

    The line's text (with continuation lines, up to a blank line) is
    parsed as inline text.  Unless the line is empty (just "|"), its
    `indent` attribute is set from the length of the match's first group.
    Return `(line node, system messages, blank_finish)`.
    """
    (indented, _indent, _line_offset, blank_finish
     ) = self.state_machine.get_first_known_indented(match.end(),
                                                     until_blank=True)
    text = u'\n'.join(indented)
    text_nodes, messages = self.inline_text(text, lineno)
    line = nodes.line(text, '', *text_nodes)
    is_empty = match.string.rstrip() == '|'
    if not is_empty:
        line.indent = len(match.group(1)) - 1
    return line, messages, blank_finish
1626
def nest_line_block_lines(self, block):
    """
    Give every line of `block` an indent, then nest the lines.

    A line (after the first) whose `indent` is missing or ``None`` takes
    over the indent of the line before it.  The actual nesting is done by
    `self.nest_line_block_segment()`.
    """
    for position in range(1, len(block)):
        current = block[position]
        if getattr(current, 'indent', None) is None:
            current.indent = block[position - 1].indent
    self.nest_line_block_segment(block)
1632
def nest_line_block_segment(self, block):
    """
    Nest the lines of `block` in place, according to their indents.

    Lines at the smallest indent stay direct children of `block`.  Each
    run of consecutive deeper-indented lines is moved into a new
    `nodes.line_block`, which is nested recursively and inserted at the
    position of the run.
    """
    least = min([item.indent for item in block])
    regrouped = []
    pending = nodes.line_block()    # collects the current run of deeper lines
    for item in block:
        if item.indent > least:
            pending.append(item)
            continue
        if len(pending):
            # a run of deeper lines ends here
            self.nest_line_block_segment(pending)
            regrouped.append(pending)
            pending = nodes.line_block()
        regrouped.append(item)
    if len(pending):
        self.nest_line_block_segment(pending)
        regrouped.append(pending)
    block[:] = regrouped
1651
def grid_table_top(self, match, context, next_state):
    """
    Top border of a full table.

    Delegates to `self.table_top()` with the grid table isolation method
    and `tableparser.GridTableParser`.
    """
    isolate = self.isolate_grid_table
    parser_class = tableparser.GridTableParser
    return self.table_top(match, context, next_state, isolate, parser_class)
1657
def simple_table_top(self, match, context, next_state):
    """
    Top border of a simple table.

    Delegates to `self.table_top()` with the simple table isolation method
    and `tableparser.SimpleTableParser`.
    """
    isolate = self.isolate_simple_table
    parser_class = tableparser.SimpleTableParser
    return self.table_top(match, context, next_state, isolate, parser_class)
1663
def table_top(self, match, context, next_state,
              isolate_function, parser_class):
    """
    Top border of a generic table.

    Parse the table with `self.table()` and add the resulting nodes to
    `self.parent`.  A warning is added if no blank line follows the table.
    Return `([], next_state, [])`.
    """
    nodelist, blank_finish = self.table(isolate_function, parser_class)
    self.parent += nodelist
    if blank_finish:
        return [], next_state, []
    msg = self.reporter.warning(
        'Blank line required after table.',
        line=self.state_machine.abs_line_number()+1)
    self.parent += msg
    return [], next_state, []
1675
def table(self, isolate_function, parser_class):
    """
    Parse a table.

    `isolate_function` is called without arguments and must return
    `(block, messages, blank_finish)`.  A non-empty block is parsed with an
    instance of `parser_class` and turned into a table node; a
    `tableparser.TableMarkupError` during that is converted into a
    "malformed table" error.
    Return `(list of nodes, blank_finish)`.
    """
    block, messages, blank_finish = isolate_function()
    if not block:
        return messages, blank_finish
    try:
        tabledata = parser_class().parse(block)
        # line number of the table's first line
        tableline = (self.state_machine.abs_line_number() - len(block)
                     + 1)
        table = self.build_table(tabledata, tableline)
        nodelist = [table] + messages
    except tableparser.TableMarkupError as err:
        nodelist = self.malformed_table(block, ' '.join(err.args),
                                        offset=err.offset) + messages
    return nodelist, blank_finish
1683 tableline = (self.state_machine.abs_line_number() - len(block)
1684 + 1)
1685 table = self.build_table(tabledata, tableline)
1686 nodelist = [table] + messages
1687 except tableparser.TableMarkupError as err:
1688 nodelist = self.malformed_table(block, ' '.join(err.args),
1689 offset=err.offset) + messages
1690 else:
1691 nodelist = messages
1692 return nodelist, blank_finish
1693
1694 def isolate_grid_table(self):
1695 messages = []
1696 blank_finish = 1
1697 try:
1698 block = self.state_machine.get_text_block(flush_left=True)
1699 except statemachine.UnexpectedIndentationError as err:
1700 block, src, srcline = err.args
1701 messages.append(self.reporter.error('Unexpected indentation.',
1702 source=src, line=srcline))
1703 blank_finish = 0
1704 block.disconnect()
1705 # for East Asian chars:
1706 block.pad_double_width(self.double_width_pad_char)
1707 width = len(block[0].strip())
1708 for i in range(len(block)):
1709 block[i] = block[i].strip()
1710 if block[i][0] not in '+|': # check left edge
1711 blank_finish = 0
1712 self.state_machine.previous_line(len(block) - i)
1713 del block[i:]
1714 break
1715 if not self.grid_table_top_pat.match(block[-1]): # find bottom
1716 blank_finish = 0
1717 # from second-last to third line of table:
1718 for i in range(len(block) - 2, 1, -1):
1719 if self.grid_table_top_pat.match(block[i]):
1720 self.state_machine.previous_line(len(block) - i + 1)
1721 del block[i+1:]
1722 break
1723 else:
1724 messages.extend(self.malformed_table(block))
1725 return [], messages, blank_finish
1726 for i in range(len(block)): # check right edge
1727 if len(block[i]) != width or block[i][-1] not in '+|':
1728 messages.extend(self.malformed_table(block))
1729 return [], messages, blank_finish
1730 return block, messages, blank_finish
1731
def isolate_simple_table(self):
    """
    Extract the lines of a simple table from the state machine.

    Starting at the current line (the top border), scan the input lines
    for border lines (`self.simple_table_border_pat`).  The table ends at
    the second border found, or at a border that is the last input line or
    is followed by a blank line.

    Return `(block, messages, blank_finish)`.  `block` holds the table
    lines, or is an empty list if the table is malformed (a border whose
    length differs from the top border, or no bottom border found).
    """
    start = self.state_machine.line_offset
    lines = self.state_machine.input_lines
    limit = len(lines) - 1          # index of the last input line
    toplen = len(lines[start].strip())
    pattern_match = self.simple_table_border_pat.match
    found = 0                       # number of border lines seen so far
    found_at = None                 # index of the latest border line
    i = start + 1
    while i <= limit:
        line = lines[i]
        match = pattern_match(line)
        if match:
            if len(line.strip()) != toplen:
                # NOTE(review): `next_line(n)` presumably advances the
                # state machine by n lines -- confirm in `statemachine`.
                self.state_machine.next_line(i - start)
                messages = self.malformed_table(
                    lines[start:i+1], 'Bottom/header table border does '
                    'not match top border.')
                return [], messages, i == limit or not lines[i+1].strip()
            found += 1
            found_at = i
            if found == 2 or i == limit or not lines[i+1].strip():
                end = i
                break
        i += 1
    else:                           # reached end of input_lines
        if found:
            # a border was seen, but it did not end the table
            extra = ' or no blank line after table bottom'
            self.state_machine.next_line(found_at - start)
            block = lines[start:found_at+1]
        else:
            extra = ''
            self.state_machine.next_line(i - start - 1)
            block = lines[start:]
        messages = self.malformed_table(
            block, 'No bottom table border found%s.' % extra)
        return [], messages, not extra
    self.state_machine.next_line(end - start)
    block = lines[start:end+1]
    # for East Asian chars:
    block.pad_double_width(self.double_width_pad_char)
    return block, [], end == limit or not lines[end+1].strip()
1774
def malformed_table(self, block, detail='', offset=0):
    """
    Build the error report for a table that could not be parsed.

    The double-width padding character is removed from `block` (in
    place) before its text is quoted in the message.  `detail`, if given,
    is appended to the message on its own line; `offset` is added to the
    computed start line.  Return a one-element list holding the error.
    """
    block.replace(self.double_width_pad_char, '')
    table_text = '\n'.join(block)
    summary = 'Malformed table.'
    # The current line is taken to be the block's last line.
    first_line = self.state_machine.abs_line_number() - len(block) + 1
    if detail:
        summary = summary + '\n' + detail
    quoted = nodes.literal_block(table_text, table_text)
    return [self.reporter.error(summary, quoted, line=first_line+offset)]
1785
def build_table(self, tabledata, tableline, stub_columns=0, widths=None):
    """
    Assemble a ``table`` node from parsed table data.

    `tabledata` is a ``(colwidths, headrows, bodyrows)`` triple and
    `tableline` is passed through to `build_table_row` for every row.
    The first `stub_columns` column specs are flagged as stubs.  `widths`
    selects the "colwidths-auto" class when it equals 'auto' and the
    "colwidths-given" class for any other true value.
    """
    column_widths, head_rows, body_rows = tabledata
    table = nodes.table()
    if widths == 'auto':
        width_class = 'colwidths-auto'
    elif widths:  # "grid" or list of integers
        width_class = 'colwidths-given'
    else:
        width_class = None
    if width_class is not None:
        table['classes'] += [width_class]
    tgroup = nodes.tgroup(cols=len(column_widths))
    table += tgroup
    stubs_left = stub_columns
    for column_width in column_widths:
        colspec = nodes.colspec(colwidth=column_width)
        if stubs_left:
            colspec.attributes['stub'] = 1
            stubs_left -= 1
        tgroup += colspec
    # The header section is emitted only when there are header rows; the
    # body section is always emitted.
    sections = []
    if head_rows:
        sections.append((nodes.thead, head_rows))
    sections.append((nodes.tbody, body_rows))
    for make_section, rows in sections:
        section = make_section()
        tgroup += section
        for rowdata in rows:
            section += self.build_table_row(rowdata, tableline)
    return table
1811
def build_table_row(self, rowdata, tableline):
    """
    Build a ``row`` node from one row of parsed table data.

    Each item of `rowdata` is either None (skipped) or a
    ``(morerows, morecols, offset, cellblock)`` tuple.  Cells with
    non-empty text are parsed recursively into their ``entry`` node.
    """
    row = nodes.row()
    for cell in (item for item in rowdata if item is not None):
        morerows, morecols, offset, cellblock = cell
        # Only record spans that are actually non-zero.
        spans = (('morerows', morerows), ('morecols', morecols))
        attributes = dict((key, value) for key, value in spans if value)
        entry = nodes.entry(**attributes)
        row += entry
        if not ''.join(cellblock):
            continue
        self.nested_parse(cellblock, input_offset=tableline+offset,
                          node=entry)
    return row
1829
1830
# Namespace object collecting the regular expressions (and, further down
# in this class body, the construct table) used for explicit markup.
explicit = Struct()
"""Patterns and constants used for explicit markup recognition."""

# All three patterns are compiled in verbose mode, so the whitespace and
# "#" comments inside the literals are not part of the expressions.  The
# ``%(...)s`` placeholders are filled in from the attributes of `Inliner`.
explicit.patterns = Struct(
    # Matches the name part of a hyperlink target up to and including the
    # terminating colon; the name is available as group "name".
    target=re.compile(r"""
                      (
                      _ # anonymous target
                      | # *OR*
                      (?!_) # no underscore at the beginning
                      (?P<quote>`?) # optional open quote
                      (?![ `]) # first char. not space or
                      # backquote
                      (?P<name> # reference name
                      .+?
                      )
                      %(non_whitespace_escape_before)s
                      (?P=quote) # close quote if open quote used
                      )
                      (?<!(?<!\x00):) # no unescaped colon at end
                      %(non_whitespace_escape_before)s
                      [ ]? # optional space
                      : # end of reference name
                      ([ ]+|$) # followed by whitespace
                      """ % vars(Inliner), re.VERBOSE | re.UNICODE),
    # Matches a whole string that is a reference: either a simple name
    # followed by "_" (group "simple") or a backquoted phrase followed by
    # "_" (group "phrase").
    reference=re.compile(r"""
                         (
                         (?P<simple>%(simplename)s)_
                         | # *OR*
                         ` # open backquote
                         (?![ ]) # not space
                         (?P<phrase>.+?) # hyperlink phrase
                         %(non_whitespace_escape_before)s
                         `_ # close backquote,
                         # reference mark
                         )
                         $ # end of string
                         """ % vars(Inliner), re.VERBOSE | re.UNICODE),
    # Matches the substitution text (group "name") up to and including
    # the closing "|" delimiter.
    substitution=re.compile(r"""
                            (
                            (?![ ]) # first char. not space
                            (?P<name>.+?) # substitution text
                            %(non_whitespace_escape_before)s
                            \| # close delimiter
                            )
                            ([ ]+|$) # followed by whitespace
                            """ % vars(Inliner),
                            re.VERBOSE | re.UNICODE),)
1878
def footnote(self, match):
    """
    Build a ``footnote`` node from a matched footnote marker.

    The label (group 1 of `match`) decides the kind of footnote:
    a leading "#" means auto-numbered (optionally with a name), "*" means
    auto-symbol, anything else is a manually numbered footnote that gets
    an explicit label.  Return ``([footnote], blank_finish)``.
    """
    source, line_number = self.state_machine.get_source_and_line()
    (body_lines, _indent, body_offset,
     blank_finish) = self.state_machine.get_first_known_indented(
         match.end())
    label = match.group(1)
    name = normalize_name(label)
    node = nodes.footnote('\n'.join(body_lines))
    node.source = source
    node.line = line_number
    if name[0] == '#':
        # auto-numbered; whatever follows the "#" is the autonumber label
        name = name[1:]
        node['auto'] = 1
        if name:
            node['names'].append(name)
        self.document.note_autofootnote(node)
    elif name == '*':
        # auto-symbol footnotes carry no name
        name = ''
        node['auto'] = '*'
        self.document.note_symbol_footnote(node)
    else:
        # manually numbered
        node += nodes.label('', label)
        node['names'].append(name)
        self.document.note_footnote(node)
    # Named footnotes are registered as explicit targets; unnamed ones
    # only get an ID.
    if name:
        self.document.note_explicit_target(node, node)
    else:
        self.document.set_id(node, node)
    if body_lines:
        self.nested_parse(body_lines, input_offset=body_offset, node=node)
    return [node], blank_finish
1909
def citation(self, match):
    """
    Build a ``citation`` node from a matched citation marker.

    The label is group 1 of `match`; it is stored as a ``label`` child
    and, normalized, as the node's name.  The indented body, if any, is
    parsed into the node.  Return ``([citation], blank_finish)``.
    """
    source, line_number = self.state_machine.get_source_and_line()
    (body_lines, _indent, body_offset,
     blank_finish) = self.state_machine.get_first_known_indented(
         match.end())
    label = match.group(1)
    name = normalize_name(label)
    node = nodes.citation('\n'.join(body_lines))
    node.source = source
    node.line = line_number
    node += nodes.label('', label)
    node['names'].append(name)
    # Register with the document both as a citation and as a target.
    self.document.note_citation(node)
    self.document.note_explicit_target(node, node)
    if body_lines:
        self.nested_parse(body_lines, input_offset=body_offset, node=node)
    return [node], blank_finish
1926
def hyperlink_target(self, match):
    """
    Parse an explicit hyperlink target and return its node.

    The target name may span several lines, so lines of the block are
    concatenated one at a time until ``explicit.patterns.target`` matches.
    Raise `MarkupError` if the lines run out before it does.
    Return ``([target], blank_finish)``.
    """
    pattern = self.explicit.patterns.target
    lineno = self.state_machine.abs_line_number()
    block, indent, offset, blank_finish = \
          self.state_machine.get_first_known_indented(
          match.end(), until_blank=True, strip_indent=False)
    # Keep the raw source text (marker plus block) for the target node.
    blocktext = match.string[:match.end()] + '\n'.join(block)
    block = [escape2null(line) for line in block]
    escaped = block[0]
    blockindex = 0
    while True:
        targetmatch = pattern.match(escaped)
        if targetmatch:
            break
        # No match yet: append the next line and try again.
        blockindex += 1
        try:
            escaped += block[blockindex]
        except IndexError:
            raise MarkupError('malformed hyperlink target.')
    # Drop the lines that were consumed entirely by the target name.
    del block[:blockindex]
    # ``targetmatch.end()-len(escaped)-1`` is a negative index: it keeps
    # only the text of the last consumed line that follows the match
    # (the appended space guarantees the index is never 0).
    block[0] = (block[0] + ' ')[targetmatch.end()-len(escaped)-1:].strip()
    target = self.make_target(block, blocktext, lineno,
                              targetmatch.group('name'))
    return [target], blank_finish
1951
def make_target(self, block, block_text, lineno, target_name):
    """
    Create the node for a hyperlink target named `target_name`.

    The kind reported by `parse_target` selects the result: an indirect
    target for 'refname', a URI target for 'refuri'.  For any other kind
    the data returned by `parse_target` is handed back unchanged.
    """
    kind, payload = self.parse_target(block, block_text, lineno)
    if kind == 'refname':
        indirect = nodes.target(block_text, '',
                                refname=normalize_name(payload))
        indirect.indirect_reference_name = payload
        self.add_target(target_name, '', indirect, lineno)
        self.document.note_indirect_target(indirect)
        return indirect
    if kind == 'refuri':
        external = nodes.target(block_text, '')
        self.add_target(target_name, payload, external, lineno)
        return external
    return payload
1966
def parse_target(self, block, block_text, lineno):
    """
    Determine the type of reference of a target.

    :Return: A 2-tuple, one of:

        - 'refname' and the indirect reference name
        - 'refuri' and the URI

    (This implementation produces no other kinds; `block_text` and
    `lineno` are accepted but not used here.)
    """
    # A trailing underscore marks a possible indirect target.
    if block and block[-1].strip().endswith('_'):
        candidate = ' '.join([text.strip() for text in block])
        refname = self.is_reference(candidate)
        if refname:
            return 'refname', refname
    # Otherwise treat the text as a URI: whitespace inside each part is
    # removed, and parts separated by escaped whitespace are joined with a
    # single space.
    pieces = split_escaped_whitespace(' '.join(block))
    uri = ' '.join(''.join(unescape(piece).split()) for piece in pieces)
    return 'refuri', uri
1986
def is_reference(self, reference):
    """
    Return the unescaped reference name if `reference` is a reference.

    The whitespace-normalized text is matched against
    ``explicit.patterns.reference``; None is returned when it does not
    match.
    """
    normalized = whitespace_normalize_name(reference)
    found = self.explicit.patterns.reference.match(normalized)
    if found:
        return unescape(found.group('simple') or found.group('phrase'))
    return None
1993
def add_target(self, targetname, refuri, target, lineno):
    """
    Fill in `target` and register it with the document.

    A target with an empty `targetname` is anonymous: it is flagged as
    such and its `refuri` (if any) is stored as given.  A named target
    gets the normalized name, and its `refuri` is first passed through
    ``self.inliner.adjust_uri``; `ApplicationError` is raised if that
    yields a false value.
    """
    target.line = lineno
    if not targetname:
        # anonymous target
        if refuri:
            target['refuri'] = refuri
        target['anonymous'] = 1
        self.document.note_anonymous_target(target)
        return
    target['names'].append(normalize_name(unescape(targetname)))
    if refuri:
        adjusted = self.inliner.adjust_uri(refuri)
        if not adjusted:
            raise ApplicationError('problem with URI: %r' % refuri)
        target['refuri'] = adjusted
    self.document.note_explicit_target(target, self.parent)
2011
def substitution_def(self, match):
    """
    Parse a substitution definition.

    Return ``(nodelist, blank_finish)`` where `nodelist` holds either the
    ``substitution_definition`` node or a single system message when the
    definition has no contents, contains a disallowed element, or ends up
    empty.  Raise `MarkupError` if the closing delimiter of the
    substitution name is never found.
    """
    pattern = self.explicit.patterns.substitution
    src, srcline = self.state_machine.get_source_and_line()
    block, indent, offset, blank_finish = \
          self.state_machine.get_first_known_indented(match.end(),
                                                      strip_indent=False)
    # Raw source text, kept for the node and for error messages.
    blocktext = (match.string[:match.end()] + '\n'.join(block))
    block.disconnect()
    escaped = escape2null(block[0].rstrip())
    blockindex = 0
    # The substitution name may span lines: keep joining lines (with a
    # single space) until the pattern matches.
    while True:
        subdefmatch = pattern.match(escaped)
        if subdefmatch:
            break
        blockindex += 1
        try:
            escaped = escaped + ' ' + escape2null(block[blockindex].strip())
        except IndexError:
            raise MarkupError('malformed substitution definition.')
    del block[:blockindex]          # strip out the substitution marker
    # Negative start index: keep only the text after the matched marker
    # on the last consumed line; the ``:-1`` end drops the appended space.
    block[0] = (block[0].strip() + ' ')[subdefmatch.end()-len(escaped)-1:-1]
    if not block[0]:
        # Nothing follows the marker on that line; the content starts on
        # the next one.
        del block[0]
        offset += 1
    # Drop trailing blank lines.
    while block and not block[-1].strip():
        block.pop()
    subname = subdefmatch.group('name')
    substitution_node = nodes.substitution_definition(blocktext)
    substitution_node.source = src
    substitution_node.line = srcline
    if not block:
        msg = self.reporter.warning(
            'Substitution definition "%s" missing contents.' % subname,
            nodes.literal_block(blocktext, blocktext),
            source=src, line=srcline)
        return [msg], blank_finish
    block[0] = block[0].strip()
    substitution_node['names'].append(
        nodes.whitespace_normalize_name(subname))
    new_abs_offset, blank_finish = self.nested_list_parse(
          block, input_offset=offset, node=substitution_node,
          initial_state='SubstitutionDef', blank_finish=blank_finish)
    # Move every child that is neither inline nor text out of the
    # definition and into the parent.  The loop walks a copy of the child
    # list; `i` tracks the child's position in the live node.
    i = 0
    for node in substitution_node[:]:
        if not (isinstance(node, nodes.Inline) or
                isinstance(node, nodes.Text)):
            self.parent += substitution_node[i]
            del substitution_node[i]
        else:
            i += 1
    # Reject the definition on the first disallowed element found.
    for node in substitution_node.traverse(nodes.Element):
        if self.disallowed_inside_substitution_definitions(node):
            pformat = nodes.literal_block('', node.pformat().rstrip())
            msg = self.reporter.error(
                'Substitution definition contains illegal element <%s>:'
                % node.tagname,
                pformat, nodes.literal_block(blocktext, blocktext),
                source=src, line=srcline)
            return [msg], blank_finish
    if len(substitution_node) == 0:
        msg = self.reporter.warning(
              'Substitution definition "%s" empty or invalid.' % subname,
              nodes.literal_block(blocktext, blocktext),
              source=src, line=srcline)
        return [msg], blank_finish
    self.document.note_substitution_def(
        substitution_node, subname, self.parent)
    return [substitution_node], blank_finish
2080
def disallowed_inside_substitution_definitions(self, node):
    """
    Return True if `node` may not appear in a substitution definition.

    That is the case for any node carrying IDs, for anonymous references
    and for auto-numbered/auto-symbol footnote references.
    """
    return bool(
        node['ids']
        or (isinstance(node, nodes.reference) and node.get('anonymous'))
        or (isinstance(node, nodes.footnote_reference)
            and node.get('auto')))
2088
def directive(self, match, **option_presets):
    """
    Look up and run the directive named by group 1 of `match`.

    Messages produced by the lookup are added to the parent node.
    Returns a 2-tuple: list of nodes, and a "blank finish" boolean.
    """
    type_name = match.group(1)
    directive_class, lookup_messages = directives.directive(
        type_name, self.memo.language, self.document)
    self.parent += lookup_messages
    if not directive_class:
        return self.unknown_directive(type_name)
    return self.run_directive(
        directive_class, match, type_name, option_presets)
2100
def run_directive(self, directive, match, type_name, option_presets):
    """
    Parse a directive block, then instantiate and run the directive.

    Parameters:

    - `directive`: The class implementing the directive.  A plain
      function or method is first wrapped with
      `convert_directive_function`.

    - `match`: A regular expression match object which matched the first
      line of the directive.

    - `type_name`: The directive name, as used in the source text.

    - `option_presets`: A dictionary of preset options, used as defaults
      for the directive options.

    A `MarkupError` raised while parsing the block, or a `DirectiveError`
    raised by the directive itself, is turned into a system message.

    Returns a 2-tuple: list of nodes, and a "blank finish" boolean.
    """
    if isinstance(directive, (FunctionType, MethodType)):
        from docutils.parsers.rst import convert_directive_function
        directive = convert_directive_function(directive)
    machine = self.state_machine
    lineno = machine.abs_line_number()
    first_offset = machine.line_offset
    indented, _indent, line_offset, blank_finish = \
        machine.get_first_known_indented(match.end(), strip_top=0)
    # Source text of the whole directive, from its first line up to the
    # line the state machine has now reached.
    source_lines = machine.input_lines[
        first_offset : machine.line_offset + 1]
    block_text = '\n'.join(source_lines)
    try:
        parsed = self.parse_directive_block(indented, line_offset,
                                            directive, option_presets)
        arguments, options, content, content_offset = parsed
    except MarkupError as detail:
        error = self.reporter.error(
            'Error in "%s" directive:\n%s.' % (type_name,
                                               ' '.join(detail.args)),
            nodes.literal_block(block_text, block_text), line=lineno)
        return [error], blank_finish
    instance = directive(
        type_name, arguments, options, content, lineno,
        content_offset, block_text, self, self.state_machine)
    try:
        result = instance.run()
    except docutils.parsers.rst.DirectiveError as error:
        message_node = self.reporter.system_message(
            error.level, error.msg, line=lineno)
        message_node += nodes.literal_block(block_text, block_text)
        result = [message_node]
    assert isinstance(result, list), \
        'Directive "%s" must return a list of nodes.' % type_name
    for index, item in enumerate(result):
        assert isinstance(item, nodes.Node), \
            ('Directive "%s" returned non-Node object (index %s): %r'
             % (type_name, index, item))
    finished_blank = (blank_finish
                      or self.state_machine.is_next_line_blank())
    return (result, finished_blank)
2160
def parse_directive_block(self, indented, line_offset, directive,
                          option_presets):
    """
    Split a directive's indented block into arguments, options, content.

    Return ``(arguments, options, content, content_offset)``.  Raise
    `MarkupError` if content is present but the directive's
    `has_content` is false; errors from option or argument parsing
    propagate as well.
    """
    option_spec = directive.option_spec
    has_content = directive.has_content
    # Drop one leading blank line and all trailing blank lines.
    if indented and not indented[0].strip():
        indented.trim_start()
        line_offset += 1
    while indented and not indented[-1].strip():
        indented.trim_end()
    if indented and (directive.required_arguments
                     or directive.optional_arguments
                     or option_spec):
        # The argument/option block runs up to the first blank line.
        # After the loop `i` is the index of that blank line, or
        # ``len(indented)`` if there is none.
        for i, line in enumerate(indented):
            if not line.strip():
                break
        else:
            i += 1
        arg_block = indented[:i]
        content = indented[i+1:]
        content_offset = line_offset + i + 1
    else:
        content = indented
        content_offset = line_offset
        arg_block = []
    if option_spec:
        options, arg_block = self.parse_directive_options(
            option_presets, option_spec, arg_block)
    else:
        options = {}
    if arg_block and not (directive.required_arguments
                          or directive.optional_arguments):
        # The directive takes no arguments, so what is left of the
        # argument block is really content.  `i` is bound here because a
        # non-empty `arg_block` can only come from the branch above.
        content = arg_block + indented[i:]
        content_offset = line_offset
        arg_block = []
    # Skip blank lines at the start of the content.
    while content and not content[0].strip():
        content.trim_start()
        content_offset += 1
    if directive.required_arguments or directive.optional_arguments:
        arguments = self.parse_directive_arguments(
            directive, arg_block)
    else:
        arguments = []
    if content and not has_content:
        raise MarkupError('no content permitted')
    return (arguments, options, content, content_offset)
2206
def parse_directive_options(self, option_presets, option_spec, arg_block):
    """
    Separate and parse the option lines of a directive's argument block.

    Everything from the first line that matches the field-marker pattern
    onwards is treated as the option block.  Return ``(options,
    arg_block)``: a copy of `option_presets` updated with the parsed
    options, and the argument lines preceding the options.  Raise
    `MarkupError` if the option block cannot be parsed.
    """
    merged = option_presets.copy()
    split_at = next(
        (index for index, text in enumerate(arg_block)
         if re.match(Body.patterns['field_marker'], text)),
        None)
    if split_at is None:
        # No option lines at all.
        return merged, arg_block
    option_lines = arg_block[split_at:]
    arg_block = arg_block[:split_at]
    if option_lines:
        success, data = self.parse_extension_options(option_spec,
                                                     option_lines)
        if not success:
            # data is an error string
            raise MarkupError(data)
        # data is a dict of options
        merged.update(data)
    return merged, arg_block
2224
def parse_directive_arguments(self, directive, arg_block):
    """
    Split the argument block into a list of directive arguments.

    Raise `MarkupError` when fewer than the required number of arguments
    are supplied, or when there are too many and the directive does not
    set `final_argument_whitespace`.  If it does, the surplus text is
    kept, whitespace and all, as part of the final argument.
    """
    required = directive.required_arguments
    allowed = required + directive.optional_arguments
    arg_text = '\n'.join(arg_block)
    arguments = arg_text.split()
    supplied = len(arguments)
    if supplied < required:
        raise MarkupError('%s argument(s) required, %s supplied'
                          % (required, supplied))
    if supplied <= allowed:
        return arguments
    if not directive.final_argument_whitespace:
        raise MarkupError(
            'maximum %s argument(s) allowed, %s supplied'
            % (allowed, supplied))
    return arg_text.split(None, allowed - 1)
2241
def parse_extension_options(self, option_spec, datalines):
    """
    Parse `datalines` for a field list containing extension options
    matching `option_spec`.

    :Parameters:
        - `option_spec`: a mapping of option name to conversion
          function, which should raise an exception on bad input.
        - `datalines`: a list of input strings.

    :Return:
        - Success value, 1 or 0.
        - An option dictionary on success, an error string on failure.
    """
    field_list = nodes.field_list()
    end_offset, blank_finish = self.nested_list_parse(
        datalines, 0, field_list, initial_state='ExtensionOptions',
        blank_finish=True)
    if end_offset != len(datalines):
        # incomplete parse of block
        return 0, 'invalid option block'
    try:
        options = utils.extract_extension_options(field_list, option_spec)
    except KeyError as detail:
        return 0, ('unknown option: "%s"' % detail.args[0])
    except (ValueError, TypeError) as detail:
        return 0, ('invalid option value: %s' % ' '.join(detail.args))
    except utils.ExtensionOptionError as detail:
        return 0, ('invalid option data: %s' % ' '.join(detail.args))
    if not blank_finish:
        return 0, 'option data incompletely parsed'
    return 1, options
2274
def unknown_directive(self, type_name):
    """
    Report a directive whose type is not known.

    The directive's block is consumed and quoted in the error message.
    Return ``([error], blank_finish)``.
    """
    lineno = self.state_machine.abs_line_number()
    (block_lines, _indent, _offset,
     blank_finish) = self.state_machine.get_first_known_indented(
         0, strip_indent=False)
    quoted = '\n'.join(block_lines)
    message = 'Unknown directive type "%s".' % type_name
    error = self.reporter.error(
        message, nodes.literal_block(quoted, quoted), line=lineno)
    return [error], blank_finish
2284
def comment(self, match):
    """
    Build a ``comment`` node from an explicit markup start.

    When nothing follows the marker on its line and the next line is
    blank, the comment is empty and consumes no further lines.
    Otherwise the indented block, minus trailing blank lines, becomes the
    comment text.  Return ``([comment], blank_finish)``.
    """
    rest_of_line = match.string[match.end():]
    if (not rest_of_line.strip()
            and self.state_machine.is_next_line_blank()):
        # an empty comment: "A tiny but practical wart."
        return [nodes.comment()], 1
    (body_lines, _indent, _offset,
     blank_finish) = self.state_machine.get_first_known_indented(
         match.end())
    while body_lines and not body_lines[-1].strip():
        body_lines.trim_end()
    body = '\n'.join(body_lines)
    return [nodes.comment(body, body)], blank_finish
2295
# Table of explicit markup constructs: ``(method, pattern)`` pairs.
# `explicit_construct` tries the patterns in this order against the whole
# line and calls the method of the first one that matches.  All patterns
# are verbose-mode regexes, so the whitespace and "#" comments inside the
# literals are not part of the expressions.
explicit.constructs = [
    (footnote,
     re.compile(r"""
                \.\.[ ]+ # explicit markup start
                \[
                ( # footnote label:
                [0-9]+ # manually numbered footnote
                | # *OR*
                \# # anonymous auto-numbered footnote
                | # *OR*
                \#%s # auto-number ed?) footnote label
                | # *OR*
                \* # auto-symbol footnote
                )
                \]
                ([ ]+|$) # whitespace or end of line
                """ % Inliner.simplename, re.VERBOSE | re.UNICODE)),
    (citation,
     re.compile(r"""
                \.\.[ ]+ # explicit markup start
                \[(%s)\] # citation label
                ([ ]+|$) # whitespace or end of line
                """ % Inliner.simplename, re.VERBOSE | re.UNICODE)),
    (hyperlink_target,
     re.compile(r"""
                \.\.[ ]+ # explicit markup start
                _ # target indicator
                (?![ ]|$) # first char. not space or EOL
                """, re.VERBOSE | re.UNICODE)),
    (substitution_def,
     re.compile(r"""
                \.\.[ ]+ # explicit markup start
                \| # substitution indicator
                (?![ ]|$) # first char. not space or EOL
                """, re.VERBOSE | re.UNICODE)),
    (directive,
     re.compile(r"""
                \.\.[ ]+ # explicit markup start
                (%s) # directive name
                [ ]? # optional space
                :: # directive delimiter
                ([ ]+|$) # whitespace or end of line
                """ % Inliner.simplename, re.VERBOSE | re.UNICODE))]
2339
def explicit_markup(self, match, context, next_state):
    """
    Transition method for footnotes, hyperlink targets, directives and
    comments.

    The parsed construct is added to the parent node, then any directly
    following explicit markup is handled by `explicit_list`.
    """
    parsed_nodes, finished_blank = self.explicit_construct(match)
    self.parent += parsed_nodes
    self.explicit_list(finished_blank)
    return [], next_state, []
2346
def explicit_construct(self, match):
    """
    Determine which explicit construct this is, parse & return it.

    The entries of ``explicit.constructs`` are tried in order; the first
    whose pattern matches the line is used.  If its method raises
    `MarkupError`, or if nothing matches, the text is parsed as a comment
    instead (with a warning appended in the former case).
    """
    warnings = []
    for handler, regex in self.explicit.constructs:
        found = regex.match(match.string)
        if not found:
            continue
        try:
            return handler(self, found)
        except MarkupError as problem:
            lineno = self.state_machine.abs_line_number()
            text = ' '.join(problem.args)
            warnings.append(self.reporter.warning(text, line=lineno))
        # Only the first matching construct is ever attempted.
        break
    fallback_nodes, blank_finish = self.comment(match)
    return fallback_nodes + warnings, blank_finish
2362
def explicit_list(self, blank_finish):
    """
    Create a nested state machine for a series of explicit markup
    constructs (including anonymous hyperlink targets).

    Parsing starts on the line after the current one; afterwards the
    state machine is moved to the offset reported by the nested parse,
    and an unindent warning is added if the series did not end with a
    blank line.
    """
    machine = self.state_machine
    next_offset = machine.line_offset + 1
    remaining = machine.input_lines[next_offset:]
    newline_offset, blank_finish = self.nested_list_parse(
        remaining,
        input_offset=machine.abs_line_offset() + 1,
        node=self.parent, initial_state='Explicit',
        blank_finish=blank_finish,
        match_titles=machine.match_titles)
    self.goto_line(newline_offset)
    if blank_finish:
        return
    self.parent += self.unindent_warning('Explicit markup')
2378
def anonymous(self, match, context, next_state):
    """
    Transition method for anonymous hyperlink targets.

    The target is added to the parent node, then any directly following
    explicit markup is handled by `explicit_list`.
    """
    target_nodes, finished_blank = self.anonymous_target(match)
    self.parent += target_nodes
    self.explicit_list(finished_blank)
    return [], next_state, []
2385
def anonymous_target(self, match):
    """
    Parse an anonymous hyperlink target.

    The block following the marker is read up to the first blank line,
    its backslash escapes are converted with `escape2null`, and the
    target is built by `make_target` with an empty name.
    Return ``([target], blank_finish)``.
    """
    lineno = self.state_machine.abs_line_number()
    (raw_lines, _indent, _offset,
     blank_finish) = self.state_machine.get_first_known_indented(
         match.end(), until_blank=True)
    source_text = match.string[:match.end()] + '\n'.join(raw_lines)
    escaped_lines = [escape2null(text) for text in raw_lines]
    target = self.make_target(escaped_lines, source_text, lineno, '')
    return [target], blank_finish
2395
def line(self, match, context, next_state):
    """
    Transition method for a section title overline or transition marker.

    Where titles are allowed the line is passed on to the 'Line' state.
    Otherwise a bare "::" or a line shorter than four characters is
    re-classified as text, and anything else is reported as a severe
    error.
    """
    if self.state_machine.match_titles:
        return [match.string], 'Line', []
    stripped = match.string.strip()
    if stripped == '::':
        raise statemachine.TransitionCorrection('text')
    if len(stripped) < 4:
        notice = self.reporter.info(
            'Unexpected possible title overline or transition.\n'
            "Treating it as ordinary text because it's so short.",
            line=self.state_machine.abs_line_number())
        self.parent += notice
        raise statemachine.TransitionCorrection('text')
    current_line = self.state_machine.line
    problem = self.reporter.severe(
        'Unexpected section title or transition.',
        nodes.literal_block(current_line, current_line),
        line=self.state_machine.abs_line_number())
    self.parent += problem
    return [], next_state, []
2417
def text(self, match, context, next_state):
    """
    Transition method for titles, definition lists and paragraphs.

    The line becomes the new context and the 'Text' state classifies it.
    """
    new_context = [match.string]
    return new_context, 'Text', []
2421
2422
class RFC2822Body(Body):

    """
    `Body` variant that also recognizes RFC2822-style headers.

    RFC2822 headers are only valid as the first constructs in documents,
    so every transition leads to the plain `Body` state: as soon as
    anything has been parsed, `Body` takes over.
    """

    # Work on a copy so that `Body.patterns` itself stays untouched.
    patterns = Body.patterns.copy()
    patterns['rfc2822'] = r'[!-9;-~]+:( +|$)'
    initial_transitions = [(transition_name, 'Body')
                           for transition_name in Body.initial_transitions]
    # 'rfc2822' goes just before the last transition ('text').
    initial_transitions.insert(-1, ('rfc2822', 'Body'))

    def rfc2822(self, match, context, next_state):
        """RFC2822-style field list item: start a new field list."""
        field_list = nodes.field_list(classes=['rfc2822'])
        self.parent += field_list
        first_field, blank_finish = self.rfc2822_field(match)
        field_list += first_field
        # Further fields are parsed by a nested state machine starting on
        # the next line.
        next_offset = self.state_machine.line_offset + 1
        newline_offset, blank_finish = self.nested_list_parse(
            self.state_machine.input_lines[next_offset:],
            input_offset=self.state_machine.abs_line_offset() + 1,
            node=field_list, initial_state='RFC2822List',
            blank_finish=blank_finish)
        self.goto_line(newline_offset)
        if not blank_finish:
            self.parent += self.unindent_warning(
                'RFC2822-style field list')
        return [], next_state, []

    def rfc2822_field(self, match):
        """
        Build a ``field`` node; the name is the text before the first
        colon.  Return ``(field, blank_finish)``.
        """
        line_text = match.string
        field_name = line_text[:line_text.find(':')]
        (body_lines, _indent, body_offset,
         blank_finish) = self.state_machine.get_first_known_indented(
             match.end(), until_blank=True)
        field = nodes.field()
        field += nodes.field_name(field_name, field_name)
        body = nodes.field_body('\n'.join(body_lines))
        field += body
        if body_lines:
            self.nested_parse(body_lines, input_offset=body_offset,
                              node=body)
        return field, blank_finish
2467
2468
class SpecializedBody(Body):

    """
    Superclass for second and subsequent compound element members.
    Compound elements are lists and list-like constructs.

    Every transition method is disabled here (redefined as
    `invalid_input`); subclasses override the ones they want to
    re-enable.

    Example: once `Body.bullet` has recognized the first item of a bullet
    list, it calls ``self.nested_list_parse``
    (`RSTState.nested_list_parse`) to start a nested parsing session with
    `BulletList` as the initial state and a "bullet_list" node as the
    container.  `BulletList` enables only the ``bullet`` transition
    method, so further bullet list items are parsed and inserted into the
    container.  The first construct that is *not* a bullet list item
    triggers `invalid_input`, which ends the nested parse and closes the
    container.  Because `BulletList` has to recognize everything *other
    than* bullet list items as invalid, it inherits the transition list
    created in `Body`.
    """

    def invalid_input(self, match=None, context=None, next_state=None):
        """Not a compound element member. Abort this state machine."""
        # Back up one line so the parent state machine can reassess it.
        self.state_machine.previous_line()
        raise EOFError

    # Disable every `Body` transition method.
    indent = bullet = enumerator = field_marker = option_marker = \
        doctest = line_block = grid_table_top = simple_table_top = \
        explicit_markup = anonymous = line = text = invalid_input
2511
2512
class BulletList(SpecializedBody):

    """Second and subsequent bullet_list list_items."""

    def bullet(self, match, context, next_state):
        """Bullet list item; a different bullet character ends the list."""
        bullet_char = match.string[0]
        if bullet_char != self.parent['bullet']:
            # different bullet: new list
            self.invalid_input()
        item, finished_blank = self.list_item(match.end())
        self.parent += item
        self.blank_finish = finished_blank
        return [], next_state, []
2526
2527
class DefinitionList(SpecializedBody):

    """Second and subsequent definition_list_items."""

    def text(self, match, context, next_state):
        """Possible term of a definition list item: defer to 'Definition'."""
        new_context = [match.string]
        return new_context, 'Definition', []
2535
2536
class EnumeratedList(SpecializedBody):

    """Second and subsequent enumerated_list list_items."""

    def enumerator(self, match, context, next_state):
        """
        Enumerated list item.

        The item continues the current list only if it uses the same
        format and either the auto-enumerator "#" or the list's sequence
        type with the next ordinal (and no auto-enumerator was used
        before); otherwise the list ends.
        """
        fmt, sequence, _text, ordinal = self.parse_enumerator(
            match, self.parent['enumtype'])
        starts_new_list = (
            fmt != self.format
            or (sequence != '#'
                and (sequence != self.parent['enumtype']
                     or self.auto
                     or ordinal != (self.lastordinal + 1)))
            or not self.is_enumerated_list_item(ordinal, sequence, fmt))
        if starts_new_list:
            # different enumeration: new list
            self.invalid_input()
        if sequence == '#':
            self.auto = 1
        item, finished_blank = self.list_item(match.end())
        self.parent += item
        self.blank_finish = finished_blank
        self.lastordinal = ordinal
        return [], next_state, []
2559
2560
class FieldList(SpecializedBody):

    """Second and subsequent field_list fields."""

    def field_marker(self, match, context, next_state):
        """Field list field: parse it and add it to the list."""
        new_field, finished_blank = self.field(match)
        self.parent += new_field
        self.blank_finish = finished_blank
        return [], next_state, []
2571
2572
class OptionList(SpecializedBody):

    """Second and subsequent option_list option_list_items."""

    def option_marker(self, match, context, next_state):
        """Option list item; a line that fails to parse ends the list."""
        try:
            item, finished_blank = self.option_list_item(match)
        except MarkupError:
            # Not a valid option after all: abort this state machine.
            self.invalid_input()
        self.parent += item
        self.blank_finish = finished_blank
        return [], next_state, []
2586
2587
class RFC2822List(SpecializedBody, RFC2822Body):

    """Second and subsequent RFC2822-style field_list fields."""

    # Use the RFC2822-aware pattern and transition tables.
    patterns = RFC2822Body.patterns
    initial_transitions = RFC2822Body.initial_transitions

    def rfc2822(self, match, context, next_state):
        """RFC2822-style field list item: add it and stay in this state."""
        new_field, finished_blank = self.rfc2822_field(match)
        self.parent += new_field
        self.blank_finish = finished_blank
        return [], 'RFC2822List', []

    # A blank line ends the header list.
    blank = SpecializedBody.invalid_input
2603
2604
class ExtensionOptions(FieldList):

    """
    Parse field_list fields for extension options.

    No nested parsing is done (including inline markup parsing).
    """

    def parse_field_body(self, indented, offset, node):
        """
        Override `Body.parse_field_body` for simpler parsing.

        Each run of consecutive non-blank lines becomes one ``paragraph``
        child of `node` holding the raw text.
        """
        pending = []
        # The trailing '' acts as a sentinel that flushes the last run.
        for text_line in list(indented) + ['']:
            if text_line.strip():
                pending.append(text_line)
                continue
            if pending:
                paragraph_text = '\n'.join(pending)
                node += nodes.paragraph(paragraph_text, paragraph_text)
                pending = []
2623
2624
class LineBlock(SpecializedBody):

    """Second and subsequent lines of a line_block."""

    # A blank line ends the line block.
    blank = SpecializedBody.invalid_input

    def line_block(self, match, context, next_state):
        """New line of line block."""
        current_lineno = self.state_machine.abs_line_number()
        new_line, messages, finished_blank = self.line_block_line(
            match, current_lineno)
        self.parent += new_line
        # System messages are attached to the line block's parent.
        self.parent.parent += messages
        self.blank_finish = finished_blank
        return [], next_state, []
2639
2640
class Explicit(SpecializedBody):

    """Second and subsequent explicit markup construct."""

    def explicit_markup(self, match, context, next_state):
        """
        Footnotes, hyperlink targets, directives, comments.

        Append the nodes produced by `explicit_construct()` to the parent
        and store the blank-finish flag on the state.
        """
        new_nodes, finished_blank = self.explicit_construct(match)
        self.parent += new_nodes
        self.blank_finish = finished_blank
        return [], next_state, []

    def anonymous(self, match, context, next_state):
        """
        Anonymous hyperlink targets.

        Append the nodes produced by `anonymous_target()` to the parent
        and store the blank-finish flag on the state.
        """
        new_nodes, finished_blank = self.anonymous_target(match)
        self.parent += new_nodes
        self.blank_finish = finished_blank
        return [], next_state, []

    # A blank line is handled by SpecializedBody's invalid-input handler.
    blank = SpecializedBody.invalid_input
2660
2661
class SubstitutionDef(Body):

    """
    Parser for the contents of a substitution_definition element.
    """

    patterns = {
        'embedded_directive': re.compile(
            r'(%s)::( +|$)' % Inliner.simplename, re.UNICODE),
        'text': r''}
    initial_transitions = ['embedded_directive', 'text']

    def embedded_directive(self, match, context, next_state):
        """
        Parse a directive embedded in the substitution definition.

        The first name of the parent node is passed to `directive()` as
        `alt`.  Always ends this state machine by raising `EOFError`.
        """
        alt_name = self.parent['names'][0]
        new_nodes, finished_blank = self.directive(match, alt=alt_name)
        self.parent += new_nodes
        if not self.state_machine.at_eof():
            self.blank_finish = finished_blank
        raise EOFError

    def text(self, match, context, next_state):
        """
        Handle a line that is not an embedded directive.

        Unless at end of input, record whether the next line is blank;
        then end this state machine by raising `EOFError`.
        """
        machine = self.state_machine
        if not machine.at_eof():
            self.blank_finish = machine.is_next_line_blank()
        raise EOFError
2686
2687
class Text(RSTState):

    """
    Classifier of second line of a text block.

    Could be a paragraph, a definition list item, or a title.
    """

    patterns = {'underline': Body.patterns['line'],
                'text': r''}
    initial_transitions = [('underline', 'Body'), ('text', 'Body')]

    def blank(self, match, context, next_state):
        """
        End of paragraph.

        Build a paragraph from the `context` lines and append it to the
        parent; if the paragraph announces a literal block, parse and
        append that too.  Always continues in the 'Body' state.
        """
        # NOTE: self.paragraph returns [ node, system_message(s) ], literalnext
        paragraph, literalnext = self.paragraph(
            context, self.state_machine.abs_line_number() - 1)
        self.parent += paragraph
        if literalnext:
            self.parent += self.literal_block()
        return [], 'Body', []

    def eof(self, context):
        """Flush pending `context` lines as a paragraph (see `blank`)."""
        if context:
            self.blank(None, context, None)
        return []

    def indent(self, match, context, next_state):
        """
        Definition list item.

        Start a new definition_list with the item introduced by `context`,
        then parse following items with a nested 'DefinitionList' parse.
        """
        definitionlist = nodes.definition_list()
        definitionlistitem, blank_finish = self.definition_list_item(context)
        definitionlist += definitionlistitem
        self.parent += definitionlist
        offset = self.state_machine.line_offset + 1   # next line
        newline_offset, blank_finish = self.nested_list_parse(
            self.state_machine.input_lines[offset:],
            input_offset=self.state_machine.abs_line_offset() + 1,
            node=definitionlist, initial_state='DefinitionList',
            blank_finish=blank_finish, blank_finish_state='Definition')
        self.goto_line(newline_offset)
        if not blank_finish:
            self.parent += self.unindent_warning('Definition list')
        return [], 'Body', []

    def underline(self, match, context, next_state):
        """
        Section title.

        `context[0]` is the title line and `match.string` the underline.
        An underline shorter than the title is either treated as ordinary
        text (fewer than 4 characters, via `TransitionCorrection`) or
        reported with a warning.  If the state machine does not match
        titles, a severe error is reported instead of creating a section.
        """
        lineno = self.state_machine.abs_line_number()
        title = context[0].rstrip()
        underline = match.string.rstrip()
        source = title + '\n' + underline
        messages = []
        if column_width(title) > len(underline):
            if len(underline) < 4:
                if self.state_machine.match_titles:
                    msg = self.reporter.info(
                        'Possible title underline, too short for the title.\n'
                        "Treating it as ordinary text because it's so short.",
                        line=lineno)
                    self.parent += msg
                raise statemachine.TransitionCorrection('text')
            else:
                blocktext = context[0] + '\n' + self.state_machine.line
                msg = self.reporter.warning(
                    'Title underline too short.',
                    nodes.literal_block(blocktext, blocktext), line=lineno)
                messages.append(msg)
        if not self.state_machine.match_titles:
            blocktext = context[0] + '\n' + self.state_machine.line
            # We need get_source_and_line() here to report correctly
            src, srcline = self.state_machine.get_source_and_line()
            # TODO: why is abs_line_number() == srcline+1
            # if the error is in a table (try with test_tables.py)?
            # print("get_source_and_line", srcline)
            # print("abs_line_number", self.state_machine.abs_line_number())
            msg = self.reporter.severe(
                'Unexpected section title.',
                nodes.literal_block(blocktext, blocktext),
                source=src, line=srcline)
            self.parent += messages
            self.parent += msg
            return [], next_state, []
        style = underline[0]
        context[:] = []
        self.section(title, source, style, lineno - 1, messages)
        return [], next_state, []

    def text(self, match, context, next_state):
        """
        Paragraph.

        Collect the rest of the text block, build a paragraph from the
        `context` lines plus that block, and append it to the parent.  An
        unexpected indentation inside the block is reported as an error
        after the paragraph.
        """
        startline = self.state_machine.abs_line_number() - 1
        msg = None
        try:
            block = self.state_machine.get_text_block(flush_left=True)
        except statemachine.UnexpectedIndentationError as err:
            # The exception carries the block read so far plus location.
            block, src, srcline = err.args
            msg = self.reporter.error('Unexpected indentation.',
                                      source=src, line=srcline)
        lines = context + list(block)
        paragraph, literalnext = self.paragraph(lines, startline)
        self.parent += paragraph
        self.parent += msg
        if literalnext:
            try:
                self.state_machine.next_line()
            except EOFError:
                pass
            self.parent += self.literal_block()
        return [], next_state, []

    def literal_block(self):
        """
        Return a list of nodes.

        An indented block becomes a literal_block node (followed by an
        unindent warning if it did not finish with a blank line).  If no
        indented text is found, fall back to `quoted_literal_block()`.
        """
        indented, indent, offset, blank_finish = \
            self.state_machine.get_indented()
        # Drop trailing blank lines.
        while indented and not indented[-1].strip():
            indented.trim_end()
        if not indented:
            return self.quoted_literal_block()
        data = '\n'.join(indented)
        literal_block = nodes.literal_block(data, data)
        (literal_block.source,
         literal_block.line) = self.state_machine.get_source_and_line(offset+1)
        nodelist = [literal_block]
        if not blank_finish:
            nodelist.append(self.unindent_warning('Literal block'))
        return nodelist

    def quoted_literal_block(self):
        """
        Parse a quoted (unindented) literal block; return its nodes.

        Runs a nested parse restricted to the `QuotedLiteralBlock` state
        on the remaining input lines and moves to the line it stopped at.
        """
        abs_line_offset = self.state_machine.abs_line_offset()
        offset = self.state_machine.line_offset
        parent_node = nodes.Element()
        new_abs_offset = self.nested_parse(
            self.state_machine.input_lines[offset:],
            input_offset=abs_line_offset, node=parent_node, match_titles=False,
            state_machine_kwargs={'state_classes': (QuotedLiteralBlock,),
                                  'initial_state': 'QuotedLiteralBlock'})
        self.goto_line(new_abs_offset)
        return parent_node.children

    def definition_list_item(self, termline):
        """
        Return a definition_list_item node and the blank-finish flag.

        `termline` is a one-element list holding the term line; the
        indented text that follows is parsed as the definition.
        """
        indented, indent, line_offset, blank_finish = \
            self.state_machine.get_indented()
        itemnode = nodes.definition_list_item(
            '\n'.join(termline + list(indented)))
        lineno = self.state_machine.abs_line_number() - 1
        (itemnode.source,
         itemnode.line) = self.state_machine.get_source_and_line(lineno)
        termlist, messages = self.term(termline, lineno)
        itemnode += termlist
        definition = nodes.definition('', *messages)
        itemnode += definition
        if termline[0].endswith('::'):
            definition += self.reporter.info(
                'Blank line missing before literal block (after the "::")? '
                'Interpreted as a definition list item.',
                line=lineno+1)
        self.nested_parse(indented, input_offset=line_offset, node=definition)
        return itemnode, blank_finish

    classifier_delimiter = re.compile(' +: +')

    def term(self, lines, lineno):
        """
        Return a definition_list's term and optional classifiers.

        The result is a pair: a node list (the term node first, then one
        classifier node per ' : '-delimited part found in plain text) and
        the system messages from inline parsing.
        """
        assert len(lines) == 1
        text_nodes, messages = self.inline_text(lines[0], lineno)
        term_node = nodes.term(lines[0])
        (term_node.source,
         term_node.line) = self.state_machine.get_source_and_line(lineno)
        node_list = [term_node]
        for node in text_nodes:
            if not isinstance(node, nodes.Text):
                # Inline markup is attached to the most recent node as is.
                node_list[-1] += node
                continue
            parts = self.classifier_delimiter.split(node)
            if len(parts) == 1:
                node_list[-1] += node
            else:
                # Text before the first delimiter stays with the current
                # node; every further part starts a new classifier.
                text = parts[0].rstrip()
                textnode = nodes.Text(text)
                node_list[-1] += textnode
                for part in parts[1:]:
                    node_list.append(
                        nodes.classifier(unescape(part, True), part))
        return node_list, messages
2869
2870
class SpecializedText(Text):

    """
    Superclass for second and subsequent lines of Text-variants.

    All transition methods are disabled. Override individual methods in
    subclasses to re-enable.
    """

    def eof(self, context):
        """Incomplete construct."""
        return []

    def invalid_input(self, match=None, context=None, next_state=None):
        """Not a compound element member. Abort this state machine."""
        raise EOFError

    # Every inherited transition method aborts unless a subclass
    # overrides it.
    blank = indent = underline = text = invalid_input
2892
2893
class Definition(SpecializedText):

    """Second line of potential definition_list_item."""

    def eof(self, context):
        """Not a definition."""
        # Step back two lines so the parent state machine can reassess.
        self.state_machine.previous_line(2)
        return []

    def indent(self, match, context, next_state):
        """
        Definition list item.

        Append the item built from `context` to the parent node, store
        the blank-finish flag and continue in the 'DefinitionList' state.
        """
        new_item, finished_blank = self.definition_list_item(context)
        self.parent += new_item
        self.blank_finish = finished_blank
        return [], 'DefinitionList', []
2909
2910
class Line(SpecializedText):

    """
    Second line of over- & underlined section title or transition marker.
    """

    eofcheck = 1 # @@@ ???
    """Set to 0 while parsing sections, so that we don't catch the EOF."""

    def eof(self, context):
        """Transition marker at end of section or document."""
        marker = context[0].strip()
        if self.memo.section_bubble_up_kludge:
            self.memo.section_bubble_up_kludge = False
        elif len(marker) < 4:
            self.state_correction(context)
        if self.eofcheck:  # ignore EOFError with sections
            transition_lineno = self.state_machine.abs_line_number() - 1
            transition = nodes.transition(rawsource=context[0])
            transition.line = transition_lineno
            self.parent += transition
        self.eofcheck = 1
        return []

    def blank(self, match, context, next_state):
        """Transition marker."""
        src, srcline = self.state_machine.get_source_and_line()
        marker = context[0].strip()
        if len(marker) < 4:
            # Too short for a transition: re-parse as ordinary text.
            self.state_correction(context)
        transition = nodes.transition(rawsource=marker)
        transition.source = src
        transition.line = srcline - 1
        self.parent += transition
        return [], 'Body', []

    def text(self, match, context, next_state):
        """
        Potential over- & underlined title.

        `context[0]` is the overline and `match.string` the title; the
        following line must be an underline equal to the overline.  Any
        violation is reported as a severe error, unless the overline is
        shorter than 4 characters, in which case it is re-parsed as
        ordinary text via `short_overline()`.
        """
        lineno = self.state_machine.abs_line_number() - 1
        overline = context[0]
        title = match.string
        underline = ''
        try:
            underline = self.state_machine.next_line()
        except EOFError:
            blocktext = overline + '\n' + title
            if len(overline.rstrip()) < 4:
                self.short_overline(context, blocktext, lineno, 2)
            else:
                self.parent += self.reporter.severe(
                    'Incomplete section title.',
                    nodes.literal_block(blocktext, blocktext),
                    line=lineno)
                return [], 'Body', []
        # `source` keeps the lines as read, before trailing whitespace
        # is stripped for the comparisons below.
        source = '%s\n%s\n%s' % (overline, title, underline)
        overline = overline.rstrip()
        underline = underline.rstrip()
        # Pick the complaint first; both cases are handled the same way.
        if not self.transitions['underline'][0].match(underline):
            complaint = ('Missing matching underline for section title '
                         'overline.')
        elif overline != underline:
            complaint = 'Title overline & underline mismatch.'
        else:
            complaint = None
        if complaint is not None:
            blocktext = overline + '\n' + title + '\n' + underline
            if len(overline.rstrip()) < 4:
                self.short_overline(context, blocktext, lineno, 2)
            else:
                self.parent += self.reporter.severe(
                    complaint,
                    nodes.literal_block(source, source),
                    line=lineno)
                return [], 'Body', []
        title = title.rstrip()
        messages = []
        if column_width(title) > len(overline):
            blocktext = overline + '\n' + title + '\n' + underline
            if len(overline.rstrip()) < 4:
                self.short_overline(context, blocktext, lineno, 2)
            else:
                messages.append(self.reporter.warning(
                    'Title overline too short.',
                    nodes.literal_block(source, source),
                    line=lineno))
        style = (overline[0], underline[0])
        self.eofcheck = 0  # @@@ not sure this is correct
        self.section(title.lstrip(), source, style, lineno + 1, messages)
        self.eofcheck = 1
        return [], 'Body', []

    indent = text  # indented title

    def underline(self, match, context, next_state):
        """
        Handle a second line-like line directly after the overline.

        Reports 'Invalid section title or transition marker.' unless the
        overline is shorter than 4 characters, in which case it is
        re-parsed as ordinary text via `short_overline()`.
        """
        first_line = context[0]
        blocktext = first_line + '\n' + self.state_machine.line
        lineno = self.state_machine.abs_line_number() - 1
        if len(first_line.rstrip()) < 4:
            self.short_overline(context, blocktext, lineno, 1)
        self.parent += self.reporter.error(
            'Invalid section title or transition marker.',
            nodes.literal_block(blocktext, blocktext),
            line=lineno)
        return [], 'Body', []

    def short_overline(self, context, blocktext, lineno, lines=1):
        """
        Report a too-short overline, then correct the state.

        Adds an info message to the parent and delegates to
        `state_correction()`, which raises.  `blocktext` is accepted but
        not used here.
        """
        self.parent += self.reporter.info(
            'Possible incomplete section title.\nTreating the overline as '
            "ordinary text because it's so short.",
            line=lineno)
        self.state_correction(context, lines)

    def state_correction(self, context, lines=1):
        """
        Back up `lines` lines, empty `context` in place and raise
        `StateCorrection` so parsing resumes in 'Body' with 'text'.
        """
        self.state_machine.previous_line(lines)
        del context[:]
        raise statemachine.StateCorrection('Body', 'text')
3036
3037
class QuotedLiteralBlock(RSTState):

    """
    Nested parse handler for quoted (unindented) literal blocks.

    Special-purpose.  Not for inclusion in `state_classes`.
    """

    patterns = {'initial_quoted': r'(%(nonalphanum7bit)s)' % Body.pats,
                'text': r''}
    initial_transitions = ('initial_quoted', 'text')

    def __init__(self, state_machine, debug=False):
        """Set up the state with no pending messages or start line."""
        RSTState.__init__(self, state_machine, debug)
        self.messages = []
        self.initial_lineno = None

    def blank(self, match, context, next_state):
        """End the block if lines were collected; else skip the blank."""
        if not context:
            return context, next_state, []
        raise EOFError

    def eof(self, context):
        """
        Emit the collected lines as a literal_block, or warn if none.

        Messages gathered during parsing are appended afterwards.
        """
        if context:
            src, srcline = self.state_machine.get_source_and_line(
                self.initial_lineno)
            block_text = '\n'.join(context)
            block_node = nodes.literal_block(block_text, block_text)
            block_node.source = src
            block_node.line = srcline
            self.parent += block_node
        else:
            # src not available, because statemachine.input_lines is empty
            self.parent += self.reporter.warning(
                'Literal block expected; none found.',
                line=self.state_machine.abs_line_number())
            self.state_machine.previous_line()
        self.parent += self.messages
        return []

    def indent(self, match, context, next_state):
        """Queue an error for an indented line and end the block."""
        assert context, ('QuotedLiteralBlock.indent: context should not '
                         'be empty!')
        problem = self.reporter.error(
            'Unexpected indentation.',
            line=self.state_machine.abs_line_number())
        self.messages.append(problem)
        self.state_machine.previous_line()
        raise EOFError

    def initial_quoted(self, match, context, next_state):
        """Match arbitrary quote character on the first line only."""
        self.remove_transition('initial_quoted')
        quote_char = match.string[0]
        quote_pattern = re.compile(re.escape(quote_char), re.UNICODE)
        # New transition matches consistent quotes only:
        self.add_transition(
            'quoted', (quote_pattern, self.quoted, self.__class__.__name__))
        self.initial_lineno = self.state_machine.abs_line_number()
        return [match.string], next_state, []

    def quoted(self, match, context, next_state):
        """Match consistent quotes on subsequent lines."""
        context.append(match.string)
        return context, next_state, []

    def text(self, match, context, next_state):
        """
        End the block on a line without the expected quote.

        If lines were already collected, queue an 'Inconsistent literal
        block quoting.' error and step back one line first.
        """
        if context:
            problem = self.reporter.error(
                'Inconsistent literal block quoting.',
                line=self.state_machine.abs_line_number())
            self.messages.append(problem)
            self.state_machine.previous_line()
        raise EOFError
3111
3112
state_classes = (
    Body,
    BulletList,
    DefinitionList,
    EnumeratedList,
    FieldList,
    OptionList,
    LineBlock,
    ExtensionOptions,
    Explicit,
    Text,
    Definition,
    Line,
    SubstitutionDef,
    RFC2822Body,
    RFC2822List,
)
"""Standard set of State classes used to start `RSTStateMachine`."""