Mercurial > repos > shellac > guppy_basecaller

File: env/lib/python3.7/site-packages/docutils/parsers/rst/states.py @ 0:26e78fe6e8c4 (draft)
Commit message: "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
Author: shellac
Date: Sat, 02 May 2020 07:14:21 -0400
Parents: (none)
Children: (none)
Compared against the null revision -1:000000000000 (file added in 0:26e78fe6e8c4).
1 # $Id: states.py 8359 2019-08-26 16:45:33Z milde $ | |
2 # Author: David Goodger <goodger@python.org> | |
3 # Copyright: This module has been placed in the public domain. | |
4 | |
5 """ | |
6 This is the ``docutils.parsers.rst.states`` module, the core of | |
7 the reStructuredText parser. It defines the following: | |
8 | |
9 :Classes: | |
10 - `RSTStateMachine`: reStructuredText parser's entry point. | |
11 - `NestedStateMachine`: recursive StateMachine. | |
12 - `RSTState`: reStructuredText State superclass. | |
13 - `Inliner`: For parsing inline markup. | |
14 - `Body`: Generic classifier of the first line of a block. | |
15 - `SpecializedBody`: Superclass for compound element members. | |
16 - `BulletList`: Second and subsequent bullet_list list_items | |
17 - `DefinitionList`: Second+ definition_list_items. | |
18 - `EnumeratedList`: Second+ enumerated_list list_items. | |
19 - `FieldList`: Second+ fields. | |
20 - `OptionList`: Second+ option_list_items. | |
21 - `RFC2822List`: Second+ RFC2822-style fields. | |
22 - `ExtensionOptions`: Parses directive option fields. | |
23 - `Explicit`: Second+ explicit markup constructs. | |
24 - `SubstitutionDef`: For embedded directives in substitution definitions. | |
25 - `Text`: Classifier of second line of a text block. | |
26 - `SpecializedText`: Superclass for continuation lines of Text-variants. | |
27 - `Definition`: Second line of potential definition_list_item. | |
28 - `Line`: Second line of overlined section title or transition marker. | |
29 - `Struct`: An auxiliary collection class. | |
30 | |
31 :Exception classes: | |
32 - `MarkupError` | |
33 - `ParserError` | |
34 - `MarkupMismatch` | |
35 | |
36 :Functions: | |
37 - `escape2null()`: Return a string, escape-backslashes converted to nulls. | |
38 - `unescape()`: Return a string, nulls removed or restored to backslashes. | |
39 | |
40 :Attributes: | |
41 - `state_classes`: set of State classes used with `RSTStateMachine`. | |
42 | |
43 Parser Overview | |
44 =============== | |
45 | |
46 The reStructuredText parser is implemented as a recursive state machine, | |
47 examining its input one line at a time. To understand how the parser works, | |
48 please first become familiar with the `docutils.statemachine` module. In the | |
49 description below, references are made to classes defined in this module; | |
50 please see the individual classes for details. | |
51 | |
52 Parsing proceeds as follows: | |
53 | |
54 1. The state machine examines each line of input, checking each of the | |
55 transition patterns of the state `Body`, in order, looking for a match. | |
56 The implicit transitions (blank lines and indentation) are checked before | |
57 any others. The 'text' transition is a catch-all (matches anything). | |
58 | |
59 2. The method associated with the matched transition pattern is called. | |
60 | |
61 A. Some transition methods are self-contained, appending elements to the | |
62 document tree (`Body.doctest` parses a doctest block). The parser's | |
63 current line index is advanced to the end of the element, and parsing | |
64 continues with step 1. | |
65 | |
66 B. Other transition methods trigger the creation of a nested state machine, | |
67 whose job is to parse a compound construct ('indent' does a block quote, | |
68 'bullet' does a bullet list, 'overline' does a section [first checking | |
69 for a valid section header], etc.). | |
70 | |
71 - In the case of lists and explicit markup, a one-off state machine is | |
72 created and run to parse contents of the first item. | |
73 | |
74 - A new state machine is created and its initial state is set to the | |
75 appropriate specialized state (`BulletList` in the case of the | |
76 'bullet' transition; see `SpecializedBody` for more detail). This | |
77 state machine is run to parse the compound element (or series of | |
78 explicit markup elements), and returns as soon as a non-member element | |
79 is encountered. For example, the `BulletList` state machine ends as | |
80 soon as it encounters an element which is not a list item of that | |
81 bullet list. The optional omission of inter-element blank lines is | |
82 enabled by this nested state machine. | |
83 | |
84 - The current line index is advanced to the end of the elements parsed, | |
85 and parsing continues with step 1. | |
86 | |
87 C. The result of the 'text' transition depends on the next line of text. | |
88 The current state is changed to `Text`, under which the second line is | |
89 examined. If the second line is: | |
90 | |
91 - Indented: The element is a definition list item, and parsing proceeds | |
92 similarly to step 2.B, using the `DefinitionList` state. | |
93 | |
94 - A line of uniform punctuation characters: The element is a section | |
95 header; again, parsing proceeds as in step 2.B, and `Body` is still | |
96 used. | |
97 | |
98 - Anything else: The element is a paragraph, which is examined for | |
99 inline markup and appended to the parent element. Processing | |
100 continues with step 1. | |
101 """ | |
102 | |
103 __docformat__ = 'reStructuredText' | |
104 | |
105 | |
106 import sys | |
107 import re | |
108 from types import FunctionType, MethodType | |
109 | |
110 from docutils import nodes, statemachine, utils | |
111 from docutils import ApplicationError, DataError | |
112 from docutils.statemachine import StateMachineWS, StateWS | |
113 from docutils.nodes import fully_normalize_name as normalize_name | |
114 from docutils.nodes import whitespace_normalize_name | |
115 import docutils.parsers.rst | |
116 from docutils.parsers.rst import directives, languages, tableparser, roles | |
117 from docutils.parsers.rst.languages import en as _fallback_language_module | |
118 from docutils.utils import escape2null, unescape, column_width | |
119 from docutils.utils import punctuation_chars, roman, urischemes | |
120 from docutils.utils import split_escaped_whitespace | |
121 | |
122 class MarkupError(DataError): pass | |
123 class UnknownInterpretedRoleError(DataError): pass | |
124 class InterpretedRoleNotImplementedError(DataError): pass | |
125 class ParserError(ApplicationError): pass | |
126 class MarkupMismatch(Exception): pass | |
127 | |
128 | |
129 class Struct(object): | |
130 | |
131 """Stores data attributes for dotted-attribute access.""" | |
132 | |
133 def __init__(self, **keywordargs): | |
134 self.__dict__.update(keywordargs) | |
135 | |
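# Editor's note (illustrative, not part of docutils): Struct is just a plain
# attribute bag, e.g.
#     memo = Struct(document=None, section_level=0)
#     memo.section_level += 1      # dotted access instead of dict lookups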
136 | |
137 class RSTStateMachine(StateMachineWS): | |
138 | |
139 """ | |
140 reStructuredText's master StateMachine. | |
141 | |
142 The entry point to reStructuredText parsing is the `run()` method. | |
143 """ | |
144 | |
145 def run(self, input_lines, document, input_offset=0, match_titles=True, | |
146 inliner=None): | |
147 """ | |
148 Parse `input_lines` and modify the `document` node in place. | |
149 | |
150 Extend `StateMachineWS.run()`: set up parse-global data and | |
151 run the StateMachine. | |
152 """ | |
153 self.language = languages.get_language( | |
154 document.settings.language_code) | |
155 self.match_titles = match_titles | |
156 if inliner is None: | |
157 inliner = Inliner() | |
158 inliner.init_customizations(document.settings) | |
159 self.memo = Struct(document=document, | |
160 reporter=document.reporter, | |
161 language=self.language, | |
162 title_styles=[], | |
163 section_level=0, | |
164 section_bubble_up_kludge=False, | |
165 inliner=inliner) | |
166 self.document = document | |
167 self.attach_observer(document.note_source) | |
168 self.reporter = self.memo.reporter | |
169 self.node = document | |
170 results = StateMachineWS.run(self, input_lines, input_offset, | |
171 input_source=document['source']) | |
172 assert results == [], 'RSTStateMachine.run() results should be empty!' | |
173 self.node = self.memo = None # remove unneeded references | |
174 | |
175 | |
176 class NestedStateMachine(StateMachineWS): | |
177 | |
178 """ | |
179 StateMachine run from within other StateMachine runs, to parse nested | |
180 document structures. | |
181 """ | |
182 | |
183 def run(self, input_lines, input_offset, memo, node, match_titles=True): | |
184 """ | |
185 Parse `input_lines` and populate a `docutils.nodes.document` instance. | |
186 | |
187 Extend `StateMachineWS.run()`: set up document-wide data. | |
188 """ | |
189 self.match_titles = match_titles | |
190 self.memo = memo | |
191 self.document = memo.document | |
192 self.attach_observer(self.document.note_source) | |
193 self.reporter = memo.reporter | |
194 self.language = memo.language | |
195 self.node = node | |
196 results = StateMachineWS.run(self, input_lines, input_offset) | |
197 assert results == [], ('NestedStateMachine.run() results should be ' | |
198 'empty!') | |
199 return results | |
200 | |
201 | |
202 class RSTState(StateWS): | |
203 | |
204 """ | |
205 reStructuredText State superclass. | |
206 | |
207 Contains methods used by all State subclasses. | |
208 """ | |
209 | |
210 nested_sm = NestedStateMachine | |
211 nested_sm_cache = [] | |
212 | |
213 def __init__(self, state_machine, debug=False): | |
214 self.nested_sm_kwargs = {'state_classes': state_classes, | |
215 'initial_state': 'Body'} | |
216 StateWS.__init__(self, state_machine, debug) | |
217 | |
218 def runtime_init(self): | |
219 StateWS.runtime_init(self) | |
220 memo = self.state_machine.memo | |
221 self.memo = memo | |
222 self.reporter = memo.reporter | |
223 self.inliner = memo.inliner | |
224 self.document = memo.document | |
225 self.parent = self.state_machine.node | |
226 # enable the reporter to determine source and source-line | |
227 if not hasattr(self.reporter, 'get_source_and_line'): | |
228 self.reporter.get_source_and_line = self.state_machine.get_source_and_line | |
229 | |
230 | |
231 def goto_line(self, abs_line_offset): | |
232 """ | |
233 Jump to input line `abs_line_offset`, ignoring jumps past the end. | |
234 """ | |
235 try: | |
236 self.state_machine.goto_line(abs_line_offset) | |
237 except EOFError: | |
238 pass | |
239 | |
240 def no_match(self, context, transitions): | |
241 """ | |
242 Override `StateWS.no_match` to generate a system message. | |
243 | |
244 This code should never be run. | |
245 """ | |
246 self.reporter.severe( | |
247 'Internal error: no transition pattern match. State: "%s"; ' | |
248 'transitions: %s; context: %s; current line: %r.' | |
249 % (self.__class__.__name__, transitions, context, | |
250 self.state_machine.line)) | |
251 return context, None, [] | |
252 | |
253 def bof(self, context): | |
254 """Called at beginning of file.""" | |
255 return [], [] | |
256 | |
257 def nested_parse(self, block, input_offset, node, match_titles=False, | |
258 state_machine_class=None, state_machine_kwargs=None): | |
259 """ | |
260 Create a new StateMachine rooted at `node` and run it over the input | |
261 `block`. | |
262 """ | |
263 use_default = 0 | |
264 if state_machine_class is None: | |
265 state_machine_class = self.nested_sm | |
266 use_default += 1 | |
267 if state_machine_kwargs is None: | |
268 state_machine_kwargs = self.nested_sm_kwargs | |
269 use_default += 1 | |
270 block_length = len(block) | |
271 | |
272 state_machine = None | |
273 if use_default == 2: | |
274 try: | |
275 state_machine = self.nested_sm_cache.pop() | |
276 except IndexError: | |
277 pass | |
278 if not state_machine: | |
279 state_machine = state_machine_class(debug=self.debug, | |
280 **state_machine_kwargs) | |
281 state_machine.run(block, input_offset, memo=self.memo, | |
282 node=node, match_titles=match_titles) | |
283 if use_default == 2: | |
284 self.nested_sm_cache.append(state_machine) | |
285 else: | |
286 state_machine.unlink() | |
287 new_offset = state_machine.abs_line_offset() | |
288 # No `block.parent` implies disconnected -- lines aren't in sync: | |
289 if block.parent and (len(block) - block_length) != 0: | |
290 # Adjustment for block if modified in nested parse: | |
291 self.state_machine.next_line(len(block) - block_length) | |
292 return new_offset | |
293 | |
294 def nested_list_parse(self, block, input_offset, node, initial_state, | |
295 blank_finish, | |
296 blank_finish_state=None, | |
297 extra_settings={}, | |
298 match_titles=False, | |
299 state_machine_class=None, | |
300 state_machine_kwargs=None): | |
301 """ | |
302 Create a new StateMachine rooted at `node` and run it over the input | |
303 `block`. Also keep track of optional intermediate blank lines and the | |
304 required final one. | |
305 """ | |
306 if state_machine_class is None: | |
307 state_machine_class = self.nested_sm | |
308 if state_machine_kwargs is None: | |
309 state_machine_kwargs = self.nested_sm_kwargs.copy() | |
310 state_machine_kwargs['initial_state'] = initial_state | |
311 state_machine = state_machine_class(debug=self.debug, | |
312 **state_machine_kwargs) | |
313 if blank_finish_state is None: | |
314 blank_finish_state = initial_state | |
315 state_machine.states[blank_finish_state].blank_finish = blank_finish | |
316 for key, value in extra_settings.items(): | |
317 setattr(state_machine.states[initial_state], key, value) | |
318 state_machine.run(block, input_offset, memo=self.memo, | |
319 node=node, match_titles=match_titles) | |
320 blank_finish = state_machine.states[blank_finish_state].blank_finish | |
321 state_machine.unlink() | |
322 return state_machine.abs_line_offset(), blank_finish | |
323 | |
324 def section(self, title, source, style, lineno, messages): | |
325 """Check for a valid subsection and create one if it checks out.""" | |
326 if self.check_subsection(source, style, lineno): | |
327 self.new_subsection(title, lineno, messages) | |
328 | |
329 def check_subsection(self, source, style, lineno): | |
330 """ | |
331 Check for a valid subsection header. Return 1 (true) or None (false). | |
332 | |
333 When a new section is reached that isn't a subsection of the current | |
334 section, back up the line count (use ``previous_line(-x)``), then | |
335 ``raise EOFError``. The current StateMachine will finish, then the | |
336 calling StateMachine can re-examine the title. This will work its way | |
337 back up the calling chain until the correct section level is reached. | |
338 | |
339 @@@ Alternative: Evaluate the title, store the title info & level, and | |
340 back up the chain until that level is reached. Store in memo? Or | |
341 return in results? | |
342 | |
343 :Exception: `EOFError` when a sibling or supersection encountered. | |
344 """ | |
345 memo = self.memo | |
346 title_styles = memo.title_styles | |
347 mylevel = memo.section_level | |
348 try: # check for existing title style | |
349 level = title_styles.index(style) + 1 | |
350 except ValueError: # new title style | |
351 if len(title_styles) == memo.section_level: # new subsection | |
352 title_styles.append(style) | |
353 return 1 | |
354 else: # not at lowest level | |
355 self.parent += self.title_inconsistent(source, lineno) | |
356 return None | |
357 if level <= mylevel: # sibling or supersection | |
358 memo.section_level = level # bubble up to parent section | |
359 if len(style) == 2: | |
360 memo.section_bubble_up_kludge = True | |
361 # back up 2 lines for underline title, 3 for overline title | |
362 self.state_machine.previous_line(len(style) + 1) | |
363 raise EOFError # let parent section re-evaluate | |
364 if level == mylevel + 1: # immediate subsection | |
365 return 1 | |
366 else: # invalid subsection | |
367 self.parent += self.title_inconsistent(source, lineno) | |
368 return None | |
369 | |
370 def title_inconsistent(self, sourcetext, lineno): | |
371 error = self.reporter.severe( | |
372 'Title level inconsistent:', nodes.literal_block('', sourcetext), | |
373 line=lineno) | |
374 return error | |
375 | |
376 def new_subsection(self, title, lineno, messages): | |
377 """Append new subsection to document tree. On return, check level.""" | |
378 memo = self.memo | |
379 mylevel = memo.section_level | |
380 memo.section_level += 1 | |
381 section_node = nodes.section() | |
382 self.parent += section_node | |
383 textnodes, title_messages = self.inline_text(title, lineno) | |
384 titlenode = nodes.title(title, '', *textnodes) | |
385 name = normalize_name(titlenode.astext()) | |
386 section_node['names'].append(name) | |
387 section_node += titlenode | |
388 section_node += messages | |
389 section_node += title_messages | |
390 self.document.note_implicit_target(section_node, section_node) | |
391 offset = self.state_machine.line_offset + 1 | |
392 absoffset = self.state_machine.abs_line_offset() + 1 | |
393 newabsoffset = self.nested_parse( | |
394 self.state_machine.input_lines[offset:], input_offset=absoffset, | |
395 node=section_node, match_titles=True) | |
396 self.goto_line(newabsoffset) | |
397 if memo.section_level <= mylevel: # can't handle next section? | |
398 raise EOFError # bubble up to supersection | |
399 # reset section_level; next pass will detect it properly | |
400 memo.section_level = mylevel | |
401 | |
402 def paragraph(self, lines, lineno): | |
403 """ | |
404 Return a list (paragraph & messages) & a boolean: literal_block next? | |
405 """ | |
406 data = '\n'.join(lines).rstrip() | |
407 if re.search(r'(?<!\\)(\\\\)*::$', data): | |
408 if len(data) == 2: | |
409 return [], 1 | |
410 elif data[-3] in ' \n': | |
411 text = data[:-3].rstrip() | |
412 else: | |
413 text = data[:-1] | |
414 literalnext = 1 | |
415 else: | |
416 text = data | |
417 literalnext = 0 | |
418 textnodes, messages = self.inline_text(text, lineno) | |
419 p = nodes.paragraph(data, '', *textnodes) | |
420 p.source, p.line = self.state_machine.get_source_and_line(lineno) | |
421 return [p] + messages, literalnext | |
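    # Editor's note (illustrative): the "::" handling above means that for
    #     paragraph(['Example::'], 1)    the paragraph text is 'Example:'
    #     paragraph(['Example ::'], 1)   the paragraph text is 'Example'
    #     paragraph(['::'], 1)           no paragraph is emitted
    # and in each case the returned literalnext flag is 1, so the caller
    # expects an indented literal block to follow.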
422 | |
423 def inline_text(self, text, lineno): | |
424 """ | |
425 Return 2 lists: nodes (text and inline elements), and system_messages. | |
426 """ | |
427 nodes, messages = self.inliner.parse(text, lineno, | |
428 self.memo, self.parent) | |
429 return nodes, messages | |
430 | |
431 def unindent_warning(self, node_name): | |
432 # the actual problem is one line below the current line | |
433 lineno = self.state_machine.abs_line_number()+1 | |
434 return self.reporter.warning('%s ends without a blank line; ' | |
435 'unexpected unindent.' % node_name, | |
436 line=lineno) | |
437 | |
438 | |
439 def build_regexp(definition, compile=True): | |
440 """ | |
441 Build, compile and return a regular expression based on `definition`. | |
442 | |
443 :Parameter: `definition`: a 4-tuple (group name, prefix, suffix, parts), | |
444 where "parts" is a list of regular expressions and/or regular | |
445 expression definitions to be joined into an or-group. | |
446 """ | |
447 name, prefix, suffix, parts = definition | |
448 part_strings = [] | |
449 for part in parts: | |
450 if isinstance(part, tuple): | |
451 part_strings.append(build_regexp(part, None)) | |
452 else: | |
453 part_strings.append(part) | |
454 or_group = '|'.join(part_strings) | |
455 regexp = '%(prefix)s(?P<%(name)s>%(or_group)s)%(suffix)s' % locals() | |
456 if compile: | |
457 return re.compile(regexp, re.UNICODE) | |
458 else: | |
459 return regexp | |
460 | |
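# Editor's note (illustrative): build_regexp() flattens a nested definition
# into one named or-group. For example,
#     build_regexp(('initial', '', '',
#                   ['foo', ('nested', r'\(', r'\)', ['bar', 'baz'])]),
#                  compile=False)
# returns the string r'(?P<initial>foo|\((?P<nested>bar|baz)\))'.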
461 | |
462 class Inliner(object): | |
463 | |
464 """ | |
465 Parse inline markup; call the `parse()` method. | |
466 """ | |
467 | |
468 def __init__(self): | |
469 self.implicit_dispatch = [] | |
470 """List of (pattern, bound method) tuples, used by | |
471 `self.implicit_inline`.""" | |
472 | |
473 def init_customizations(self, settings): | |
474 # lookahead and look-behind expressions for inline markup rules | |
475 if getattr(settings, 'character_level_inline_markup', False): | |
476 start_string_prefix = u'(^|(?<!\x00))' | |
477 end_string_suffix = u'' | |
478 else: | |
479 start_string_prefix = (u'(^|(?<=\\s|[%s%s]))' % | |
480 (punctuation_chars.openers, | |
481 punctuation_chars.delimiters)) | |
482 end_string_suffix = (u'($|(?=\\s|[\x00%s%s%s]))' % | |
483 (punctuation_chars.closing_delimiters, | |
484 punctuation_chars.delimiters, | |
485 punctuation_chars.closers)) | |
486 args = locals().copy() | |
487 args.update(vars(self.__class__)) | |
488 | |
489 parts = ('initial_inline', start_string_prefix, '', | |
490 [('start', '', self.non_whitespace_after, # simple start-strings | |
491 [r'\*\*', # strong | |
492 r'\*(?!\*)', # emphasis but not strong | |
493 r'``', # literal | |
494 r'_`', # inline internal target | |
495 r'\|(?!\|)'] # substitution reference | |
496 ), | |
497 ('whole', '', end_string_suffix, # whole constructs | |
498 [# reference name & end-string | |
499 r'(?P<refname>%s)(?P<refend>__?)' % self.simplename, | |
500 ('footnotelabel', r'\[', r'(?P<fnend>\]_)', | |
501 [r'[0-9]+', # manually numbered | |
502 r'\#(%s)?' % self.simplename, # auto-numbered (w/ label?) | |
503 r'\*', # auto-symbol | |
504 r'(?P<citationlabel>%s)' % self.simplename] # citation reference | |
505 ) | |
506 ] | |
507 ), | |
508 ('backquote', # interpreted text or phrase reference | |
509 '(?P<role>(:%s:)?)' % self.simplename, # optional role | |
510 self.non_whitespace_after, | |
511 ['`(?!`)'] # but not literal | |
512 ) | |
513 ] | |
514 ) | |
515 self.start_string_prefix = start_string_prefix | |
516 self.end_string_suffix = end_string_suffix | |
517 self.parts = parts | |
518 | |
519 self.patterns = Struct( | |
520 initial=build_regexp(parts), | |
521 emphasis=re.compile(self.non_whitespace_escape_before | |
522 + r'(\*)' + end_string_suffix, re.UNICODE), | |
523 strong=re.compile(self.non_whitespace_escape_before | |
524 + r'(\*\*)' + end_string_suffix, re.UNICODE), | |
525 interpreted_or_phrase_ref=re.compile( | |
526 r""" | |
527 %(non_unescaped_whitespace_escape_before)s | |
528 ( | |
529 ` | |
530 (?P<suffix> | |
531 (?P<role>:%(simplename)s:)? | |
532 (?P<refend>__?)? | |
533 ) | |
534 ) | |
535 %(end_string_suffix)s | |
536 """ % args, re.VERBOSE | re.UNICODE), | |
537 embedded_link=re.compile( | |
538 r""" | |
539 ( | |
540 (?:[ \n]+|^) # spaces or beginning of line/string | |
541 < # open bracket | |
542 %(non_whitespace_after)s | |
543 (([^<>]|\x00[<>])+) # anything but unescaped angle brackets | |
544 %(non_whitespace_escape_before)s | |
545 > # close bracket | |
546 ) | |
547 $ # end of string | |
548 """ % args, re.VERBOSE | re.UNICODE), | |
549 literal=re.compile(self.non_whitespace_before + '(``)' | |
550 + end_string_suffix, re.UNICODE), | |
551 target=re.compile(self.non_whitespace_escape_before | |
552 + r'(`)' + end_string_suffix, re.UNICODE), | |
553 substitution_ref=re.compile(self.non_whitespace_escape_before | |
554 + r'(\|_{0,2})' | |
555 + end_string_suffix, re.UNICODE), | |
556 email=re.compile(self.email_pattern % args + '$', | |
557 re.VERBOSE | re.UNICODE), | |
558 uri=re.compile( | |
559 (r""" | |
560 %(start_string_prefix)s | |
561 (?P<whole> | |
562 (?P<absolute> # absolute URI | |
563 (?P<scheme> # scheme (http, ftp, mailto) | |
564 [a-zA-Z][a-zA-Z0-9.+-]* | |
565 ) | |
566 : | |
567 ( | |
568 ( # either: | |
569 (//?)? # hierarchical URI | |
570 %(uric)s* # URI characters | |
571 %(uri_end)s # final URI char | |
572 ) | |
573 ( # optional query | |
574 \?%(uric)s* | |
575 %(uri_end)s | |
576 )? | |
577 ( # optional fragment | |
578 \#%(uric)s* | |
579 %(uri_end)s | |
580 )? | |
581 ) | |
582 ) | |
583 | # *OR* | |
584 (?P<email> # email address | |
585 """ + self.email_pattern + r""" | |
586 ) | |
587 ) | |
588 %(end_string_suffix)s | |
589 """) % args, re.VERBOSE | re.UNICODE), | |
590 pep=re.compile( | |
591 r""" | |
592 %(start_string_prefix)s | |
593 ( | |
594 (pep-(?P<pepnum1>\d+)(.txt)?) # reference to source file | |
595 | | |
596 (PEP\s+(?P<pepnum2>\d+)) # reference by name | |
597 ) | |
598 %(end_string_suffix)s""" % args, re.VERBOSE | re.UNICODE), | |
599 rfc=re.compile( | |
600 r""" | |
601 %(start_string_prefix)s | |
602 (RFC(-|\s+)?(?P<rfcnum>\d+)) | |
603 %(end_string_suffix)s""" % args, re.VERBOSE | re.UNICODE)) | |
604 | |
605 self.implicit_dispatch.append((self.patterns.uri, | |
606 self.standalone_uri)) | |
607 if settings.pep_references: | |
608 self.implicit_dispatch.append((self.patterns.pep, | |
609 self.pep_reference)) | |
610 if settings.rfc_references: | |
611 self.implicit_dispatch.append((self.patterns.rfc, | |
612 self.rfc_reference)) | |
613 | |
614 def parse(self, text, lineno, memo, parent): | |
615 # Needs to be refactored for nested inline markup. | |
616 # Add nested_parse() method? | |
617 """ | |
618 Return 2 lists: nodes (text and inline elements), and system_messages. | |
619 | |
620 Using `self.patterns.initial`, a pattern which matches start-strings | |
621 (emphasis, strong, interpreted, phrase reference, literal, | |
622 substitution reference, and inline target) and complete constructs | |
623 (simple reference, footnote reference), search for a candidate. When | |
624 one is found, check for validity (e.g., not a quoted '*' character). | |
625 If valid, search for the corresponding end string if applicable, and | |
626 check it for validity. If not found or invalid, generate a warning | |
627 and ignore the start-string. Implicit inline markup (e.g. standalone | |
628 URIs) is found last. | |
629 """ | |
630 self.reporter = memo.reporter | |
631 self.document = memo.document | |
632 self.language = memo.language | |
633 self.parent = parent | |
634 pattern_search = self.patterns.initial.search | |
635 dispatch = self.dispatch | |
636 remaining = escape2null(text) | |
637 processed = [] | |
638 unprocessed = [] | |
639 messages = [] | |
640 while remaining: | |
641 match = pattern_search(remaining) | |
642 if match: | |
643 groups = match.groupdict() | |
644 method = dispatch[groups['start'] or groups['backquote'] | |
645 or groups['refend'] or groups['fnend']] | |
646 before, inlines, remaining, sysmessages = method(self, match, | |
647 lineno) | |
648 unprocessed.append(before) | |
649 messages += sysmessages | |
650 if inlines: | |
651 processed += self.implicit_inline(''.join(unprocessed), | |
652 lineno) | |
653 processed += inlines | |
654 unprocessed = [] | |
655 else: | |
656 break | |
657 remaining = ''.join(unprocessed) + remaining | |
658 if remaining: | |
659 processed += self.implicit_inline(remaining, lineno) | |
660 return processed, messages | |
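    # Editor's note (illustrative): the dispatch loop above is what turns, e.g.,
    #     from docutils.core import publish_doctree
    #     publish_doctree('*emphasis*, **strong** and ``literal``').pformat()
    # into <emphasis>, <strong> and <literal> children of a paragraph node.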
661 | |
662 # Inline object recognition | |
663 # ------------------------- | |
664 # See also init_customizations(). | |
665 non_whitespace_before = r'(?<!\s)' | |
666 non_whitespace_escape_before = r'(?<![\s\x00])' | |
667 non_unescaped_whitespace_escape_before = r'(?<!(?<!\x00)[\s\x00])' | |
668 non_whitespace_after = r'(?!\s)' | |
669 # Alphanumerics with isolated internal [-._+:] chars (i.e. not 2 together): | |
670 simplename = r'(?:(?!_)\w)+(?:[-._+:](?:(?!_)\w)+)*' | |
671 # Valid URI characters (see RFC 2396 & RFC 2732); | |
672 # final \x00 allows backslash escapes in URIs: | |
673 uric = r"""[-_.!~*'()[\];/:@&=+$,%a-zA-Z0-9\x00]""" | |
674 # Delimiter indicating the end of a URI (not part of the URI): | |
675 uri_end_delim = r"""[>]""" | |
676 # Last URI character; same as uric but no punctuation: | |
677 urilast = r"""[_~*/=+a-zA-Z0-9]""" | |
678 # End of a URI (either 'urilast' or 'uric followed by a | |
679 # uri_end_delim'): | |
680 uri_end = r"""(?:%(urilast)s|%(uric)s(?=%(uri_end_delim)s))""" % locals() | |
681 emailc = r"""[-_!~*'{|}/#?^`&=+$%a-zA-Z0-9\x00]""" | |
682 email_pattern = r""" | |
683 %(emailc)s+(?:\.%(emailc)s+)* # name | |
684 (?<!\x00)@ # at | |
685 %(emailc)s+(?:\.%(emailc)s*)* # host | |
686 %(uri_end)s # final URI char | |
687 """ | |
688 | |
689 def quoted_start(self, match): | |
690 """Test if inline markup start-string is 'quoted'. | |
691 | |
692 'Quoted' in this context means the start-string is enclosed in a pair | |
693 of matching opening/closing delimiters (not necessarily quotes) | |
694 or at the end of the match. | |
695 """ | |
696 string = match.string | |
697 start = match.start() | |
698 if start == 0: # start-string at beginning of text | |
699 return False | |
700 prestart = string[start - 1] | |
701 try: | |
702 poststart = string[match.end()] | |
703 except IndexError: # start-string at end of text | |
704 return True # not "quoted" but no markup start-string either | |
705 return punctuation_chars.match_chars(prestart, poststart) | |
706 | |
707 def inline_obj(self, match, lineno, end_pattern, nodeclass, | |
708 restore_backslashes=False): | |
709 string = match.string | |
710 matchstart = match.start('start') | |
711 matchend = match.end('start') | |
712 if self.quoted_start(match): | |
713 return (string[:matchend], [], string[matchend:], [], '') | |
714 endmatch = end_pattern.search(string[matchend:]) | |
715 if endmatch and endmatch.start(1): # 1 or more chars | |
716 text = endmatch.string[:endmatch.start(1)] | |
717 if restore_backslashes: | |
718 text = unescape(text, True) | |
719 textend = matchend + endmatch.end(1) | |
720 rawsource = unescape(string[matchstart:textend], True) | |
721 node = nodeclass(rawsource, text) | |
722 return (string[:matchstart], [node], | |
723 string[textend:], [], endmatch.group(1)) | |
724 msg = self.reporter.warning( | |
725 'Inline %s start-string without end-string.' | |
726 % nodeclass.__name__, line=lineno) | |
727 text = unescape(string[matchstart:matchend], True) | |
728 prb = self.problematic(text, text, msg) | |
729 return string[:matchstart], [prb], string[matchend:], [msg], '' | |
730 | |
731 def problematic(self, text, rawsource, message): | |
732 msgid = self.document.set_id(message, self.parent) | |
733 problematic = nodes.problematic(rawsource, text, refid=msgid) | |
734 prbid = self.document.set_id(problematic) | |
735 message.add_backref(prbid) | |
736 return problematic | |
737 | |
738 def emphasis(self, match, lineno): | |
739 before, inlines, remaining, sysmessages, endstring = self.inline_obj( | |
740 match, lineno, self.patterns.emphasis, nodes.emphasis) | |
741 return before, inlines, remaining, sysmessages | |
742 | |
743 def strong(self, match, lineno): | |
744 before, inlines, remaining, sysmessages, endstring = self.inline_obj( | |
745 match, lineno, self.patterns.strong, nodes.strong) | |
746 return before, inlines, remaining, sysmessages | |
747 | |
748 def interpreted_or_phrase_ref(self, match, lineno): | |
749 end_pattern = self.patterns.interpreted_or_phrase_ref | |
750 string = match.string | |
751 matchstart = match.start('backquote') | |
752 matchend = match.end('backquote') | |
753 rolestart = match.start('role') | |
754 role = match.group('role') | |
755 position = '' | |
756 if role: | |
757 role = role[1:-1] | |
758 position = 'prefix' | |
759 elif self.quoted_start(match): | |
760 return (string[:matchend], [], string[matchend:], []) | |
761 endmatch = end_pattern.search(string[matchend:]) | |
762 if endmatch and endmatch.start(1): # 1 or more chars | |
763 textend = matchend + endmatch.end() | |
764 if endmatch.group('role'): | |
765 if role: | |
766 msg = self.reporter.warning( | |
767 'Multiple roles in interpreted text (both ' | |
768 'prefix and suffix present; only one allowed).', | |
769 line=lineno) | |
770 text = unescape(string[rolestart:textend], True) | |
771 prb = self.problematic(text, text, msg) | |
772 return string[:rolestart], [prb], string[textend:], [msg] | |
773 role = endmatch.group('suffix')[1:-1] | |
774 position = 'suffix' | |
775 escaped = endmatch.string[:endmatch.start(1)] | |
776 rawsource = unescape(string[matchstart:textend], True) | |
777 if rawsource[-1:] == '_': | |
778 if role: | |
779 msg = self.reporter.warning( | |
780 'Mismatch: both interpreted text role %s and ' | |
781 'reference suffix.' % position, line=lineno) | |
782 text = unescape(string[rolestart:textend], True) | |
783 prb = self.problematic(text, text, msg) | |
784 return string[:rolestart], [prb], string[textend:], [msg] | |
785 return self.phrase_ref(string[:matchstart], string[textend:], | |
786 rawsource, escaped) | |
787 else: | |
788 rawsource = unescape(string[rolestart:textend], True) | |
789 nodelist, messages = self.interpreted(rawsource, escaped, role, | |
790 lineno) | |
791 return (string[:rolestart], nodelist, | |
792 string[textend:], messages) | |
793 msg = self.reporter.warning( | |
794 'Inline interpreted text or phrase reference start-string ' | |
795 'without end-string.', line=lineno) | |
796 text = unescape(string[matchstart:matchend], True) | |
797 prb = self.problematic(text, text, msg) | |
798 return string[:matchstart], [prb], string[matchend:], [msg] | |
799 | |
800 def phrase_ref(self, before, after, rawsource, escaped, text=None): | |
801 # `text` is ignored (since 0.16) | |
802 match = self.patterns.embedded_link.search(escaped) | |
803 if match: # embedded <URI> or <alias_> | |
804 text = escaped[:match.start(0)] | |
805 unescaped = unescape(text) | |
806 rawtext = unescape(text, True) | |
807 aliastext = match.group(2) | |
808 rawaliastext = unescape(aliastext, True) | |
809 underscore_escaped = rawaliastext.endswith(r'\_') | |
810 if aliastext.endswith('_') and not (underscore_escaped | |
811 or self.patterns.uri.match(aliastext)): | |
812 aliastype = 'name' | |
813 alias = normalize_name(unescape(aliastext[:-1])) | |
814 target = nodes.target(match.group(1), refname=alias) | |
815 target.indirect_reference_name = whitespace_normalize_name( | |
816 unescape(aliastext[:-1])) | |
817 else: | |
818 aliastype = 'uri' | |
819 # remove unescaped whitespace | |
820 alias_parts = split_escaped_whitespace(match.group(2)) | |
821 alias = ' '.join(''.join(part.split()) | |
822 for part in alias_parts) | |
823 alias = self.adjust_uri(unescape(alias)) | |
824 if alias.endswith(r'\_'): | |
825 alias = alias[:-2] + '_' | |
826 target = nodes.target(match.group(1), refuri=alias) | |
827 target.referenced = 1 | |
828 if not aliastext: | |
829 raise ApplicationError('problem with embedded link: %r' | |
830 % aliastext) | |
831 if not text: | |
832 text = alias | |
833 unescaped = unescape(text) | |
834 rawtext = rawaliastext | |
835 else: | |
836 text = escaped | |
837 unescaped = unescape(text) | |
838 target = None | |
839 rawtext = unescape(escaped, True) | |
840 | |
841 refname = normalize_name(unescaped) | |
842 reference = nodes.reference(rawsource, text, | |
843 name=whitespace_normalize_name(unescaped)) | |
844 reference[0].rawsource = rawtext | |
845 | |
846 node_list = [reference] | |
847 | |
848 if rawsource[-2:] == '__': | |
849 if target and (aliastype == 'name'): | |
850 reference['refname'] = alias | |
851 self.document.note_refname(reference) | |
852 # self.document.note_indirect_target(target) # required? | |
853 elif target and (aliastype == 'uri'): | |
854 reference['refuri'] = alias | |
855 else: | |
856 reference['anonymous'] = 1 | |
857 else: | |
858 if target: | |
859 target['names'].append(refname) | |
860 if aliastype == 'name': | |
861 reference['refname'] = alias | |
862 self.document.note_indirect_target(target) | |
863 self.document.note_refname(reference) | |
864 else: | |
865 reference['refuri'] = alias | |
866 self.document.note_explicit_target(target, self.parent) | |
867 # target.note_referenced_by(name=refname) | |
868 node_list.append(target) | |
869 else: | |
870 reference['refname'] = refname | |
871 self.document.note_refname(reference) | |
872 return before, node_list, after, [] | |
873 | |
874 | |
875 def adjust_uri(self, uri): | |
876 match = self.patterns.email.match(uri) | |
877 if match: | |
878 return 'mailto:' + uri | |
879 else: | |
880 return uri | |
881 | |
882 def interpreted(self, rawsource, text, role, lineno): | |
883 role_fn, messages = roles.role(role, self.language, lineno, | |
884 self.reporter) | |
885 if role_fn: | |
886 nodes, messages2 = role_fn(role, rawsource, text, lineno, self) | |
887 return nodes, messages + messages2 | |
888 else: | |
889 msg = self.reporter.error( | |
890 'Unknown interpreted text role "%s".' % role, | |
891 line=lineno) | |
892 return ([self.problematic(rawsource, rawsource, msg)], | |
893 messages + [msg]) | |
894 | |
895 def literal(self, match, lineno): | |
896 before, inlines, remaining, sysmessages, endstring = self.inline_obj( | |
897 match, lineno, self.patterns.literal, nodes.literal, | |
898 restore_backslashes=True) | |
899 return before, inlines, remaining, sysmessages | |
900 | |
901 def inline_internal_target(self, match, lineno): | |
902 before, inlines, remaining, sysmessages, endstring = self.inline_obj( | |
903 match, lineno, self.patterns.target, nodes.target) | |
904 if inlines and isinstance(inlines[0], nodes.target): | |
905 assert len(inlines) == 1 | |
906 target = inlines[0] | |
907 name = normalize_name(target.astext()) | |
908 target['names'].append(name) | |
909 self.document.note_explicit_target(target, self.parent) | |
910 return before, inlines, remaining, sysmessages | |
911 | |
912 def substitution_reference(self, match, lineno): | |
913 before, inlines, remaining, sysmessages, endstring = self.inline_obj( | |
914 match, lineno, self.patterns.substitution_ref, | |
915 nodes.substitution_reference) | |
916 if len(inlines) == 1: | |
917 subref_node = inlines[0] | |
918 if isinstance(subref_node, nodes.substitution_reference): | |
919 subref_text = subref_node.astext() | |
920 self.document.note_substitution_ref(subref_node, subref_text) | |
921 if endstring[-1:] == '_': | |
922 reference_node = nodes.reference( | |
923 '|%s%s' % (subref_text, endstring), '') | |
924 if endstring[-2:] == '__': | |
925 reference_node['anonymous'] = 1 | |
926 else: | |
927 reference_node['refname'] = normalize_name(subref_text) | |
928 self.document.note_refname(reference_node) | |
929 reference_node += subref_node | |
930 inlines = [reference_node] | |
931 return before, inlines, remaining, sysmessages | |
932 | |
933 def footnote_reference(self, match, lineno): | |
934 """ | |
935 Handles `nodes.footnote_reference` and `nodes.citation_reference` | |
936 elements. | |
937 """ | |
938 label = match.group('footnotelabel') | |
939 refname = normalize_name(label) | |
940 string = match.string | |
941 before = string[:match.start('whole')] | |
942 remaining = string[match.end('whole'):] | |
943 if match.group('citationlabel'): | |
944 refnode = nodes.citation_reference('[%s]_' % label, | |
945 refname=refname) | |
946 refnode += nodes.Text(label) | |
947 self.document.note_citation_ref(refnode) | |
948 else: | |
949 refnode = nodes.footnote_reference('[%s]_' % label) | |
950 if refname[0] == '#': | |
951 refname = refname[1:] | |
952 refnode['auto'] = 1 | |
953 self.document.note_autofootnote_ref(refnode) | |
954 elif refname == '*': | |
955 refname = '' | |
956 refnode['auto'] = '*' | |
957 self.document.note_symbol_footnote_ref( | |
958 refnode) | |
959 else: | |
960 refnode += nodes.Text(label) | |
961 if refname: | |
962 refnode['refname'] = refname | |
963 self.document.note_footnote_ref(refnode) | |
964 if utils.get_trim_footnote_ref_space(self.document.settings): | |
965 before = before.rstrip() | |
966 return (before, [refnode], remaining, []) | |
967 | |
968 def reference(self, match, lineno, anonymous=False): | |
969 referencename = match.group('refname') | |
970 refname = normalize_name(referencename) | |
971 referencenode = nodes.reference( | |
972 referencename + match.group('refend'), referencename, | |
973 name=whitespace_normalize_name(referencename)) | |
974 referencenode[0].rawsource = referencename | |
975 if anonymous: | |
976 referencenode['anonymous'] = 1 | |
977 else: | |
978 referencenode['refname'] = refname | |
979 self.document.note_refname(referencenode) | |
980 string = match.string | |
981 matchstart = match.start('whole') | |
982 matchend = match.end('whole') | |
983 return (string[:matchstart], [referencenode], string[matchend:], []) | |
984 | |
985 def anonymous_reference(self, match, lineno): | |
986 return self.reference(match, lineno, anonymous=1) | |
987 | |
988 def standalone_uri(self, match, lineno): | |
989 if (not match.group('scheme') | |
990 or match.group('scheme').lower() in urischemes.schemes): | |
991 if match.group('email'): | |
992 addscheme = 'mailto:' | |
993 else: | |
994 addscheme = '' | |
995 text = match.group('whole') | |
996 refuri = addscheme + unescape(text) | |
997 reference = nodes.reference(unescape(text, True), text, | |
998 refuri=refuri) | |
999 return [reference] | |
1000 else: # not a valid scheme | |
1001 raise MarkupMismatch | |
1002 | |
1003 def pep_reference(self, match, lineno): | |
1004 text = match.group(0) | |
1005 if text.startswith('pep-'): | |
1006 pepnum = int(unescape(match.group('pepnum1'))) | |
1007 elif text.startswith('PEP'): | |
1008 pepnum = int(unescape(match.group('pepnum2'))) | |
1009 else: | |
1010 raise MarkupMismatch | |
1011 ref = (self.document.settings.pep_base_url | |
1012 + self.document.settings.pep_file_url_template % pepnum) | |
1013 return [nodes.reference(unescape(text, True), text, refuri=ref)] | |
1014 | |
1015 rfc_url = 'rfc%d.html' | |
1016 | |
1017 def rfc_reference(self, match, lineno): | |
1018 text = match.group(0) | |
1019 if text.startswith('RFC'): | |
1020 rfcnum = int(unescape(match.group('rfcnum'))) | |
1021 ref = self.document.settings.rfc_base_url + self.rfc_url % rfcnum | |
1022 else: | |
1023 raise MarkupMismatch | |
1024 return [nodes.reference(unescape(text, True), text, refuri=ref)] | |
1025 | |
1026 def implicit_inline(self, text, lineno): | |
1027 """ | |
1028 Check each of the patterns in `self.implicit_dispatch` for a match, | |
1029 and dispatch to the stored method for the pattern. Recursively check | |
1030 the text before and after the match. Return a list of `nodes.Text` | |
1031 and inline element nodes. | |
1032 """ | |
1033 if not text: | |
1034 return [] | |
1035 for pattern, method in self.implicit_dispatch: | |
1036 match = pattern.search(text) | |
1037 if match: | |
1038 try: | |
1039 # Must recurse on strings before *and* after the match; | |
1040 # there may be multiple patterns. | |
1041 return (self.implicit_inline(text[:match.start()], lineno) | |
1042 + method(match, lineno) + | |
1043 self.implicit_inline(text[match.end():], lineno)) | |
1044 except MarkupMismatch: | |
1045 pass | |
1046 return [nodes.Text(text, unescape(text, True))] | |
1047 | |
1048 dispatch = {'*': emphasis, | |
1049 '**': strong, | |
1050 '`': interpreted_or_phrase_ref, | |
1051 '``': literal, | |
1052 '_`': inline_internal_target, | |
1053 ']_': footnote_reference, | |
1054 '|': substitution_reference, | |
1055 '_': reference, | |
1056 '__': anonymous_reference} | |
1057 | |
1058 | |
1059 def _loweralpha_to_int(s, _zero=(ord('a')-1)): | |
1060 return ord(s) - _zero | |
1061 | |
1062 def _upperalpha_to_int(s, _zero=(ord('A')-1)): | |
1063 return ord(s) - _zero | |
1064 | |
1065 def _lowerroman_to_int(s): | |
1066 return roman.fromRoman(s.upper()) | |
1067 | |
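# Editor's note (illustrative): these converters turn enumerator text into its
# ordinal value, e.g. _loweralpha_to_int('c') == 3, _upperalpha_to_int('B') == 2
# and _lowerroman_to_int('iv') == 4 (via roman.fromRoman('IV')).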
1068 | |
1069 class Body(RSTState): | |
1070 | |
1071 """ | |
1072 Generic classifier of the first line of a block. | |
1073 """ | |
1074 | |
1075 double_width_pad_char = tableparser.TableParser.double_width_pad_char | |
1076 """Padding character for East Asian double-width text.""" | |
1077 | |
1078 enum = Struct() | |
1079 """Enumerated list parsing information.""" | |
1080 | |
1081 enum.formatinfo = { | |
1082 'parens': Struct(prefix='(', suffix=')', start=1, end=-1), | |
1083 'rparen': Struct(prefix='', suffix=')', start=0, end=-1), | |
1084 'period': Struct(prefix='', suffix='.', start=0, end=-1)} | |
1085 enum.formats = enum.formatinfo.keys() | |
1086 enum.sequences = ['arabic', 'loweralpha', 'upperalpha', | |
1087 'lowerroman', 'upperroman'] # ORDERED! | |
1088 enum.sequencepats = {'arabic': '[0-9]+', | |
1089 'loweralpha': '[a-z]', | |
1090 'upperalpha': '[A-Z]', | |
1091 'lowerroman': '[ivxlcdm]+', | |
1092 'upperroman': '[IVXLCDM]+',} | |
1093 enum.converters = {'arabic': int, | |
1094 'loweralpha': _loweralpha_to_int, | |
1095 'upperalpha': _upperalpha_to_int, | |
1096 'lowerroman': _lowerroman_to_int, | |
1097 'upperroman': roman.fromRoman} | |
1098 | |
1099 enum.sequenceregexps = {} | |
1100 for sequence in enum.sequences: | |
1101 enum.sequenceregexps[sequence] = re.compile( | |
1102 enum.sequencepats[sequence] + '$', re.UNICODE) | |
1103 | |
1104 grid_table_top_pat = re.compile(r'\+-[-+]+-\+ *$') | |
1105 """Matches the top (& bottom) of a full table).""" | |
1106 | |
1107 simple_table_top_pat = re.compile('=+( +=+)+ *$') | |
1108 """Matches the top of a simple table.""" | |
1109 | |
1110 simple_table_border_pat = re.compile('=+[ =]*$') | |
1111 """Matches the bottom & header bottom of a simple table.""" | |
1112 | |
1113 pats = {} | |
1114 """Fragments of patterns used by transitions.""" | |
1115 | |
1116 pats['nonalphanum7bit'] = '[!-/:-@[-`{-~]' | |
1117 pats['alpha'] = '[a-zA-Z]' | |
1118 pats['alphanum'] = '[a-zA-Z0-9]' | |
1119 pats['alphanumplus'] = '[a-zA-Z0-9_-]' | |
1120 pats['enum'] = ('(%(arabic)s|%(loweralpha)s|%(upperalpha)s|%(lowerroman)s' | |
1121 '|%(upperroman)s|#)' % enum.sequencepats) | |
1122 pats['optname'] = '%(alphanum)s%(alphanumplus)s*' % pats | |
1123 # @@@ Loosen up the pattern? Allow Unicode? | |
1124 pats['optarg'] = '(%(alpha)s%(alphanumplus)s*|<[^<>]+>)' % pats | |
1125 pats['shortopt'] = r'(-|\+)%(alphanum)s( ?%(optarg)s)?' % pats | |
1126 pats['longopt'] = r'(--|/)%(optname)s([ =]%(optarg)s)?' % pats | |
1127 pats['option'] = r'(%(shortopt)s|%(longopt)s)' % pats | |
1128 | |
1129 for format in enum.formats: | |
1130 pats[format] = '(?P<%s>%s%s%s)' % ( | |
1131 format, re.escape(enum.formatinfo[format].prefix), | |
1132 pats['enum'], re.escape(enum.formatinfo[format].suffix)) | |
1133 | |
1134 patterns = { | |
1135 'bullet': u'[-+*\u2022\u2023\u2043]( +|$)', | |
1136 'enumerator': r'(%(parens)s|%(rparen)s|%(period)s)( +|$)' % pats, | |
1137 'field_marker': r':(?![: ])([^:\\]|\\.|:(?!([ `]|$)))*(?<! ):( +|$)', | |
1138 'option_marker': r'%(option)s(, %(option)s)*( +| ?$)' % pats, | |
1139 'doctest': r'>>>( +|$)', | |
1140 'line_block': r'\|( +|$)', | |
1141 'grid_table_top': grid_table_top_pat, | |
1142 'simple_table_top': simple_table_top_pat, | |
1143 'explicit_markup': r'\.\.( +|$)', | |
1144 'anonymous': r'__( +|$)', | |
1145 'line': r'(%(nonalphanum7bit)s)\1* *$' % pats, | |
1146 'text': r''} | |
1147 initial_transitions = ( | |
1148 'bullet', | |
1149 'enumerator', | |
1150 'field_marker', | |
1151 'option_marker', | |
1152 'doctest', | |
1153 'line_block', | |
1154 'grid_table_top', | |
1155 'simple_table_top', | |
1156 'explicit_markup', | |
1157 'anonymous', | |
1158 'line', | |
1159 'text') | |
1160 | |
1161 def indent(self, match, context, next_state): | |
1162 """Block quote.""" | |
1163 indented, indent, line_offset, blank_finish = \ | |
1164 self.state_machine.get_indented() | |
1165 elements = self.block_quote(indented, line_offset) | |
1166 self.parent += elements | |
1167 if not blank_finish: | |
1168 self.parent += self.unindent_warning('Block quote') | |
1169 return context, next_state, [] | |
1170 | |
1171 def block_quote(self, indented, line_offset): | |
1172 elements = [] | |
1173 while indented: | |
1174 (blockquote_lines, | |
1175 attribution_lines, | |
1176 attribution_offset, | |
1177 indented, | |
1178 new_line_offset) = self.split_attribution(indented, line_offset) | |
1179 blockquote = nodes.block_quote() | |
1180 self.nested_parse(blockquote_lines, line_offset, blockquote) | |
1181 elements.append(blockquote) | |
1182 if attribution_lines: | |
1183 attribution, messages = self.parse_attribution( | |
1184 attribution_lines, attribution_offset) | |
1185 blockquote += attribution | |
1186 elements += messages | |
1187 line_offset = new_line_offset | |
1188 while indented and not indented[0]: | |
1189 indented = indented[1:] | |
1190 line_offset += 1 | |
1191 return elements | |
1192 | |
1193 # U+2014 is an em-dash: | |
1194 attribution_pattern = re.compile(u'(---?(?!-)|\u2014) *(?=[^ \\n])', | |
1195 re.UNICODE) | |
1196 | |
1197 def split_attribution(self, indented, line_offset): | |
1198 """ | |
1199 Check for a block quote attribution and split it off: | |
1200 | |
1201 * First line after a blank line must begin with a dash ("--", "---", | |
1202 em-dash; matches `self.attribution_pattern`). | |
1203 * Every line after that must have consistent indentation. | |
1204 * Attributions must be preceded by block quote content. | |
1205 | |
1206 Return a tuple of: (block quote content lines, attribution lines, | |
1207 attribution offset, remaining indented lines, new line offset). | |
1208 """ | |
1209 blank = None | |
1210 nonblank_seen = False | |
1211 for i in range(len(indented)): | |
1212 line = indented[i].rstrip() | |
1213 if line: | |
1214 if nonblank_seen and blank == i - 1: # last line blank | |
1215 match = self.attribution_pattern.match(line) | |
1216 if match: | |
1217 attribution_end, indent = self.check_attribution( | |
1218 indented, i) | |
1219 if attribution_end: | |
1220 a_lines = indented[i:attribution_end] | |
1221 a_lines.trim_left(match.end(), end=1) | |
1222 a_lines.trim_left(indent, start=1) | |
1223 return (indented[:i], a_lines, | |
1224 i, indented[attribution_end:], | |
1225 line_offset + attribution_end) | |
1226 nonblank_seen = True | |
1227 else: | |
1228 blank = i | |
1229 else: | |
1230 return (indented, None, None, None, None) | |
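    # Editor's note (illustrative): the kind of input split here, after a
    # blank line inside block-quote content:
    #
    #     This is a block quote.
    #
    #     -- Somebody Famous
    #
    # The dash line becomes a nodes.attribution child of the block_quote.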
1231 | |
1232 def check_attribution(self, indented, attribution_start): | |
1233 """ | |
1234 Check attribution shape. | |
1235 Return the index past the end of the attribution, and the indent. | |
1236 """ | |
1237 indent = None | |
1238 i = attribution_start + 1 | |
1239 for i in range(attribution_start + 1, len(indented)): | |
1240 line = indented[i].rstrip() | |
1241 if not line: | |
1242 break | |
1243 if indent is None: | |
1244 indent = len(line) - len(line.lstrip()) | |
1245 elif len(line) - len(line.lstrip()) != indent: | |
1246 return None, None # bad shape; not an attribution | |
1247 else: | |
1248 # return index of line after last attribution line: | |
1249 i += 1 | |
1250 return i, (indent or 0) | |
1251 | |
1252 def parse_attribution(self, indented, line_offset): | |
1253 text = '\n'.join(indented).rstrip() | |
1254 lineno = self.state_machine.abs_line_number() + line_offset | |
1255 textnodes, messages = self.inline_text(text, lineno) | |
1256 node = nodes.attribution(text, '', *textnodes) | |
1257 node.source, node.line = self.state_machine.get_source_and_line(lineno) | |
1258 return node, messages | |
1259 | |
1260 def bullet(self, match, context, next_state): | |
1261 """Bullet list item.""" | |
1262 bulletlist = nodes.bullet_list() | |
1263 (bulletlist.source, | |
1264 bulletlist.line) = self.state_machine.get_source_and_line() | |
1265 self.parent += bulletlist | |
1266 bulletlist['bullet'] = match.string[0] | |
1267 i, blank_finish = self.list_item(match.end()) | |
1268 bulletlist += i | |
1269 offset = self.state_machine.line_offset + 1 # next line | |
1270 new_line_offset, blank_finish = self.nested_list_parse( | |
1271 self.state_machine.input_lines[offset:], | |
1272 input_offset=self.state_machine.abs_line_offset() + 1, | |
1273 node=bulletlist, initial_state='BulletList', | |
1274 blank_finish=blank_finish) | |
1275 self.goto_line(new_line_offset) | |
1276 if not blank_finish: | |
1277 self.parent += self.unindent_warning('Bullet list') | |
1278 return [], next_state, [] | |
1279 | |
1280 def list_item(self, indent): | |
1281 if self.state_machine.line[indent:]: | |
1282 indented, line_offset, blank_finish = ( | |
1283 self.state_machine.get_known_indented(indent)) | |
1284 else: | |
1285 indented, indent, line_offset, blank_finish = ( | |
1286 self.state_machine.get_first_known_indented(indent)) | |
1287 listitem = nodes.list_item('\n'.join(indented)) | |
1288 if indented: | |
1289 self.nested_parse(indented, input_offset=line_offset, | |
1290 node=listitem) | |
1291 return listitem, blank_finish | |
1292 | |
1293 def enumerator(self, match, context, next_state): | |
1294 """Enumerated List Item""" | |
1295 format, sequence, text, ordinal = self.parse_enumerator(match) | |
1296 if not self.is_enumerated_list_item(ordinal, sequence, format): | |
1297 raise statemachine.TransitionCorrection('text') | |
1298 enumlist = nodes.enumerated_list() | |
1299 self.parent += enumlist | |
1300 if sequence == '#': | |
1301 enumlist['enumtype'] = 'arabic' | |
1302 else: | |
1303 enumlist['enumtype'] = sequence | |
1304 enumlist['prefix'] = self.enum.formatinfo[format].prefix | |
1305 enumlist['suffix'] = self.enum.formatinfo[format].suffix | |
1306 if ordinal != 1: | |
1307 enumlist['start'] = ordinal | |
1308 msg = self.reporter.info( | |
1309 'Enumerated list start value not ordinal-1: "%s" (ordinal %s)' | |
1310 % (text, ordinal)) | |
1311 self.parent += msg | |
1312 listitem, blank_finish = self.list_item(match.end()) | |
1313 enumlist += listitem | |
1314 offset = self.state_machine.line_offset + 1 # next line | |
1315 newline_offset, blank_finish = self.nested_list_parse( | |
1316 self.state_machine.input_lines[offset:], | |
1317 input_offset=self.state_machine.abs_line_offset() + 1, | |
1318 node=enumlist, initial_state='EnumeratedList', | |
1319 blank_finish=blank_finish, | |
1320 extra_settings={'lastordinal': ordinal, | |
1321 'format': format, | |
1322 'auto': sequence == '#'}) | |
1323 self.goto_line(newline_offset) | |
1324 if not blank_finish: | |
1325 self.parent += self.unindent_warning('Enumerated list') | |
1326 return [], next_state, [] | |
1327 | |
1328 def parse_enumerator(self, match, expected_sequence=None): | |
1329 """ | |
1330 Analyze an enumerator and return the results. | |
1331 | |
1332 :Return: | |
1333 - the enumerator format ('period', 'parens', or 'rparen'), | |
1334 - the sequence used ('arabic', 'loweralpha', 'upperroman', etc.), | |
1335 - the text of the enumerator, stripped of formatting, and | |
1336 - the ordinal value of the enumerator ('a' -> 1, 'ii' -> 2, etc.; | |
1337 ``None`` is returned for invalid enumerator text). | |
1338 | |
1339 The enumerator format has already been determined by the regular | |
1340 expression match. If `expected_sequence` is given, that sequence is | |
1341 tried first. If not, we check for Roman numeral 1. This way, | |
1342 single-character Roman numerals (which are also alphabetical) can be | |
1343 matched. If no sequence has been matched, all sequences are checked in | |
1344 order. | |
1345 """ | |
1346 groupdict = match.groupdict() | |
1347 sequence = '' | |
1348 for format in self.enum.formats: | |
1349 if groupdict[format]: # was this the format matched? | |
1350 break # yes; keep `format` | |
1351 else: # shouldn't happen | |
1352 raise ParserError('enumerator format not matched') | |
1353 text = groupdict[format][self.enum.formatinfo[format].start | |
1354 :self.enum.formatinfo[format].end] | |
1355 if text == '#': | |
1356 sequence = '#' | |
1357 elif expected_sequence: | |
1358 try: | |
1359 if self.enum.sequenceregexps[expected_sequence].match(text): | |
1360 sequence = expected_sequence | |
1361 except KeyError: # shouldn't happen | |
1362 raise ParserError('unknown enumerator sequence: %s' | |
1363 % sequence) | |
1364 elif text == 'i': | |
1365 sequence = 'lowerroman' | |
1366 elif text == 'I': | |
1367 sequence = 'upperroman' | |
1368 if not sequence: | |
1369 for sequence in self.enum.sequences: | |
1370 if self.enum.sequenceregexps[sequence].match(text): | |
1371 break | |
1372 else: # shouldn't happen | |
1373 raise ParserError('enumerator sequence not matched') | |
1374 if sequence == '#': | |
1375 ordinal = 1 | |
1376 else: | |
1377 try: | |
1378 ordinal = self.enum.converters[sequence](text) | |
1379 except roman.InvalidRomanNumeralError: | |
1380 ordinal = None | |
1381 return format, sequence, text, ordinal | |
1382 | |
1383 def is_enumerated_list_item(self, ordinal, sequence, format): | |
1384 """ | |
1385 Check validity based on the ordinal value and the second line. | |
1386 | |
1387 Return true if the ordinal is valid and the second line is blank, | |
1388 indented, or starts with the next enumerator or an auto-enumerator. | |
1389 """ | |
1390 if ordinal is None: | |
1391 return None | |
1392 try: | |
1393 next_line = self.state_machine.next_line() | |
1394 except EOFError: # end of input lines | |
1395 self.state_machine.previous_line() | |
1396 return 1 | |
1397 else: | |
1398 self.state_machine.previous_line() | |
1399 if not next_line[:1].strip(): # blank or indented | |
1400 return 1 | |
1401 result = self.make_enumerator(ordinal + 1, sequence, format) | |
1402 if result: | |
1403 next_enumerator, auto_enumerator = result | |
1404 try: | |
1405 if ( next_line.startswith(next_enumerator) or | |
1406 next_line.startswith(auto_enumerator) ): | |
1407 return 1 | |
1408 except TypeError: | |
1409 pass | |
1410 return None | |
1411 | |
1412 def make_enumerator(self, ordinal, sequence, format): | |
1413 """ | |
1414 Construct and return the next enumerated list item marker, and an | |
1415 auto-enumerator ("#" instead of the regular enumerator). | |
1416 | |
1417 Return ``None`` for invalid (out of range) ordinals. | |
1418 """ #" | |
1419 if sequence == '#': | |
1420 enumerator = '#' | |
1421 elif sequence == 'arabic': | |
1422 enumerator = str(ordinal) | |
1423 else: | |
1424 if sequence.endswith('alpha'): | |
1425 if ordinal > 26: | |
1426 return None | |
1427 enumerator = chr(ordinal + ord('a') - 1) | |
1428 elif sequence.endswith('roman'): | |
1429 try: | |
1430 enumerator = roman.toRoman(ordinal) | |
1431 except roman.RomanError: | |
1432 return None | |
1433 else: # shouldn't happen | |
1434 raise ParserError('unknown enumerator sequence: "%s"' | |
1435 % sequence) | |
1436 if sequence.startswith('lower'): | |
1437 enumerator = enumerator.lower() | |
1438 elif sequence.startswith('upper'): | |
1439 enumerator = enumerator.upper() | |
1440 else: # shouldn't happen | |
1441 raise ParserError('unknown enumerator sequence: "%s"' | |
1442 % sequence) | |
1443 formatinfo = self.enum.formatinfo[format] | |
1444 next_enumerator = (formatinfo.prefix + enumerator + formatinfo.suffix | |
1445 + ' ') | |
1446 auto_enumerator = formatinfo.prefix + '#' + formatinfo.suffix + ' ' | |
1447 return next_enumerator, auto_enumerator | |
1448 | |
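# Construction sketch, assuming the 'parens' and 'period' formatinfo entries
# defined earlier in this class use '(' / ')' and '' / '.' as prefix/suffix:
#
#     make_enumerator(2, 'loweralpha', 'parens')    # -> ('(b) ', '(#) ')
#     make_enumerator(4, 'lowerroman', 'period')    # -> ('iv. ', '#. ')
#     make_enumerator(27, 'loweralpha', 'period')   # -> None (out of range)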
1449 def field_marker(self, match, context, next_state): | |
1450 """Field list item.""" | |
1451 field_list = nodes.field_list() | |
1452 self.parent += field_list | |
1453 field, blank_finish = self.field(match) | |
1454 field_list += field | |
1455 offset = self.state_machine.line_offset + 1 # next line | |
1456 newline_offset, blank_finish = self.nested_list_parse( | |
1457 self.state_machine.input_lines[offset:], | |
1458 input_offset=self.state_machine.abs_line_offset() + 1, | |
1459 node=field_list, initial_state='FieldList', | |
1460 blank_finish=blank_finish) | |
1461 self.goto_line(newline_offset) | |
1462 if not blank_finish: | |
1463 self.parent += self.unindent_warning('Field list') | |
1464 return [], next_state, [] | |
1465 | |
1466 def field(self, match): | |
1467 name = self.parse_field_marker(match) | |
1468 src, srcline = self.state_machine.get_source_and_line() | |
1469 lineno = self.state_machine.abs_line_number() | |
1470 indented, indent, line_offset, blank_finish = \ | |
1471 self.state_machine.get_first_known_indented(match.end()) | |
1472 field_node = nodes.field() | |
1473 field_node.source = src | |
1474 field_node.line = srcline | |
1475 name_nodes, name_messages = self.inline_text(name, lineno) | |
1476 field_node += nodes.field_name(name, '', *name_nodes) | |
1477 field_body = nodes.field_body('\n'.join(indented), *name_messages) | |
1478 field_node += field_body | |
1479 if indented: | |
1480 self.parse_field_body(indented, line_offset, field_body) | |
1481 return field_node, blank_finish | |
1482 | |
1483 def parse_field_marker(self, match): | |
1484 """Extract & return field name from a field marker match.""" | |
1485 field = match.group()[1:] # strip off leading ':' | |
1486 field = field[:field.rfind(':')] # strip off trailing ':' etc. | |
1487 return field | |
1488 | |
1489 def parse_field_body(self, indented, offset, node): | |
1490 self.nested_parse(indented, input_offset=offset, node=node) | |
1491 | |
1492 def option_marker(self, match, context, next_state): | |
1493 """Option list item.""" | |
1494 optionlist = nodes.option_list() | |
1495 (optionlist.source, optionlist.line) = self.state_machine.get_source_and_line() | |
1496 try: | |
1497 listitem, blank_finish = self.option_list_item(match) | |
1498 except MarkupError as error: | |
1499 # This shouldn't happen; pattern won't match. | |
1500 msg = self.reporter.error(u'Invalid option list marker: %s' % | |
1501 error) | |
1502 self.parent += msg | |
1503 indented, indent, line_offset, blank_finish = \ | |
1504 self.state_machine.get_first_known_indented(match.end()) | |
1505 elements = self.block_quote(indented, line_offset) | |
1506 self.parent += elements | |
1507 if not blank_finish: | |
1508 self.parent += self.unindent_warning('Option list') | |
1509 return [], next_state, [] | |
1510 self.parent += optionlist | |
1511 optionlist += listitem | |
1512 offset = self.state_machine.line_offset + 1 # next line | |
1513 newline_offset, blank_finish = self.nested_list_parse( | |
1514 self.state_machine.input_lines[offset:], | |
1515 input_offset=self.state_machine.abs_line_offset() + 1, | |
1516 node=optionlist, initial_state='OptionList', | |
1517 blank_finish=blank_finish) | |
1518 self.goto_line(newline_offset) | |
1519 if not blank_finish: | |
1520 self.parent += self.unindent_warning('Option list') | |
1521 return [], next_state, [] | |
1522 | |
1523 def option_list_item(self, match): | |
1524 offset = self.state_machine.abs_line_offset() | |
1525 options = self.parse_option_marker(match) | |
1526 indented, indent, line_offset, blank_finish = \ | |
1527 self.state_machine.get_first_known_indented(match.end()) | |
1528 if not indented: # not an option list item | |
1529 self.goto_line(offset) | |
1530 raise statemachine.TransitionCorrection('text') | |
1531 option_group = nodes.option_group('', *options) | |
1532 description = nodes.description('\n'.join(indented)) | |
1533 option_list_item = nodes.option_list_item('', option_group, | |
1534 description) | |
1535 if indented: | |
1536 self.nested_parse(indented, input_offset=line_offset, | |
1537 node=description) | |
1538 return option_list_item, blank_finish | |
1539 | |
1540 def parse_option_marker(self, match): | |
1541 """ | |
1542 Return a list of `node.option` and `node.option_argument` objects, | |
1543 parsed from an option marker match. | |
1544 | |
1545 :Exception: `MarkupError` for invalid option markers. | |
1546 """ | |
1547 optlist = [] | |
1548 optionstrings = match.group().rstrip().split(', ') | |
1549 for optionstring in optionstrings: | |
1550 tokens = optionstring.split() | |
1551 delimiter = ' ' | |
1552 firstopt = tokens[0].split('=', 1) | |
1553 if len(firstopt) > 1: | |
1554 # "--opt=value" form | |
1555 tokens[:1] = firstopt | |
1556 delimiter = '=' | |
1557 elif (len(tokens[0]) > 2 | |
1558 and ((tokens[0].startswith('-') | |
1559 and not tokens[0].startswith('--')) | |
1560 or tokens[0].startswith('+'))): | |
1561 # "-ovalue" form | |
1562 tokens[:1] = [tokens[0][:2], tokens[0][2:]] | |
1563 delimiter = '' | |
1564 if len(tokens) > 1 and (tokens[1].startswith('<') | |
1565 and tokens[-1].endswith('>')): | |
1566 # "-o <value1 value2>" form; join all values into one token | |
1567 tokens[1:] = [' '.join(tokens[1:])] | |
1568 if 0 < len(tokens) <= 2: | |
1569 option = nodes.option(optionstring) | |
1570 option += nodes.option_string(tokens[0], tokens[0]) | |
1571 if len(tokens) > 1: | |
1572 option += nodes.option_argument(tokens[1], tokens[1], | |
1573 delimiter=delimiter) | |
1574 optlist.append(option) | |
1575 else: | |
1576 raise MarkupError( | |
1577 'wrong number of option tokens (=%s), should be 1 or 2: ' | |
1578 '"%s"' % (len(tokens), optionstring)) | |
1579 return optlist | |
1580 | |
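# Tokenization sketch for the branches above (option strings are split on
# ', '; each yields an option node with at most one argument):
#
#     "-o FILE, --output=FILE"  ->  ('-o', 'FILE', delimiter=' ') and
#                                   ('--output', 'FILE', delimiter='=')
#     "-oFILE"                  ->  ('-o', 'FILE', delimiter='')
#     "-w <a b>"                ->  ('-w', '<a b>', delimiter=' ')
#     "-x y z"                  ->  MarkupError (three tokens)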
1581 def doctest(self, match, context, next_state): | |
1582 data = '\n'.join(self.state_machine.get_text_block()) | |
1583 # TODO: prepend class value ['pycon'] (Python Console) | |
1584 # parse with `directives.body.CodeBlock` (returns literal-block | |
1585 # with class "code" and syntax highlight markup). | |
1586 self.parent += nodes.doctest_block(data, data) | |
1587 return [], next_state, [] | |
1588 | |
1589 def line_block(self, match, context, next_state): | |
1590 """First line of a line block.""" | |
1591 block = nodes.line_block() | |
1592 self.parent += block | |
1593 lineno = self.state_machine.abs_line_number() | |
1594 line, messages, blank_finish = self.line_block_line(match, lineno) | |
1595 block += line | |
1596 self.parent += messages | |
1597 if not blank_finish: | |
1598 offset = self.state_machine.line_offset + 1 # next line | |
1599 new_line_offset, blank_finish = self.nested_list_parse( | |
1600 self.state_machine.input_lines[offset:], | |
1601 input_offset=self.state_machine.abs_line_offset() + 1, | |
1602 node=block, initial_state='LineBlock', | |
1603 blank_finish=0) | |
1604 self.goto_line(new_line_offset) | |
1605 if not blank_finish: | |
1606 self.parent += self.reporter.warning( | |
1607 'Line block ends without a blank line.', | |
1608 line=lineno+1) | |
1609 if len(block): | |
1610 if block[0].indent is None: | |
1611 block[0].indent = 0 | |
1612 self.nest_line_block_lines(block) | |
1613 return [], next_state, [] | |
1614 | |
1615 def line_block_line(self, match, lineno): | |
1616 """Return one line element of a line_block.""" | |
1617 indented, indent, line_offset, blank_finish = \ | |
1618 self.state_machine.get_first_known_indented(match.end(), | |
1619 until_blank=True) | |
1620 text = u'\n'.join(indented) | |
1621 text_nodes, messages = self.inline_text(text, lineno) | |
1622 line = nodes.line(text, '', *text_nodes) | |
1623 if match.string.rstrip() != '|': # not empty | |
1624 line.indent = len(match.group(1)) - 1 | |
1625 return line, messages, blank_finish | |
1626 | |
1627 def nest_line_block_lines(self, block): | |
1628 for index in range(1, len(block)): | |
1629 if getattr(block[index], 'indent', None) is None: | |
1630 block[index].indent = block[index - 1].indent | |
1631 self.nest_line_block_segment(block) | |
1632 | |
1633 def nest_line_block_segment(self, block): | |
1634 indents = [item.indent for item in block] | |
1635 least = min(indents) | |
1636 new_items = [] | |
1637 new_block = nodes.line_block() | |
1638 for item in block: | |
1639 if item.indent > least: | |
1640 new_block.append(item) | |
1641 else: | |
1642 if len(new_block): | |
1643 self.nest_line_block_segment(new_block) | |
1644 new_items.append(new_block) | |
1645 new_block = nodes.line_block() | |
1646 new_items.append(item) | |
1647 if len(new_block): | |
1648 self.nest_line_block_segment(new_block) | |
1649 new_items.append(new_block) | |
1650 block[:] = new_items | |
1651 | |
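# Nesting sketch: items indented more than the least-indented item of a
# segment are wrapped in a child line_block, recursively.  Indents
# [0, 2, 2, 0] therefore become:
#
#     line_block
#         line            (indent 0)
#         line_block
#             line        (indent 2)
#             line        (indent 2)
#         line            (indent 0)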
1652 def grid_table_top(self, match, context, next_state): | |
1653 """Top border of a full table.""" | |
1654 return self.table_top(match, context, next_state, | |
1655 self.isolate_grid_table, | |
1656 tableparser.GridTableParser) | |
1657 | |
1658 def simple_table_top(self, match, context, next_state): | |
1659 """Top border of a simple table.""" | |
1660 return self.table_top(match, context, next_state, | |
1661 self.isolate_simple_table, | |
1662 tableparser.SimpleTableParser) | |
1663 | |
1664 def table_top(self, match, context, next_state, | |
1665 isolate_function, parser_class): | |
1666 """Top border of a generic table.""" | |
1667 nodelist, blank_finish = self.table(isolate_function, parser_class) | |
1668 self.parent += nodelist | |
1669 if not blank_finish: | |
1670 msg = self.reporter.warning( | |
1671 'Blank line required after table.', | |
1672 line=self.state_machine.abs_line_number()+1) | |
1673 self.parent += msg | |
1674 return [], next_state, [] | |
1675 | |
1676 def table(self, isolate_function, parser_class): | |
1677 """Parse a table.""" | |
1678 block, messages, blank_finish = isolate_function() | |
1679 if block: | |
1680 try: | |
1681 parser = parser_class() | |
1682 tabledata = parser.parse(block) | |
1683 tableline = (self.state_machine.abs_line_number() - len(block) | |
1684 + 1) | |
1685 table = self.build_table(tabledata, tableline) | |
1686 nodelist = [table] + messages | |
1687 except tableparser.TableMarkupError as err: | |
1688 nodelist = self.malformed_table(block, ' '.join(err.args), | |
1689 offset=err.offset) + messages | |
1690 else: | |
1691 nodelist = messages | |
1692 return nodelist, blank_finish | |
1693 | |
1694 def isolate_grid_table(self): | |
1695 messages = [] | |
1696 blank_finish = 1 | |
1697 try: | |
1698 block = self.state_machine.get_text_block(flush_left=True) | |
1699 except statemachine.UnexpectedIndentationError as err: | |
1700 block, src, srcline = err.args | |
1701 messages.append(self.reporter.error('Unexpected indentation.', | |
1702 source=src, line=srcline)) | |
1703 blank_finish = 0 | |
1704 block.disconnect() | |
1705 # for East Asian chars: | |
1706 block.pad_double_width(self.double_width_pad_char) | |
1707 width = len(block[0].strip()) | |
1708 for i in range(len(block)): | |
1709 block[i] = block[i].strip() | |
1710 if block[i][0] not in '+|': # check left edge | |
1711 blank_finish = 0 | |
1712 self.state_machine.previous_line(len(block) - i) | |
1713 del block[i:] | |
1714 break | |
1715 if not self.grid_table_top_pat.match(block[-1]): # find bottom | |
1716 blank_finish = 0 | |
1717 # from second-last to third line of table: | |
1718 for i in range(len(block) - 2, 1, -1): | |
1719 if self.grid_table_top_pat.match(block[i]): | |
1720 self.state_machine.previous_line(len(block) - i + 1) | |
1721 del block[i+1:] | |
1722 break | |
1723 else: | |
1724 messages.extend(self.malformed_table(block)) | |
1725 return [], messages, blank_finish | |
1726 for i in range(len(block)): # check right edge | |
1727 if len(block[i]) != width or block[i][-1] not in '+|': | |
1728 messages.extend(self.malformed_table(block)) | |
1729 return [], messages, blank_finish | |
1730 return block, messages, blank_finish | |
1731 | |
1732 def isolate_simple_table(self): | |
1733 start = self.state_machine.line_offset | |
1734 lines = self.state_machine.input_lines | |
1735 limit = len(lines) - 1 | |
1736 toplen = len(lines[start].strip()) | |
1737 pattern_match = self.simple_table_border_pat.match | |
1738 found = 0 | |
1739 found_at = None | |
1740 i = start + 1 | |
1741 while i <= limit: | |
1742 line = lines[i] | |
1743 match = pattern_match(line) | |
1744 if match: | |
1745 if len(line.strip()) != toplen: | |
1746 self.state_machine.next_line(i - start) | |
1747 messages = self.malformed_table( | |
1748 lines[start:i+1], 'Bottom/header table border does ' | |
1749 'not match top border.') | |
1750 return [], messages, i == limit or not lines[i+1].strip() | |
1751 found += 1 | |
1752 found_at = i | |
1753 if found == 2 or i == limit or not lines[i+1].strip(): | |
1754 end = i | |
1755 break | |
1756 i += 1 | |
1757 else: # reached end of input_lines | |
1758 if found: | |
1759 extra = ' or no blank line after table bottom' | |
1760 self.state_machine.next_line(found_at - start) | |
1761 block = lines[start:found_at+1] | |
1762 else: | |
1763 extra = '' | |
1764 self.state_machine.next_line(i - start - 1) | |
1765 block = lines[start:] | |
1766 messages = self.malformed_table( | |
1767 block, 'No bottom table border found%s.' % extra) | |
1768 return [], messages, not extra | |
1769 self.state_machine.next_line(end - start) | |
1770 block = lines[start:end+1] | |
1771 # for East Asian chars: | |
1772 block.pad_double_width(self.double_width_pad_char) | |
1773 return block, [], end == limit or not lines[end+1].strip() | |
1774 | |
1775 def malformed_table(self, block, detail='', offset=0): | |
1776 block.replace(self.double_width_pad_char, '') | |
1777 data = '\n'.join(block) | |
1778 message = 'Malformed table.' | |
1779 startline = self.state_machine.abs_line_number() - len(block) + 1 | |
1780 if detail: | |
1781 message += '\n' + detail | |
1782 error = self.reporter.error(message, nodes.literal_block(data, data), | |
1783 line=startline+offset) | |
1784 return [error] | |
1785 | |
1786 def build_table(self, tabledata, tableline, stub_columns=0, widths=None): | |
1787 colwidths, headrows, bodyrows = tabledata | |
1788 table = nodes.table() | |
1789 if widths == 'auto': | |
1790 table['classes'] += ['colwidths-auto'] | |
1791 elif widths: # "grid" or list of integers | |
1792 table['classes'] += ['colwidths-given'] | |
1793 tgroup = nodes.tgroup(cols=len(colwidths)) | |
1794 table += tgroup | |
1795 for colwidth in colwidths: | |
1796 colspec = nodes.colspec(colwidth=colwidth) | |
1797 if stub_columns: | |
1798 colspec.attributes['stub'] = 1 | |
1799 stub_columns -= 1 | |
1800 tgroup += colspec | |
1801 if headrows: | |
1802 thead = nodes.thead() | |
1803 tgroup += thead | |
1804 for row in headrows: | |
1805 thead += self.build_table_row(row, tableline) | |
1806 tbody = nodes.tbody() | |
1807 tgroup += tbody | |
1808 for row in bodyrows: | |
1809 tbody += self.build_table_row(row, tableline) | |
1810 return table | |
1811 | |
1812 def build_table_row(self, rowdata, tableline): | |
1813 row = nodes.row() | |
1814 for cell in rowdata: | |
1815 if cell is None: | |
1816 continue | |
1817 morerows, morecols, offset, cellblock = cell | |
1818 attributes = {} | |
1819 if morerows: | |
1820 attributes['morerows'] = morerows | |
1821 if morecols: | |
1822 attributes['morecols'] = morecols | |
1823 entry = nodes.entry(**attributes) | |
1824 row += entry | |
1825 if ''.join(cellblock): | |
1826 self.nested_parse(cellblock, input_offset=tableline+offset, | |
1827 node=entry) | |
1828 return row | |
1829 | |
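# Shape of `tabledata` consumed by build_table/build_table_row above, as
# produced by the table parsers:
#
#     colwidths            -- list of column widths, e.g. [10, 24]
#     headrows, bodyrows   -- lists of rows; each row is a list of cells,
#                             each cell either None (spanned away) or a
#                             4-tuple (morerows, morecols, content line
#                             offset, cellblock of source lines)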
1830 | |
1831 explicit = Struct() | |
1832 """Patterns and constants used for explicit markup recognition.""" | |
1833 | |
1834 explicit.patterns = Struct( | |
1835 target=re.compile(r""" | |
1836 ( | |
1837 _ # anonymous target | |
1838 | # *OR* | |
1839 (?!_) # no underscore at the beginning | |
1840 (?P<quote>`?) # optional open quote | |
1841 (?![ `]) # first char. not space or | |
1842 # backquote | |
1843 (?P<name> # reference name | |
1844 .+? | |
1845 ) | |
1846 %(non_whitespace_escape_before)s | |
1847 (?P=quote) # close quote if open quote used | |
1848 ) | |
1849 (?<!(?<!\x00):) # no unescaped colon at end | |
1850 %(non_whitespace_escape_before)s | |
1851 [ ]? # optional space | |
1852 : # end of reference name | |
1853 ([ ]+|$) # followed by whitespace | |
1854 """ % vars(Inliner), re.VERBOSE | re.UNICODE), | |
1855 reference=re.compile(r""" | |
1856 ( | |
1857 (?P<simple>%(simplename)s)_ | |
1858 | # *OR* | |
1859 ` # open backquote | |
1860 (?![ ]) # not space | |
1861 (?P<phrase>.+?) # hyperlink phrase | |
1862 %(non_whitespace_escape_before)s | |
1863 `_ # close backquote, | |
1864 # reference mark | |
1865 ) | |
1866 $ # end of string | |
1867 """ % vars(Inliner), re.VERBOSE | re.UNICODE), | |
1868 substitution=re.compile(r""" | |
1869 ( | |
1870 (?![ ]) # first char. not space | |
1871 (?P<name>.+?) # substitution text | |
1872 %(non_whitespace_escape_before)s | |
1873 \| # close delimiter | |
1874 ) | |
1875 ([ ]+|$) # followed by whitespace | |
1876 """ % vars(Inliner), | |
1877 re.VERBOSE | re.UNICODE),) | |
1878 | |
1879 def footnote(self, match): | |
1880 src, srcline = self.state_machine.get_source_and_line() | |
1881 indented, indent, offset, blank_finish = \ | |
1882 self.state_machine.get_first_known_indented(match.end()) | |
1883 label = match.group(1) | |
1884 name = normalize_name(label) | |
1885 footnote = nodes.footnote('\n'.join(indented)) | |
1886 footnote.source = src | |
1887 footnote.line = srcline | |
1888 if name[0] == '#': # auto-numbered | |
1889 name = name[1:] # autonumber label | |
1890 footnote['auto'] = 1 | |
1891 if name: | |
1892 footnote['names'].append(name) | |
1893 self.document.note_autofootnote(footnote) | |
1894 elif name == '*': # auto-symbol | |
1895 name = '' | |
1896 footnote['auto'] = '*' | |
1897 self.document.note_symbol_footnote(footnote) | |
1898 else: # manually numbered | |
1899 footnote += nodes.label('', label) | |
1900 footnote['names'].append(name) | |
1901 self.document.note_footnote(footnote) | |
1902 if name: | |
1903 self.document.note_explicit_target(footnote, footnote) | |
1904 else: | |
1905 self.document.set_id(footnote, footnote) | |
1906 if indented: | |
1907 self.nested_parse(indented, input_offset=offset, node=footnote) | |
1908 return [footnote], blank_finish | |
1909 | |
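# Label handling above, by example:
#
#     ".. [1] text"      -> manually numbered footnote, name '1'
#     ".. [#] text"      -> auto-numbered, unnamed        (footnote['auto'] == 1)
#     ".. [#note] text"  -> auto-numbered, named 'note'
#     ".. [*] text"      -> auto-symbol footnote          (footnote['auto'] == '*')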
1910 def citation(self, match): | |
1911 src, srcline = self.state_machine.get_source_and_line() | |
1912 indented, indent, offset, blank_finish = \ | |
1913 self.state_machine.get_first_known_indented(match.end()) | |
1914 label = match.group(1) | |
1915 name = normalize_name(label) | |
1916 citation = nodes.citation('\n'.join(indented)) | |
1917 citation.source = src | |
1918 citation.line = srcline | |
1919 citation += nodes.label('', label) | |
1920 citation['names'].append(name) | |
1921 self.document.note_citation(citation) | |
1922 self.document.note_explicit_target(citation, citation) | |
1923 if indented: | |
1924 self.nested_parse(indented, input_offset=offset, node=citation) | |
1925 return [citation], blank_finish | |
1926 | |
1927 def hyperlink_target(self, match): | |
1928 pattern = self.explicit.patterns.target | |
1929 lineno = self.state_machine.abs_line_number() | |
1930 block, indent, offset, blank_finish = \ | |
1931 self.state_machine.get_first_known_indented( | |
1932 match.end(), until_blank=True, strip_indent=False) | |
1933 blocktext = match.string[:match.end()] + '\n'.join(block) | |
1934 block = [escape2null(line) for line in block] | |
1935 escaped = block[0] | |
1936 blockindex = 0 | |
1937 while True: | |
1938 targetmatch = pattern.match(escaped) | |
1939 if targetmatch: | |
1940 break | |
1941 blockindex += 1 | |
1942 try: | |
1943 escaped += block[blockindex] | |
1944 except IndexError: | |
1945 raise MarkupError('malformed hyperlink target.') | |
1946 del block[:blockindex] | |
1947 block[0] = (block[0] + ' ')[targetmatch.end()-len(escaped)-1:].strip() | |
1948 target = self.make_target(block, blocktext, lineno, | |
1949 targetmatch.group('name')) | |
1950 return [target], blank_finish | |
1951 | |
1952 def make_target(self, block, block_text, lineno, target_name): | |
1953 target_type, data = self.parse_target(block, block_text, lineno) | |
1954 if target_type == 'refname': | |
1955 target = nodes.target(block_text, '', refname=normalize_name(data)) | |
1956 target.indirect_reference_name = data | |
1957 self.add_target(target_name, '', target, lineno) | |
1958 self.document.note_indirect_target(target) | |
1959 return target | |
1960 elif target_type == 'refuri': | |
1961 target = nodes.target(block_text, '') | |
1962 self.add_target(target_name, data, target, lineno) | |
1963 return target | |
1964 else: | |
1965 return data | |
1966 | |
1967 def parse_target(self, block, block_text, lineno): | |
1968 """ | |
1969 Determine the type of reference of a target. | |
1970 | |
1971 :Return: A 2-tuple, one of: | |
1972 | |
1973 - 'refname' and the indirect reference name | |
1974 - 'refuri' and the URI | |
1975 - 'malformed' and a system_message node | |
1976 """ | |
1977 if block and block[-1].strip()[-1:] == '_': # possible indirect target | |
1978 reference = ' '.join([line.strip() for line in block]) | |
1979 refname = self.is_reference(reference) | |
1980 if refname: | |
1981 return 'refname', refname | |
1982 ref_parts = split_escaped_whitespace(' '.join(block)) | |
1983 reference = ' '.join(''.join(unescape(part).split()) | |
1984 for part in ref_parts) | |
1985 return 'refuri', reference | |
1986 | |
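# Classification sketch (given the block that remains once the target marker
# has been stripped off):
#
#     ".. _alias: https://example.org/x"  -> ('refuri',  'https://example.org/x')
#     ".. _alias: other_"                 -> ('refname', 'other')  (indirect target)
#     ".. _alias:"                        -> ('refuri',  '')       (internal target)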
1987 def is_reference(self, reference): | |
1988 match = self.explicit.patterns.reference.match( | |
1989 whitespace_normalize_name(reference)) | |
1990 if not match: | |
1991 return None | |
1992 return unescape(match.group('simple') or match.group('phrase')) | |
1993 | |
1994 def add_target(self, targetname, refuri, target, lineno): | |
1995 target.line = lineno | |
1996 if targetname: | |
1997 name = normalize_name(unescape(targetname)) | |
1998 target['names'].append(name) | |
1999 if refuri: | |
2000 uri = self.inliner.adjust_uri(refuri) | |
2001 if uri: | |
2002 target['refuri'] = uri | |
2003 else: | |
2004 raise ApplicationError('problem with URI: %r' % refuri) | |
2005 self.document.note_explicit_target(target, self.parent) | |
2006 else: # anonymous target | |
2007 if refuri: | |
2008 target['refuri'] = refuri | |
2009 target['anonymous'] = 1 | |
2010 self.document.note_anonymous_target(target) | |
2011 | |
2012 def substitution_def(self, match): | |
2013 pattern = self.explicit.patterns.substitution | |
2014 src, srcline = self.state_machine.get_source_and_line() | |
2015 block, indent, offset, blank_finish = \ | |
2016 self.state_machine.get_first_known_indented(match.end(), | |
2017 strip_indent=False) | |
2018 blocktext = (match.string[:match.end()] + '\n'.join(block)) | |
2019 block.disconnect() | |
2020 escaped = escape2null(block[0].rstrip()) | |
2021 blockindex = 0 | |
2022 while True: | |
2023 subdefmatch = pattern.match(escaped) | |
2024 if subdefmatch: | |
2025 break | |
2026 blockindex += 1 | |
2027 try: | |
2028 escaped = escaped + ' ' + escape2null(block[blockindex].strip()) | |
2029 except IndexError: | |
2030 raise MarkupError('malformed substitution definition.') | |
2031 del block[:blockindex] # strip out the substitution marker | |
2032 block[0] = (block[0].strip() + ' ')[subdefmatch.end()-len(escaped)-1:-1] | |
2033 if not block[0]: | |
2034 del block[0] | |
2035 offset += 1 | |
2036 while block and not block[-1].strip(): | |
2037 block.pop() | |
2038 subname = subdefmatch.group('name') | |
2039 substitution_node = nodes.substitution_definition(blocktext) | |
2040 substitution_node.source = src | |
2041 substitution_node.line = srcline | |
2042 if not block: | |
2043 msg = self.reporter.warning( | |
2044 'Substitution definition "%s" missing contents.' % subname, | |
2045 nodes.literal_block(blocktext, blocktext), | |
2046 source=src, line=srcline) | |
2047 return [msg], blank_finish | |
2048 block[0] = block[0].strip() | |
2049 substitution_node['names'].append( | |
2050 nodes.whitespace_normalize_name(subname)) | |
2051 new_abs_offset, blank_finish = self.nested_list_parse( | |
2052 block, input_offset=offset, node=substitution_node, | |
2053 initial_state='SubstitutionDef', blank_finish=blank_finish) | |
2054 i = 0 | |
2055 for node in substitution_node[:]: | |
2056 if not (isinstance(node, nodes.Inline) or | |
2057 isinstance(node, nodes.Text)): | |
2058 self.parent += substitution_node[i] | |
2059 del substitution_node[i] | |
2060 else: | |
2061 i += 1 | |
2062 for node in substitution_node.traverse(nodes.Element): | |
2063 if self.disallowed_inside_substitution_definitions(node): | |
2064 pformat = nodes.literal_block('', node.pformat().rstrip()) | |
2065 msg = self.reporter.error( | |
2066 'Substitution definition contains illegal element <%s>:' | |
2067 % node.tagname, | |
2068 pformat, nodes.literal_block(blocktext, blocktext), | |
2069 source=src, line=srcline) | |
2070 return [msg], blank_finish | |
2071 if len(substitution_node) == 0: | |
2072 msg = self.reporter.warning( | |
2073 'Substitution definition "%s" empty or invalid.' % subname, | |
2074 nodes.literal_block(blocktext, blocktext), | |
2075 source=src, line=srcline) | |
2076 return [msg], blank_finish | |
2077 self.document.note_substitution_def( | |
2078 substitution_node, subname, self.parent) | |
2079 return [substitution_node], blank_finish | |
2080 | |
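# Typical input handled above: the text after the "|name|" marker is parsed
# with the SubstitutionDef state (see below), so it is normally an embedded
# directive, e.g.
#
#     .. |psf| replace:: Python Software Foundation
#
# which yields a substitution_definition node named 'psf' whose children are
# the inline nodes produced by the "replace" directive.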
2081 def disallowed_inside_substitution_definitions(self, node): | |
2082 if (node['ids'] or | |
2083 isinstance(node, nodes.reference) and node.get('anonymous') or | |
2084 isinstance(node, nodes.footnote_reference) and node.get('auto')): | |
2085 return True | |
2086 else: | |
2087 return False | |
2088 | |
2089 def directive(self, match, **option_presets): | |
2090 """Returns a 2-tuple: list of nodes, and a "blank finish" boolean.""" | |
2091 type_name = match.group(1) | |
2092 directive_class, messages = directives.directive( | |
2093 type_name, self.memo.language, self.document) | |
2094 self.parent += messages | |
2095 if directive_class: | |
2096 return self.run_directive( | |
2097 directive_class, match, type_name, option_presets) | |
2098 else: | |
2099 return self.unknown_directive(type_name) | |
2100 | |
2101 def run_directive(self, directive, match, type_name, option_presets): | |
2102 """ | |
2103 Parse a directive then run its directive function. | |
2104 | |
2105 Parameters: | |
2106 | |
2107 - `directive`: The class implementing the directive. Must be | |
2108 a subclass of `rst.Directive`. | |
2109 | |
2110 - `match`: A regular expression match object which matched the first | |
2111 line of the directive. | |
2112 | |
2113 - `type_name`: The directive name, as used in the source text. | |
2114 | |
2115 - `option_presets`: A dictionary of preset options, defaults for the | |
2116 directive options. Currently, only an "alt" option is passed by | |
2117 substitution definitions (value: the substitution name), which may | |
2118 be used by an embedded image directive. | |
2119 | |
2120 Returns a 2-tuple: list of nodes, and a "blank finish" boolean. | |
2121 """ | |
2122 if isinstance(directive, (FunctionType, MethodType)): | |
2123 from docutils.parsers.rst import convert_directive_function | |
2124 directive = convert_directive_function(directive) | |
2125 lineno = self.state_machine.abs_line_number() | |
2126 initial_line_offset = self.state_machine.line_offset | |
2127 indented, indent, line_offset, blank_finish \ | |
2128 = self.state_machine.get_first_known_indented(match.end(), | |
2129 strip_top=0) | |
2130 block_text = '\n'.join(self.state_machine.input_lines[ | |
2131 initial_line_offset : self.state_machine.line_offset + 1]) | |
2132 try: | |
2133 arguments, options, content, content_offset = ( | |
2134 self.parse_directive_block(indented, line_offset, | |
2135 directive, option_presets)) | |
2136 except MarkupError as detail: | |
2137 error = self.reporter.error( | |
2138 'Error in "%s" directive:\n%s.' % (type_name, | |
2139 ' '.join(detail.args)), | |
2140 nodes.literal_block(block_text, block_text), line=lineno) | |
2141 return [error], blank_finish | |
2142 directive_instance = directive( | |
2143 type_name, arguments, options, content, lineno, | |
2144 content_offset, block_text, self, self.state_machine) | |
2145 try: | |
2146 result = directive_instance.run() | |
2147 except docutils.parsers.rst.DirectiveError as error: | |
2148 msg_node = self.reporter.system_message(error.level, error.msg, | |
2149 line=lineno) | |
2150 msg_node += nodes.literal_block(block_text, block_text) | |
2151 result = [msg_node] | |
2152 assert isinstance(result, list), \ | |
2153 'Directive "%s" must return a list of nodes.' % type_name | |
2154 for i in range(len(result)): | |
2155 assert isinstance(result[i], nodes.Node), \ | |
2156 ('Directive "%s" returned non-Node object (index %s): %r' | |
2157 % (type_name, i, result[i])) | |
2158 return (result, | |
2159 blank_finish or self.state_machine.is_next_line_blank()) | |
2160 | |
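# Minimal sketch of the directive contract described above (a hypothetical
# "my-note" directive; `Directive` and `register_directive` are the public
# docutils APIs for defining and registering directives):
#
#     from docutils import nodes
#     from docutils.parsers.rst import Directive, directives
#
#     class MyNote(Directive):
#         has_content = True
#
#         def run(self):                 # must return a list of nodes
#             node = nodes.note()
#             self.state.nested_parse(self.content, self.content_offset, node)
#             return [node]
#
#     directives.register_directive('my-note', MyNote)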
2161 def parse_directive_block(self, indented, line_offset, directive, | |
2162 option_presets): | |
2163 option_spec = directive.option_spec | |
2164 has_content = directive.has_content | |
2165 if indented and not indented[0].strip(): | |
2166 indented.trim_start() | |
2167 line_offset += 1 | |
2168 while indented and not indented[-1].strip(): | |
2169 indented.trim_end() | |
2170 if indented and (directive.required_arguments | |
2171 or directive.optional_arguments | |
2172 or option_spec): | |
2173 for i, line in enumerate(indented): | |
2174 if not line.strip(): | |
2175 break | |
2176 else: | |
2177 i += 1 | |
2178 arg_block = indented[:i] | |
2179 content = indented[i+1:] | |
2180 content_offset = line_offset + i + 1 | |
2181 else: | |
2182 content = indented | |
2183 content_offset = line_offset | |
2184 arg_block = [] | |
2185 if option_spec: | |
2186 options, arg_block = self.parse_directive_options( | |
2187 option_presets, option_spec, arg_block) | |
2188 else: | |
2189 options = {} | |
2190 if arg_block and not (directive.required_arguments | |
2191 or directive.optional_arguments): | |
2192 content = arg_block + indented[i:] | |
2193 content_offset = line_offset | |
2194 arg_block = [] | |
2195 while content and not content[0].strip(): | |
2196 content.trim_start() | |
2197 content_offset += 1 | |
2198 if directive.required_arguments or directive.optional_arguments: | |
2199 arguments = self.parse_directive_arguments( | |
2200 directive, arg_block) | |
2201 else: | |
2202 arguments = [] | |
2203 if content and not has_content: | |
2204 raise MarkupError('no content permitted') | |
2205 return (arguments, options, content, content_offset) | |
2206 | |
2207 def parse_directive_options(self, option_presets, option_spec, arg_block): | |
2208 options = option_presets.copy() | |
2209 for i, line in enumerate(arg_block): | |
2210 if re.match(Body.patterns['field_marker'], line): | |
2211 opt_block = arg_block[i:] | |
2212 arg_block = arg_block[:i] | |
2213 break | |
2214 else: | |
2215 opt_block = [] | |
2216 if opt_block: | |
2217 success, data = self.parse_extension_options(option_spec, | |
2218 opt_block) | |
2219 if success: # data is a dict of options | |
2220 options.update(data) | |
2221 else: # data is an error string | |
2222 raise MarkupError(data) | |
2223 return options, arg_block | |
2224 | |
2225 def parse_directive_arguments(self, directive, arg_block): | |
2226 required = directive.required_arguments | |
2227 optional = directive.optional_arguments | |
2228 arg_text = '\n'.join(arg_block) | |
2229 arguments = arg_text.split() | |
2230 if len(arguments) < required: | |
2231 raise MarkupError('%s argument(s) required, %s supplied' | |
2232 % (required, len(arguments))) | |
2233 elif len(arguments) > required + optional: | |
2234 if directive.final_argument_whitespace: | |
2235 arguments = arg_text.split(None, required + optional - 1) | |
2236 else: | |
2237 raise MarkupError( | |
2238 'maximum %s argument(s) allowed, %s supplied' | |
2239 % (required + optional, len(arguments))) | |
2240 return arguments | |
2241 | |
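# Splitting behaviour above, by example, with required=1 and optional=1:
#
#     "a b"    -> ['a', 'b']
#     "a b c"  -> ['a', 'b c']   if final_argument_whitespace is true
#     "a b c"  -> MarkupError    otherwise (maximum 2 allowed, 3 supplied)
#     ""       -> MarkupError    (1 argument required, 0 supplied)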
2242 def parse_extension_options(self, option_spec, datalines): | |
2243 """ | |
2244 Parse `datalines` for a field list containing extension options | |
2245 matching `option_spec`. | |
2246 | |
2247 :Parameters: | |
2248 - `option_spec`: a mapping of option name to conversion | |
2249 function, which should raise an exception on bad input. | |
2250 - `datalines`: a list of input strings. | |
2251 | |
2252 :Return: | |
2253 - Success value, 1 or 0. | |
2254 - An option dictionary on success, an error string on failure. | |
2255 """ | |
2256 node = nodes.field_list() | |
2257 newline_offset, blank_finish = self.nested_list_parse( | |
2258 datalines, 0, node, initial_state='ExtensionOptions', | |
2259 blank_finish=True) | |
2260 if newline_offset != len(datalines): # incomplete parse of block | |
2261 return 0, 'invalid option block' | |
2262 try: | |
2263 options = utils.extract_extension_options(node, option_spec) | |
2264 except KeyError as detail: | |
2265 return 0, ('unknown option: "%s"' % detail.args[0]) | |
2266 except (ValueError, TypeError) as detail: | |
2267 return 0, ('invalid option value: %s' % ' '.join(detail.args)) | |
2268 except utils.ExtensionOptionError as detail: | |
2269 return 0, ('invalid option data: %s' % ' '.join(detail.args)) | |
2270 if blank_finish: | |
2271 return 1, options | |
2272 else: | |
2273 return 0, 'option data incompletely parsed' | |
2274 | |
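# Sketch of the contract above (the option names are illustrative; the
# converters are real helpers from docutils.parsers.rst.directives):
#
#     option_spec = {'width': directives.positive_int,
#                    'class': directives.class_option}
#     parse_extension_options(option_spec, [':width: 4', ':class: wide'])
#     # -> roughly (1, {'width': 4, 'class': ['wide']})
#     parse_extension_options(option_spec, [':width: zero'])
#     # -> (0, 'invalid option value: ...')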
2275 def unknown_directive(self, type_name): | |
2276 lineno = self.state_machine.abs_line_number() | |
2277 indented, indent, offset, blank_finish = \ | |
2278 self.state_machine.get_first_known_indented(0, strip_indent=False) | |
2279 text = '\n'.join(indented) | |
2280 error = self.reporter.error( | |
2281 'Unknown directive type "%s".' % type_name, | |
2282 nodes.literal_block(text, text), line=lineno) | |
2283 return [error], blank_finish | |
2284 | |
2285 def comment(self, match): | |
2286 if not match.string[match.end():].strip() \ | |
2287 and self.state_machine.is_next_line_blank(): # an empty comment? | |
2288 return [nodes.comment()], 1 # "A tiny but practical wart." | |
2289 indented, indent, offset, blank_finish = \ | |
2290 self.state_machine.get_first_known_indented(match.end()) | |
2291 while indented and not indented[-1].strip(): | |
2292 indented.trim_end() | |
2293 text = '\n'.join(indented) | |
2294 return [nodes.comment(text, text)], blank_finish | |
2295 | |
2296 explicit.constructs = [ | |
2297 (footnote, | |
2298 re.compile(r""" | |
2299 \.\.[ ]+ # explicit markup start | |
2300 \[ | |
2301 ( # footnote label: | |
2302 [0-9]+ # manually numbered footnote | |
2303 | # *OR* | |
2304 \# # anonymous auto-numbered footnote | |
2305 | # *OR* | |
2306 \#%s # auto-numbered, labeled footnote | 
2307 | # *OR* | |
2308 \* # auto-symbol footnote | |
2309 ) | |
2310 \] | |
2311 ([ ]+|$) # whitespace or end of line | |
2312 """ % Inliner.simplename, re.VERBOSE | re.UNICODE)), | |
2313 (citation, | |
2314 re.compile(r""" | |
2315 \.\.[ ]+ # explicit markup start | |
2316 \[(%s)\] # citation label | |
2317 ([ ]+|$) # whitespace or end of line | |
2318 """ % Inliner.simplename, re.VERBOSE | re.UNICODE)), | |
2319 (hyperlink_target, | |
2320 re.compile(r""" | |
2321 \.\.[ ]+ # explicit markup start | |
2322 _ # target indicator | |
2323 (?![ ]|$) # first char. not space or EOL | |
2324 """, re.VERBOSE | re.UNICODE)), | |
2325 (substitution_def, | |
2326 re.compile(r""" | |
2327 \.\.[ ]+ # explicit markup start | |
2328 \| # substitution indicator | |
2329 (?![ ]|$) # first char. not space or EOL | |
2330 """, re.VERBOSE | re.UNICODE)), | |
2331 (directive, | |
2332 re.compile(r""" | |
2333 \.\.[ ]+ # explicit markup start | |
2334 (%s) # directive name | |
2335 [ ]? # optional space | |
2336 :: # directive delimiter | |
2337 ([ ]+|$) # whitespace or end of line | |
2338 """ % Inliner.simplename, re.VERBOSE | re.UNICODE))] | |
2339 | |
2340 def explicit_markup(self, match, context, next_state): | |
2341 """Footnotes, hyperlink targets, directives, comments.""" | |
2342 nodelist, blank_finish = self.explicit_construct(match) | |
2343 self.parent += nodelist | |
2344 self.explicit_list(blank_finish) | |
2345 return [], next_state, [] | |
2346 | |
2347 def explicit_construct(self, match): | |
2348 """Determine which explicit construct this is, parse & return it.""" | |
2349 errors = [] | |
2350 for method, pattern in self.explicit.constructs: | |
2351 expmatch = pattern.match(match.string) | |
2352 if expmatch: | |
2353 try: | |
2354 return method(self, expmatch) | |
2355 except MarkupError as error: | |
2356 lineno = self.state_machine.abs_line_number() | |
2357 message = ' '.join(error.args) | |
2358 errors.append(self.reporter.warning(message, line=lineno)) | |
2359 break | |
2360 nodelist, blank_finish = self.comment(match) | |
2361 return nodelist + errors, blank_finish | |
2362 | |
2363 def explicit_list(self, blank_finish): | |
2364 """ | |
2365 Create a nested state machine for a series of explicit markup | |
2366 constructs (including anonymous hyperlink targets). | |
2367 """ | |
2368 offset = self.state_machine.line_offset + 1 # next line | |
2369 newline_offset, blank_finish = self.nested_list_parse( | |
2370 self.state_machine.input_lines[offset:], | |
2371 input_offset=self.state_machine.abs_line_offset() + 1, | |
2372 node=self.parent, initial_state='Explicit', | |
2373 blank_finish=blank_finish, | |
2374 match_titles=self.state_machine.match_titles) | |
2375 self.goto_line(newline_offset) | |
2376 if not blank_finish: | |
2377 self.parent += self.unindent_warning('Explicit markup') | |
2378 | |
2379 def anonymous(self, match, context, next_state): | |
2380 """Anonymous hyperlink targets.""" | |
2381 nodelist, blank_finish = self.anonymous_target(match) | |
2382 self.parent += nodelist | |
2383 self.explicit_list(blank_finish) | |
2384 return [], next_state, [] | |
2385 | |
2386 def anonymous_target(self, match): | |
2387 lineno = self.state_machine.abs_line_number() | |
2388 block, indent, offset, blank_finish \ | |
2389 = self.state_machine.get_first_known_indented(match.end(), | |
2390 until_blank=True) | |
2391 blocktext = match.string[:match.end()] + '\n'.join(block) | |
2392 block = [escape2null(line) for line in block] | |
2393 target = self.make_target(block, blocktext, lineno, '') | |
2394 return [target], blank_finish | |
2395 | |
2396 def line(self, match, context, next_state): | |
2397 """Section title overline or transition marker.""" | |
2398 if self.state_machine.match_titles: | |
2399 return [match.string], 'Line', [] | |
2400 elif match.string.strip() == '::': | |
2401 raise statemachine.TransitionCorrection('text') | |
2402 elif len(match.string.strip()) < 4: | |
2403 msg = self.reporter.info( | |
2404 'Unexpected possible title overline or transition.\n' | |
2405 "Treating it as ordinary text because it's so short.", | |
2406 line=self.state_machine.abs_line_number()) | |
2407 self.parent += msg | |
2408 raise statemachine.TransitionCorrection('text') | |
2409 else: | |
2410 blocktext = self.state_machine.line | |
2411 msg = self.reporter.severe( | |
2412 'Unexpected section title or transition.', | |
2413 nodes.literal_block(blocktext, blocktext), | |
2414 line=self.state_machine.abs_line_number()) | |
2415 self.parent += msg | |
2416 return [], next_state, [] | |
2417 | |
2418 def text(self, match, context, next_state): | |
2419 """Titles, definition lists, paragraphs.""" | |
2420 return [match.string], 'Text', [] | |
2421 | |
2422 | |
2423 class RFC2822Body(Body): | |
2424 | |
2425 """ | |
2426 RFC2822 headers are only valid as the first constructs in documents. As | |
2427 soon as anything else appears, the `Body` state should take over. | |
2428 """ | |
2429 | |
2430 patterns = Body.patterns.copy() # can't modify the original | |
2431 patterns['rfc2822'] = r'[!-9;-~]+:( +|$)' | |
2432 initial_transitions = [(name, 'Body') | |
2433 for name in Body.initial_transitions] | |
2434 initial_transitions.insert(-1, ('rfc2822', 'Body')) # just before 'text' | |
2435 | |
2436 def rfc2822(self, match, context, next_state): | |
2437 """RFC2822-style field list item.""" | |
2438 fieldlist = nodes.field_list(classes=['rfc2822']) | |
2439 self.parent += fieldlist | |
2440 field, blank_finish = self.rfc2822_field(match) | |
2441 fieldlist += field | |
2442 offset = self.state_machine.line_offset + 1 # next line | |
2443 newline_offset, blank_finish = self.nested_list_parse( | |
2444 self.state_machine.input_lines[offset:], | |
2445 input_offset=self.state_machine.abs_line_offset() + 1, | |
2446 node=fieldlist, initial_state='RFC2822List', | |
2447 blank_finish=blank_finish) | |
2448 self.goto_line(newline_offset) | |
2449 if not blank_finish: | |
2450 self.parent += self.unindent_warning( | |
2451 'RFC2822-style field list') | |
2452 return [], next_state, [] | |
2453 | |
2454 def rfc2822_field(self, match): | |
2455 name = match.string[:match.string.find(':')] | |
2456 indented, indent, line_offset, blank_finish = \ | |
2457 self.state_machine.get_first_known_indented(match.end(), | |
2458 until_blank=True) | |
2459 fieldnode = nodes.field() | |
2460 fieldnode += nodes.field_name(name, name) | |
2461 fieldbody = nodes.field_body('\n'.join(indented)) | |
2462 fieldnode += fieldbody | |
2463 if indented: | |
2464 self.nested_parse(indented, input_offset=line_offset, | |
2465 node=fieldbody) | |
2466 return fieldnode, blank_finish | |
2467 | |
2468 | |
2469 class SpecializedBody(Body): | |
2470 | |
2471 """ | |
2472 Superclass for second and subsequent compound element members. Compound | |
2473 elements are lists and list-like constructs. | |
2474 | |
2475 All transition methods are disabled (redefined as `invalid_input`). | |
2476 Override individual methods in subclasses to re-enable. | |
2477 | |
2478 For example, once an initial bullet list item is recognized, the | 
2479 `BulletList` subclass takes over, with a "bullet_list" node as its | |
2480 container. Upon encountering the initial bullet list item, `Body.bullet` | |
2481 calls its ``self.nested_list_parse`` (`RSTState.nested_list_parse`), which | |
2482 starts up a nested parsing session with `BulletList` as the initial state. | |
2483 Only the ``bullet`` transition method is enabled in `BulletList`; as long | |
2484 as only bullet list items are encountered, they are parsed and inserted | |
2485 into the container. The first construct which is *not* a bullet list item | |
2486 triggers the `invalid_input` method, which ends the nested parse and | |
2487 closes the container. `BulletList` needs to recognize input that is | |
2488 invalid in the context of a bullet list, which means everything *other | |
2489 than* bullet list items, so it inherits the transition list created in | |
2490 `Body`. | |
2491 """ | |
2492 | |
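# For example (see BulletList.bullet below): a change of bullet character is
# invalid input for the running BulletList state, so
#
#     - apple
#     * banana
#
# is parsed as two sibling bullet_list elements rather than one list with two
# items (docutils also warns that the first list ends without a blank line).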
2493 def invalid_input(self, match=None, context=None, next_state=None): | |
2494 """Not a compound element member. Abort this state machine.""" | |
2495 self.state_machine.previous_line() # back up so parent SM can reassess | |
2496 raise EOFError | |
2497 | |
2498 indent = invalid_input | |
2499 bullet = invalid_input | |
2500 enumerator = invalid_input | |
2501 field_marker = invalid_input | |
2502 option_marker = invalid_input | |
2503 doctest = invalid_input | |
2504 line_block = invalid_input | |
2505 grid_table_top = invalid_input | |
2506 simple_table_top = invalid_input | |
2507 explicit_markup = invalid_input | |
2508 anonymous = invalid_input | |
2509 line = invalid_input | |
2510 text = invalid_input | |
2511 | |
2512 | |
2513 class BulletList(SpecializedBody): | |
2514 | |
2515 """Second and subsequent bullet_list list_items.""" | |
2516 | |
2517 def bullet(self, match, context, next_state): | |
2518 """Bullet list item.""" | |
2519 if match.string[0] != self.parent['bullet']: | |
2520 # different bullet: new list | |
2521 self.invalid_input() | |
2522 listitem, blank_finish = self.list_item(match.end()) | |
2523 self.parent += listitem | |
2524 self.blank_finish = blank_finish | |
2525 return [], next_state, [] | |
2526 | |
2527 | |
2528 class DefinitionList(SpecializedBody): | |
2529 | |
2530 """Second and subsequent definition_list_items.""" | |
2531 | |
2532 def text(self, match, context, next_state): | |
2533 """Definition lists.""" | |
2534 return [match.string], 'Definition', [] | |
2535 | |
2536 | |
2537 class EnumeratedList(SpecializedBody): | |
2538 | |
2539 """Second and subsequent enumerated_list list_items.""" | |
2540 | |
2541 def enumerator(self, match, context, next_state): | |
2542 """Enumerated list item.""" | |
2543 format, sequence, text, ordinal = self.parse_enumerator( | |
2544 match, self.parent['enumtype']) | |
2545 if ( format != self.format | |
2546 or (sequence != '#' and (sequence != self.parent['enumtype'] | |
2547 or self.auto | |
2548 or ordinal != (self.lastordinal + 1))) | |
2549 or not self.is_enumerated_list_item(ordinal, sequence, format)): | |
2550 # different enumeration: new list | |
2551 self.invalid_input() | |
2552 if sequence == '#': | |
2553 self.auto = 1 | |
2554 listitem, blank_finish = self.list_item(match.end()) | |
2555 self.parent += listitem | |
2556 self.blank_finish = blank_finish | |
2557 self.lastordinal = ordinal | |
2558 return [], next_state, [] | |
2559 | |
2560 | |
2561 class FieldList(SpecializedBody): | |
2562 | |
2563 """Second and subsequent field_list fields.""" | |
2564 | |
2565 def field_marker(self, match, context, next_state): | |
2566 """Field list field.""" | |
2567 field, blank_finish = self.field(match) | |
2568 self.parent += field | |
2569 self.blank_finish = blank_finish | |
2570 return [], next_state, [] | |
2571 | |
2572 | |
2573 class OptionList(SpecializedBody): | |
2574 | |
2575 """Second and subsequent option_list option_list_items.""" | |
2576 | |
2577 def option_marker(self, match, context, next_state): | |
2578 """Option list item.""" | |
2579 try: | |
2580 option_list_item, blank_finish = self.option_list_item(match) | |
2581 except MarkupError: | |
2582 self.invalid_input() | |
2583 self.parent += option_list_item | |
2584 self.blank_finish = blank_finish | |
2585 return [], next_state, [] | |
2586 | |
2587 | |
2588 class RFC2822List(SpecializedBody, RFC2822Body): | |
2589 | |
2590 """Second and subsequent RFC2822-style field_list fields.""" | |
2591 | |
2592 patterns = RFC2822Body.patterns | |
2593 initial_transitions = RFC2822Body.initial_transitions | |
2594 | |
2595 def rfc2822(self, match, context, next_state): | |
2596 """RFC2822-style field list item.""" | |
2597 field, blank_finish = self.rfc2822_field(match) | |
2598 self.parent += field | |
2599 self.blank_finish = blank_finish | |
2600 return [], 'RFC2822List', [] | |
2601 | |
2602 blank = SpecializedBody.invalid_input | |
2603 | |
2604 | |
2605 class ExtensionOptions(FieldList): | |
2606 | |
2607 """ | |
2608 Parse field_list fields for extension options. | |
2609 | |
2610 No nested parsing is done (including inline markup parsing). | |
2611 """ | |
2612 | |
2613 def parse_field_body(self, indented, offset, node): | |
2614 """Override `Body.parse_field_body` for simpler parsing.""" | |
2615 lines = [] | |
2616 for line in list(indented) + ['']: | |
2617 if line.strip(): | |
2618 lines.append(line) | |
2619 elif lines: | |
2620 text = '\n'.join(lines) | |
2621 node += nodes.paragraph(text, text) | |
2622 lines = [] | |
2623 | |
2624 | |
2625 class LineBlock(SpecializedBody): | |
2626 | |
2627 """Second and subsequent lines of a line_block.""" | |
2628 | |
2629 blank = SpecializedBody.invalid_input | |
2630 | |
2631 def line_block(self, match, context, next_state): | |
2632 """New line of line block.""" | |
2633 lineno = self.state_machine.abs_line_number() | |
2634 line, messages, blank_finish = self.line_block_line(match, lineno) | |
2635 self.parent += line | |
2636 self.parent.parent += messages | |
2637 self.blank_finish = blank_finish | |
2638 return [], next_state, [] | |
2639 | |
2640 | |
2641 class Explicit(SpecializedBody): | |
2642 | |
2643 """Second and subsequent explicit markup construct.""" | |
2644 | |
2645 def explicit_markup(self, match, context, next_state): | |
2646 """Footnotes, hyperlink targets, directives, comments.""" | |
2647 nodelist, blank_finish = self.explicit_construct(match) | |
2648 self.parent += nodelist | |
2649 self.blank_finish = blank_finish | |
2650 return [], next_state, [] | |
2651 | |
2652 def anonymous(self, match, context, next_state): | |
2653 """Anonymous hyperlink targets.""" | |
2654 nodelist, blank_finish = self.anonymous_target(match) | |
2655 self.parent += nodelist | |
2656 self.blank_finish = blank_finish | |
2657 return [], next_state, [] | |
2658 | |
2659 blank = SpecializedBody.invalid_input | |
2660 | |
2661 | |
2662 class SubstitutionDef(Body): | |
2663 | |
2664 """ | |
2665 Parser for the contents of a substitution_definition element. | |
2666 """ | |
2667 | |
2668 patterns = { | |
2669 'embedded_directive': re.compile(r'(%s)::( +|$)' | |
2670 % Inliner.simplename, re.UNICODE), | |
2671 'text': r''} | |
2672 initial_transitions = ['embedded_directive', 'text'] | |
2673 | |
2674 def embedded_directive(self, match, context, next_state): | |
2675 nodelist, blank_finish = self.directive(match, | |
2676 alt=self.parent['names'][0]) | |
2677 self.parent += nodelist | |
2678 if not self.state_machine.at_eof(): | |
2679 self.blank_finish = blank_finish | |
2680 raise EOFError | |
2681 | |
2682 def text(self, match, context, next_state): | |
2683 if not self.state_machine.at_eof(): | |
2684 self.blank_finish = self.state_machine.is_next_line_blank() | |
2685 raise EOFError | |
2686 | |
2687 | |
2688 class Text(RSTState): | |
2689 | |
2690 """ | |
2691 Classifier of second line of a text block. | |
2692 | |
2693 Could be a paragraph, a definition list item, or a title. | |
2694 """ | |
2695 | |
2696 patterns = {'underline': Body.patterns['line'], | |
2697 'text': r''} | |
2698 initial_transitions = [('underline', 'Body'), ('text', 'Body')] | |
2699 | |
2700 def blank(self, match, context, next_state): | |
2701 """End of paragraph.""" | |
2702 # NOTE: self.paragraph returns [ node, system_message(s) ], literalnext | |
2703 paragraph, literalnext = self.paragraph( | |
2704 context, self.state_machine.abs_line_number() - 1) | |
2705 self.parent += paragraph | |
2706 if literalnext: | |
2707 self.parent += self.literal_block() | |
2708 return [], 'Body', [] | |
2709 | |
2710 def eof(self, context): | |
2711 if context: | |
2712 self.blank(None, context, None) | |
2713 return [] | |
2714 | |
2715 def indent(self, match, context, next_state): | |
2716 """Definition list item.""" | |
2717 definitionlist = nodes.definition_list() | |
2718 definitionlistitem, blank_finish = self.definition_list_item(context) | |
2719 definitionlist += definitionlistitem | |
2720 self.parent += definitionlist | |
2721 offset = self.state_machine.line_offset + 1 # next line | |
2722 newline_offset, blank_finish = self.nested_list_parse( | |
2723 self.state_machine.input_lines[offset:], | |
2724 input_offset=self.state_machine.abs_line_offset() + 1, | |
2725 node=definitionlist, initial_state='DefinitionList', | |
2726 blank_finish=blank_finish, blank_finish_state='Definition') | |
2727 self.goto_line(newline_offset) | |
2728 if not blank_finish: | |
2729 self.parent += self.unindent_warning('Definition list') | |
2730 return [], 'Body', [] | |
2731 | |
2732 def underline(self, match, context, next_state): | |
2733 """Section title.""" | |
2734 lineno = self.state_machine.abs_line_number() | |
2735 title = context[0].rstrip() | |
2736 underline = match.string.rstrip() | |
2737 source = title + '\n' + underline | |
2738 messages = [] | |
2739 if column_width(title) > len(underline): | |
2740 if len(underline) < 4: | |
2741 if self.state_machine.match_titles: | |
2742 msg = self.reporter.info( | |
2743 'Possible title underline, too short for the title.\n' | |
2744 "Treating it as ordinary text because it's so short.", | |
2745 line=lineno) | |
2746 self.parent += msg | |
2747 raise statemachine.TransitionCorrection('text') | |
2748 else: | |
2749 blocktext = context[0] + '\n' + self.state_machine.line | |
2750 msg = self.reporter.warning('Title underline too short.', | |
2751 nodes.literal_block(blocktext, blocktext), line=lineno) | |
2752 messages.append(msg) | |
2753 if not self.state_machine.match_titles: | |
2754 blocktext = context[0] + '\n' + self.state_machine.line | |
2755 # We need get_source_and_line() here to report correctly | |
2756 src, srcline = self.state_machine.get_source_and_line() | |
2757 # TODO: why is abs_line_number() == srcline+1 | |
2758 # if the error is in a table (try with test_tables.py)? | |
2759 # print("get_source_and_line", srcline) | |
2760 # print("abs_line_number", self.state_machine.abs_line_number()) | |
2761 msg = self.reporter.severe('Unexpected section title.', | |
2762 nodes.literal_block(blocktext, blocktext), | |
2763 source=src, line=srcline) | |
2764 self.parent += messages | |
2765 self.parent += msg | |
2766 return [], next_state, [] | |
2767 style = underline[0] | |
2768 context[:] = [] | |
2769 self.section(title, source, style, lineno - 1, messages) | |
2770 return [], next_state, [] | |
2771 | |
2772 def text(self, match, context, next_state): | |
2773 """Paragraph.""" | |
2774 startline = self.state_machine.abs_line_number() - 1 | |
2775 msg = None | |
2776 try: | |
2777 block = self.state_machine.get_text_block(flush_left=True) | |
2778 except statemachine.UnexpectedIndentationError as err: | |
2779 block, src, srcline = err.args | |
2780 msg = self.reporter.error('Unexpected indentation.', | |
2781 source=src, line=srcline) | |
2782 lines = context + list(block) | |
2783 paragraph, literalnext = self.paragraph(lines, startline) | |
2784 self.parent += paragraph | |
2785 self.parent += msg | |
2786 if literalnext: | |
2787 try: | |
2788 self.state_machine.next_line() | |
2789 except EOFError: | |
2790 pass | |
2791 self.parent += self.literal_block() | |
2792 return [], next_state, [] | |
2793 | |
    def literal_block(self):
        """Return a list of nodes."""
        indented, indent, offset, blank_finish = \
            self.state_machine.get_indented()
        while indented and not indented[-1].strip():
            indented.trim_end()
        if not indented:
            return self.quoted_literal_block()
        data = '\n'.join(indented)
        literal_block = nodes.literal_block(data, data)
        (literal_block.source,
         literal_block.line) = self.state_machine.get_source_and_line(offset+1)
        nodelist = [literal_block]
        if not blank_finish:
            nodelist.append(self.unindent_warning('Literal block'))
        return nodelist

    def quoted_literal_block(self):
        abs_line_offset = self.state_machine.abs_line_offset()
        offset = self.state_machine.line_offset
        parent_node = nodes.Element()
        new_abs_offset = self.nested_parse(
            self.state_machine.input_lines[offset:],
            input_offset=abs_line_offset, node=parent_node, match_titles=False,
            state_machine_kwargs={'state_classes': (QuotedLiteralBlock,),
                                  'initial_state': 'QuotedLiteralBlock'})
        self.goto_line(new_abs_offset)
        return parent_node.children

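    # Illustrative example (added; not part of the upstream docutils source):
    # a *quoted* literal block is unindented but prefixed with a consistent
    # punctuation character, e.g.
    #
    #     The following is a quoted literal block::
    #
    #     > line one
    #     > line two
    #
    # When ``literal_block()`` finds no indented text, it delegates to the
    # nested ``QuotedLiteralBlock`` state machine defined at the end of this
    # module, which collects the ">"-prefixed lines.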
    def definition_list_item(self, termline):
        indented, indent, line_offset, blank_finish = \
            self.state_machine.get_indented()
        itemnode = nodes.definition_list_item(
            '\n'.join(termline + list(indented)))
        lineno = self.state_machine.abs_line_number() - 1
        (itemnode.source,
         itemnode.line) = self.state_machine.get_source_and_line(lineno)
        termlist, messages = self.term(termline, lineno)
        itemnode += termlist
        definition = nodes.definition('', *messages)
        itemnode += definition
        if termline[0][-2:] == '::':
            definition += self.reporter.info(
                'Blank line missing before literal block (after the "::")? '
                'Interpreted as a definition list item.',
                line=lineno+1)
        self.nested_parse(indented, input_offset=line_offset, node=definition)
        return itemnode, blank_finish

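    # Illustrative example (added; not part of the upstream docutils source):
    # a definition list item is a term line followed directly by an indented
    # definition, e.g.
    #
    #     term
    #         Definition body, parsed recursively via ``nested_parse()``.
    #
    # A term line ending in "::" most likely means the author forgot the
    # blank line before a literal block, hence the info message above.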
    classifier_delimiter = re.compile(' +: +')

    def term(self, lines, lineno):
        """Return a definition_list's term and optional classifiers."""
        assert len(lines) == 1
        text_nodes, messages = self.inline_text(lines[0], lineno)
        term_node = nodes.term(lines[0])
        (term_node.source,
         term_node.line) = self.state_machine.get_source_and_line(lineno)
        node_list = [term_node]
        for i in range(len(text_nodes)):
            node = text_nodes[i]
            if isinstance(node, nodes.Text):
                parts = self.classifier_delimiter.split(node)
                if len(parts) == 1:
                    node_list[-1] += node
                else:
                    text = parts[0].rstrip()
                    textnode = nodes.Text(text)
                    node_list[-1] += textnode
                    for part in parts[1:]:
                        node_list.append(
                            nodes.classifier(unescape(part, True), part))
            else:
                node_list[-1] += node
        return node_list, messages

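    # Illustrative example (added; not part of the upstream docutils source):
    # classifiers are separated from the term (and from each other) by a
    # colon surrounded by spaces, matching ``classifier_delimiter``, e.g.
    #
    #     term : classifier one : classifier two
    #         Definition body.
    #
    # ``term()`` splits the term's Text nodes on that delimiter and wraps the
    # trailing parts in ``classifier`` nodes.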

class SpecializedText(Text):

    """
    Superclass for second and subsequent lines of Text-variants.

    All transition methods are disabled. Override individual methods in
    subclasses to re-enable.
    """

    def eof(self, context):
        """Incomplete construct."""
        return []

    def invalid_input(self, match=None, context=None, next_state=None):
        """Not a compound element member. Abort this state machine."""
        raise EOFError

    blank = invalid_input
    indent = invalid_input
    underline = invalid_input
    text = invalid_input


class Definition(SpecializedText):

    """Second line of potential definition_list_item."""

    def eof(self, context):
        """Not a definition."""
        self.state_machine.previous_line(2)  # so parent SM can reassess
        return []

    def indent(self, match, context, next_state):
        """Definition list item."""
        itemnode, blank_finish = self.definition_list_item(context)
        self.parent += itemnode
        self.blank_finish = blank_finish
        return [], 'DefinitionList', []


class Line(SpecializedText):

    """
    Second line of over- & underlined section title or transition marker.
    """

    eofcheck = 1                        # @@@ ???
    """Set to 0 while parsing sections, so that we don't catch the EOF."""

    def eof(self, context):
        """Transition marker at end of section or document."""
        marker = context[0].strip()
        if self.memo.section_bubble_up_kludge:
            self.memo.section_bubble_up_kludge = False
        elif len(marker) < 4:
            self.state_correction(context)
        if self.eofcheck:               # ignore EOFError with sections
            lineno = self.state_machine.abs_line_number() - 1
            transition = nodes.transition(rawsource=context[0])
            transition.line = lineno
            self.parent += transition
        self.eofcheck = 1
        return []

    def blank(self, match, context, next_state):
        """Transition marker."""
        src, srcline = self.state_machine.get_source_and_line()
        marker = context[0].strip()
        if len(marker) < 4:
            self.state_correction(context)
        transition = nodes.transition(rawsource=marker)
        transition.source = src
        transition.line = srcline - 1
        self.parent += transition
        return [], 'Body', []

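    # Illustrative example (added; not part of the upstream docutils source):
    # a transition marker is a line of four or more repeated punctuation
    # characters surrounded by blank lines, e.g.
    #
    #     Paragraph before the transition.
    #
    #     ----------
    #
    #     Paragraph after the transition.
    #
    # A marker shorter than four characters is handed back to the Body state
    # as ordinary text via ``state_correction()``.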
    def text(self, match, context, next_state):
        """Potential over- & underlined title."""
        lineno = self.state_machine.abs_line_number() - 1
        overline = context[0]
        title = match.string
        underline = ''
        try:
            underline = self.state_machine.next_line()
        except EOFError:
            blocktext = overline + '\n' + title
            if len(overline.rstrip()) < 4:
                self.short_overline(context, blocktext, lineno, 2)
            else:
                msg = self.reporter.severe(
                    'Incomplete section title.',
                    nodes.literal_block(blocktext, blocktext),
                    line=lineno)
                self.parent += msg
                return [], 'Body', []
        source = '%s\n%s\n%s' % (overline, title, underline)
        overline = overline.rstrip()
        underline = underline.rstrip()
        if not self.transitions['underline'][0].match(underline):
            blocktext = overline + '\n' + title + '\n' + underline
            if len(overline.rstrip()) < 4:
                self.short_overline(context, blocktext, lineno, 2)
            else:
                msg = self.reporter.severe(
                    'Missing matching underline for section title overline.',
                    nodes.literal_block(source, source),
                    line=lineno)
                self.parent += msg
                return [], 'Body', []
        elif overline != underline:
            blocktext = overline + '\n' + title + '\n' + underline
            if len(overline.rstrip()) < 4:
                self.short_overline(context, blocktext, lineno, 2)
            else:
                msg = self.reporter.severe(
                    'Title overline & underline mismatch.',
                    nodes.literal_block(source, source),
                    line=lineno)
                self.parent += msg
                return [], 'Body', []
        title = title.rstrip()
        messages = []
        if column_width(title) > len(overline):
            blocktext = overline + '\n' + title + '\n' + underline
            if len(overline.rstrip()) < 4:
                self.short_overline(context, blocktext, lineno, 2)
            else:
                msg = self.reporter.warning(
                    'Title overline too short.',
                    nodes.literal_block(source, source),
                    line=lineno)
                messages.append(msg)
        style = (overline[0], underline[0])
        self.eofcheck = 0               # @@@ not sure this is correct
        self.section(title.lstrip(), source, style, lineno + 1, messages)
        self.eofcheck = 1
        return [], 'Body', []

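    # Illustrative example (added; not part of the upstream docutils source):
    # an over- & underlined section title handled by ``text()`` above:
    #
    #     =========
    #       Title
    #     =========
    #
    # Overline and underline must use the same character and match each
    # other; an overline shorter than the title produces a warning, and a
    # missing or mismatched underline is reported as a severe error.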
    indent = text                       # indented title

    def underline(self, match, context, next_state):
        overline = context[0]
        blocktext = overline + '\n' + self.state_machine.line
        lineno = self.state_machine.abs_line_number() - 1
        if len(overline.rstrip()) < 4:
            self.short_overline(context, blocktext, lineno, 1)
        msg = self.reporter.error(
            'Invalid section title or transition marker.',
            nodes.literal_block(blocktext, blocktext),
            line=lineno)
        self.parent += msg
        return [], 'Body', []

    def short_overline(self, context, blocktext, lineno, lines=1):
        msg = self.reporter.info(
            'Possible incomplete section title.\nTreating the overline as '
            "ordinary text because it's so short.",
            line=lineno)
        self.parent += msg
        self.state_correction(context, lines)

    def state_correction(self, context, lines=1):
        self.state_machine.previous_line(lines)
        context[:] = []
        raise statemachine.StateCorrection('Body', 'text')


class QuotedLiteralBlock(RSTState):

    """
    Nested parse handler for quoted (unindented) literal blocks.

    Special-purpose. Not for inclusion in `state_classes`.
    """

    patterns = {'initial_quoted': r'(%(nonalphanum7bit)s)' % Body.pats,
                'text': r''}
    initial_transitions = ('initial_quoted', 'text')

    def __init__(self, state_machine, debug=False):
        RSTState.__init__(self, state_machine, debug)
        self.messages = []
        self.initial_lineno = None

    def blank(self, match, context, next_state):
        if context:
            raise EOFError
        else:
            return context, next_state, []

    def eof(self, context):
        if context:
            src, srcline = self.state_machine.get_source_and_line(
                self.initial_lineno)
            text = '\n'.join(context)
            literal_block = nodes.literal_block(text, text)
            literal_block.source = src
            literal_block.line = srcline
            self.parent += literal_block
        else:
            self.parent += self.reporter.warning(
                'Literal block expected; none found.',
                line=self.state_machine.abs_line_number())
            # src not available, because statemachine.input_lines is empty
            self.state_machine.previous_line()
        self.parent += self.messages
        return []

    def indent(self, match, context, next_state):
        assert context, ('QuotedLiteralBlock.indent: context should not '
                         'be empty!')
        self.messages.append(
            self.reporter.error('Unexpected indentation.',
                                line=self.state_machine.abs_line_number()))
        self.state_machine.previous_line()
        raise EOFError

    def initial_quoted(self, match, context, next_state):
        """Match arbitrary quote character on the first line only."""
        self.remove_transition('initial_quoted')
        quote = match.string[0]
        pattern = re.compile(re.escape(quote), re.UNICODE)
        # New transition matches consistent quotes only:
        self.add_transition('quoted',
                            (pattern, self.quoted, self.__class__.__name__))
        self.initial_lineno = self.state_machine.abs_line_number()
        return [match.string], next_state, []

    def quoted(self, match, context, next_state):
        """Match consistent quotes on subsequent lines."""
        context.append(match.string)
        return context, next_state, []

    def text(self, match, context, next_state):
        if context:
            self.messages.append(
                self.reporter.error('Inconsistent literal block quoting.',
                                    line=self.state_machine.abs_line_number()))
            self.state_machine.previous_line()
        raise EOFError


state_classes = (Body, BulletList, DefinitionList, EnumeratedList, FieldList,
                 OptionList, LineBlock, ExtensionOptions, Explicit, Text,
                 Definition, Line, SubstitutionDef, RFC2822Body, RFC2822List)
"""Standard set of State classes used to start `RSTStateMachine`."""
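

# ---------------------------------------------------------------------------
# Illustrative usage sketch (added; not part of the upstream docutils source).
# It shows how the state classes defined above are exercised indirectly when
# reStructuredText is parsed through the public API; the sample document is
# made up, chosen only to trigger several of the transitions in this module
# (section title, definition list with a classifier, literal block, and a
# transition marker).
if __name__ == '__main__':
    from docutils.core import publish_doctree

    sample = """\
Sample Title
============

term : classifier
    Definition body.

A paragraph introducing a literal block::

    indented literal text

----------

Text after the transition marker.
"""
    # Parse the sample and print the resulting document tree.
    print(publish_doctree(sample).pformat())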