comparison env/lib/python3.7/site-packages/yaml/parser.py @ 0:26e78fe6e8c4 draft

"planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
author shellac
date Sat, 02 May 2020 07:14:21 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:26e78fe6e8c4
1
2 # The following YAML grammar is LL(1) and is parsed by a recursive descent
3 # parser.
4 #
5 # stream ::= STREAM-START implicit_document? explicit_document* STREAM-END
6 # implicit_document ::= block_node DOCUMENT-END*
7 # explicit_document ::= DIRECTIVE* DOCUMENT-START block_node? DOCUMENT-END*
8 # block_node_or_indentless_sequence ::=
9 # ALIAS
10 # | properties (block_content | indentless_block_sequence)?
11 # | block_content
12 # | indentless_block_sequence
13 # block_node ::= ALIAS
14 # | properties block_content?
15 # | block_content
16 # flow_node ::= ALIAS
17 # | properties flow_content?
18 # | flow_content
19 # properties ::= TAG ANCHOR? | ANCHOR TAG?
20 # block_content ::= block_collection | flow_collection | SCALAR
21 # flow_content ::= flow_collection | SCALAR
22 # block_collection ::= block_sequence | block_mapping
23 # flow_collection ::= flow_sequence | flow_mapping
24 # block_sequence ::= BLOCK-SEQUENCE-START (BLOCK-ENTRY block_node?)* BLOCK-END
25 # indentless_sequence ::= (BLOCK-ENTRY block_node?)+
26 # block_mapping ::= BLOCK-MAPPING_START
27 # ((KEY block_node_or_indentless_sequence?)?
28 # (VALUE block_node_or_indentless_sequence?)?)*
29 # BLOCK-END
30 # flow_sequence ::= FLOW-SEQUENCE-START
31 # (flow_sequence_entry FLOW-ENTRY)*
32 # flow_sequence_entry?
33 # FLOW-SEQUENCE-END
34 # flow_sequence_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)?
35 # flow_mapping ::= FLOW-MAPPING-START
36 # (flow_mapping_entry FLOW-ENTRY)*
37 # flow_mapping_entry?
38 # FLOW-MAPPING-END
39 # flow_mapping_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)?
40 #
41 # FIRST sets:
42 #
43 # stream: { STREAM-START }
44 # explicit_document: { DIRECTIVE DOCUMENT-START }
45 # implicit_document: FIRST(block_node)
46 # block_node: { ALIAS TAG ANCHOR SCALAR BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START }
47 # flow_node: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START }
48 # block_content: { BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START SCALAR }
49 # flow_content: { FLOW-SEQUENCE-START FLOW-MAPPING-START SCALAR }
50 # block_collection: { BLOCK-SEQUENCE-START BLOCK-MAPPING-START }
51 # flow_collection: { FLOW-SEQUENCE-START FLOW-MAPPING-START }
52 # block_sequence: { BLOCK-SEQUENCE-START }
53 # block_mapping: { BLOCK-MAPPING-START }
54 # block_node_or_indentless_sequence: { ALIAS ANCHOR TAG SCALAR BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START BLOCK-ENTRY }
55 # indentless_sequence: { BLOCK-ENTRY }
56 # flow_collection: { FLOW-SEQUENCE-START FLOW-MAPPING-START }
57 # flow_sequence: { FLOW-SEQUENCE-START }
58 # flow_mapping: { FLOW-MAPPING-START }
59 # flow_sequence_entry: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START KEY }
60 # flow_mapping_entry: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START KEY }
61
62 __all__ = ['Parser', 'ParserError']
63
64 from .error import MarkedYAMLError
65 from .tokens import *
66 from .events import *
67 from .scanner import *
68
class ParserError(MarkedYAMLError):
    """Raised when the token stream violates the YAML grammar."""
71
class Parser:
    """Produce parsing events from the scanner's token stream.

    Implements the LL(1) grammar documented at the top of this module by
    recursive descent, but without using the call stack: each production
    is a ``parse_*`` method that emits exactly one event and records the
    next production to run in ``self.state``, with ``self.states``
    acting as an explicit return stack.  Events are pulled one at a time
    via check_event()/peek_event()/get_event().

    NOTE(review): this class calls self.check_token/peek_token/get_token,
    which are not defined here — it assumes it is composed with a
    Scanner (as the yaml loader classes do) and is not usable alone.
    """

    # Since writing a recursive-descendant parser is a straightforward task, we
    # do not give many comments here.

    # Tag handles in effect when the document declares no %TAG directives.
    DEFAULT_TAGS = {
        '!': '!',
        '!!': 'tag:yaml.org,2002:',
    }

    def __init__(self):
        self.current_event = None               # one-event lookahead buffer
        self.yaml_version = None                # set from a %YAML directive
        self.tag_handles = {}                   # handle -> prefix mapping
        self.states = []                        # pending parse_* continuations
        self.marks = []                         # start marks of open collections
        self.state = self.parse_stream_start    # next production to execute

    def dispose(self):
        # Reset the state attributes (to clear self-references)
        self.states = []
        self.state = None

    def check_event(self, *choices):
        """Return True if the next event matches one of *choices*.

        With no arguments, return True whenever another event exists.
        The event is produced on demand but not consumed.
        """
        # Check the type of the next event.
        if self.current_event is None:
            if self.state:
                self.current_event = self.state()
        if self.current_event is not None:
            if not choices:
                return True
            for choice in choices:
                if isinstance(self.current_event, choice):
                    return True
        return False

    def peek_event(self):
        """Return the next event without consuming it (None at stream end)."""
        # Get the next event.
        if self.current_event is None:
            if self.state:
                self.current_event = self.state()
        return self.current_event

    def get_event(self):
        """Return the next event and advance past it."""
        # Get the next event and proceed further.
        if self.current_event is None:
            if self.state:
                self.current_event = self.state()
        value = self.current_event
        self.current_event = None
        return value

    # stream ::= STREAM-START implicit_document? explicit_document* STREAM-END
    # implicit_document ::= block_node DOCUMENT-END*
    # explicit_document ::= DIRECTIVE* DOCUMENT-START block_node? DOCUMENT-END*

    def parse_stream_start(self):
        """Consume STREAM-START and emit the matching StreamStartEvent."""

        # Parse the stream start.
        token = self.get_token()
        event = StreamStartEvent(token.start_mark, token.end_mark,
                encoding=token.encoding)

        # Prepare the next state.
        self.state = self.parse_implicit_document_start

        return event

    def parse_implicit_document_start(self):
        """Start a document that has no directives and no '---' marker.

        Falls through to parse_document_start() when the next token shows
        the document is explicit (or the stream is ending).
        """

        # Parse an implicit document.
        if not self.check_token(DirectiveToken, DocumentStartToken,
                StreamEndToken):
            self.tag_handles = self.DEFAULT_TAGS
            token = self.peek_token()
            start_mark = end_mark = token.start_mark
            event = DocumentStartEvent(start_mark, end_mark,
                    explicit=False)

            # Prepare the next state.
            self.states.append(self.parse_document_end)
            self.state = self.parse_block_node

            return event

        else:
            return self.parse_document_start()

    def parse_document_start(self):
        """Start an explicit ('---') document, or emit StreamEndEvent."""

        # Parse any extra document end indicators.
        while self.check_token(DocumentEndToken):
            self.get_token()

        # Parse an explicit document.
        if not self.check_token(StreamEndToken):
            token = self.peek_token()
            start_mark = token.start_mark
            version, tags = self.process_directives()
            if not self.check_token(DocumentStartToken):
                raise ParserError(None, None,
                        "expected '<document start>', but found %r"
                        % self.peek_token().id,
                        self.peek_token().start_mark)
            token = self.get_token()
            end_mark = token.end_mark
            event = DocumentStartEvent(start_mark, end_mark,
                    explicit=True, version=version, tags=tags)
            self.states.append(self.parse_document_end)
            self.state = self.parse_document_content
        else:
            # Parse the end of the stream.
            token = self.get_token()
            event = StreamEndEvent(token.start_mark, token.end_mark)
            # All collections and continuations must be closed by now.
            assert not self.states
            assert not self.marks
            self.state = None
        return event

    def parse_document_end(self):
        """Emit DocumentEndEvent; 'explicit' reflects a '...' marker."""

        # Parse the document end.
        token = self.peek_token()
        start_mark = end_mark = token.start_mark
        explicit = False
        if self.check_token(DocumentEndToken):
            token = self.get_token()
            end_mark = token.end_mark
            explicit = True
        event = DocumentEndEvent(start_mark, end_mark,
                explicit=explicit)

        # Prepare the next state.
        self.state = self.parse_document_start

        return event

    def parse_document_content(self):
        """Parse the document body; an absent body yields an empty scalar."""
        if self.check_token(DirectiveToken,
                DocumentStartToken, DocumentEndToken, StreamEndToken):
            event = self.process_empty_scalar(self.peek_token().start_mark)
            self.state = self.states.pop()
            return event
        else:
            return self.parse_block_node()

    def process_directives(self):
        """Consume %YAML/%TAG directives; return (version, tags-or-None).

        Resets per-document directive state, rejects duplicate %YAML
        directives, non-1.* versions and duplicate tag handles, then
        fills in the default '!'/'!!' handles where not overridden.
        """
        self.yaml_version = None
        self.tag_handles = {}
        while self.check_token(DirectiveToken):
            token = self.get_token()
            if token.name == 'YAML':
                if self.yaml_version is not None:
                    raise ParserError(None, None,
                            "found duplicate YAML directive", token.start_mark)
                major, minor = token.value
                if major != 1:
                    raise ParserError(None, None,
                            "found incompatible YAML document (version 1.* is required)",
                            token.start_mark)
                self.yaml_version = token.value
            elif token.name == 'TAG':
                handle, prefix = token.value
                if handle in self.tag_handles:
                    raise ParserError(None, None,
                            "duplicate tag handle %r" % handle,
                            token.start_mark)
                self.tag_handles[handle] = prefix
        # The returned value exposes only explicitly declared handles
        # (a copy), while self.tag_handles also gains the defaults.
        if self.tag_handles:
            value = self.yaml_version, self.tag_handles.copy()
        else:
            value = self.yaml_version, None
        for key in self.DEFAULT_TAGS:
            if key not in self.tag_handles:
                self.tag_handles[key] = self.DEFAULT_TAGS[key]
        return value

    # block_node_or_indentless_sequence ::= ALIAS
    #               | properties (block_content | indentless_block_sequence)?
    #               | block_content
    #               | indentless_block_sequence
    # block_node    ::= ALIAS
    #                   | properties block_content?
    #                   | block_content
    # flow_node     ::= ALIAS
    #                   | properties flow_content?
    #                   | flow_content
    # properties    ::= TAG ANCHOR? | ANCHOR TAG?
    # block_content     ::= block_collection | flow_collection | SCALAR
    # flow_content      ::= flow_collection | SCALAR
    # block_collection  ::= block_sequence | block_mapping
    # flow_collection   ::= flow_sequence | flow_mapping

    def parse_block_node(self):
        return self.parse_node(block=True)

    def parse_flow_node(self):
        return self.parse_node()

    def parse_block_node_or_indentless_sequence(self):
        return self.parse_node(block=True, indentless_sequence=True)

    def parse_node(self, block=False, indentless_sequence=False):
        """Parse one node and emit its first event.

        block=True also accepts block collections; indentless_sequence=True
        additionally accepts a sequence introduced directly by BLOCK-ENTRY
        (i.e. '- ' items at the current indentation level).
        """
        if self.check_token(AliasToken):
            token = self.get_token()
            event = AliasEvent(token.value, token.start_mark, token.end_mark)
            self.state = self.states.pop()
        else:
            # Collect optional node properties: anchor and tag, either order.
            anchor = None
            tag = None
            start_mark = end_mark = tag_mark = None
            if self.check_token(AnchorToken):
                token = self.get_token()
                start_mark = token.start_mark
                end_mark = token.end_mark
                anchor = token.value
                if self.check_token(TagToken):
                    token = self.get_token()
                    tag_mark = token.start_mark
                    end_mark = token.end_mark
                    tag = token.value
            elif self.check_token(TagToken):
                token = self.get_token()
                start_mark = tag_mark = token.start_mark
                end_mark = token.end_mark
                tag = token.value
                if self.check_token(AnchorToken):
                    token = self.get_token()
                    end_mark = token.end_mark
                    anchor = token.value
            if tag is not None:
                # Resolve a (handle, suffix) pair against the declared
                # tag handles; a bare suffix is already fully resolved.
                handle, suffix = tag
                if handle is not None:
                    if handle not in self.tag_handles:
                        raise ParserError("while parsing a node", start_mark,
                                "found undefined tag handle %r" % handle,
                                tag_mark)
                    tag = self.tag_handles[handle]+suffix
                else:
                    tag = suffix
            #if tag == '!':
            #    raise ParserError("while parsing a node", start_mark,
            #            "found non-specific tag '!'", tag_mark,
            #            "Please check 'http://pyyaml.org/wiki/YAMLNonSpecificTag' and share your opinion.")
            if start_mark is None:
                start_mark = end_mark = self.peek_token().start_mark
            event = None
            implicit = (tag is None or tag == '!')
            if indentless_sequence and self.check_token(BlockEntryToken):
                end_mark = self.peek_token().end_mark
                event = SequenceStartEvent(anchor, tag, implicit,
                        start_mark, end_mark)
                self.state = self.parse_indentless_sequence_entry
            else:
                if self.check_token(ScalarToken):
                    token = self.get_token()
                    end_mark = token.end_mark
                    # implicit is a pair: (resolvable as plain, resolvable
                    # as non-plain) — drives tag emission downstream.
                    if (token.plain and tag is None) or tag == '!':
                        implicit = (True, False)
                    elif tag is None:
                        implicit = (False, True)
                    else:
                        implicit = (False, False)
                    event = ScalarEvent(anchor, tag, implicit, token.value,
                            start_mark, end_mark, style=token.style)
                    self.state = self.states.pop()
                elif self.check_token(FlowSequenceStartToken):
                    end_mark = self.peek_token().end_mark
                    event = SequenceStartEvent(anchor, tag, implicit,
                            start_mark, end_mark, flow_style=True)
                    self.state = self.parse_flow_sequence_first_entry
                elif self.check_token(FlowMappingStartToken):
                    end_mark = self.peek_token().end_mark
                    event = MappingStartEvent(anchor, tag, implicit,
                            start_mark, end_mark, flow_style=True)
                    self.state = self.parse_flow_mapping_first_key
                elif block and self.check_token(BlockSequenceStartToken):
                    end_mark = self.peek_token().start_mark
                    event = SequenceStartEvent(anchor, tag, implicit,
                            start_mark, end_mark, flow_style=False)
                    self.state = self.parse_block_sequence_first_entry
                elif block and self.check_token(BlockMappingStartToken):
                    end_mark = self.peek_token().start_mark
                    event = MappingStartEvent(anchor, tag, implicit,
                            start_mark, end_mark, flow_style=False)
                    self.state = self.parse_block_mapping_first_key
                elif anchor is not None or tag is not None:
                    # Empty scalars are allowed even if a tag or an anchor is
                    # specified.
                    event = ScalarEvent(anchor, tag, (implicit, False), '',
                            start_mark, end_mark)
                    self.state = self.states.pop()
                else:
                    if block:
                        node = 'block'
                    else:
                        node = 'flow'
                    token = self.peek_token()
                    raise ParserError("while parsing a %s node" % node, start_mark,
                            "expected the node content, but found %r" % token.id,
                            token.start_mark)
        return event

    # block_sequence ::= BLOCK-SEQUENCE-START (BLOCK-ENTRY block_node?)* BLOCK-END

    def parse_block_sequence_first_entry(self):
        """Consume BLOCK-SEQUENCE-START, remember its mark, parse entries."""
        token = self.get_token()
        self.marks.append(token.start_mark)
        return self.parse_block_sequence_entry()

    def parse_block_sequence_entry(self):
        """Parse one '- item' entry or close the sequence at BLOCK-END."""
        if self.check_token(BlockEntryToken):
            token = self.get_token()
            if not self.check_token(BlockEntryToken, BlockEndToken):
                self.states.append(self.parse_block_sequence_entry)
                return self.parse_block_node()
            else:
                # '- ' with no node: the entry is an empty scalar.
                self.state = self.parse_block_sequence_entry
                return self.process_empty_scalar(token.end_mark)
        if not self.check_token(BlockEndToken):
            token = self.peek_token()
            raise ParserError("while parsing a block collection", self.marks[-1],
                    "expected <block end>, but found %r" % token.id, token.start_mark)
        token = self.get_token()
        event = SequenceEndEvent(token.start_mark, token.end_mark)
        self.state = self.states.pop()
        self.marks.pop()
        return event

    # indentless_sequence ::= (BLOCK-ENTRY block_node?)+

    def parse_indentless_sequence_entry(self):
        """Parse '- ' entries not introduced by BLOCK-SEQUENCE-START.

        The sequence ends at the first token that is not BLOCK-ENTRY;
        that token is peeked, not consumed.
        """
        if self.check_token(BlockEntryToken):
            token = self.get_token()
            if not self.check_token(BlockEntryToken,
                    KeyToken, ValueToken, BlockEndToken):
                self.states.append(self.parse_indentless_sequence_entry)
                return self.parse_block_node()
            else:
                self.state = self.parse_indentless_sequence_entry
                return self.process_empty_scalar(token.end_mark)
        token = self.peek_token()
        event = SequenceEndEvent(token.start_mark, token.start_mark)
        self.state = self.states.pop()
        return event

    # block_mapping     ::= BLOCK-MAPPING_START
    #                       ((KEY block_node_or_indentless_sequence?)?
    #                       (VALUE block_node_or_indentless_sequence?)?)*
    #                       BLOCK-END

    def parse_block_mapping_first_key(self):
        """Consume BLOCK-MAPPING-START, remember its mark, parse keys."""
        token = self.get_token()
        self.marks.append(token.start_mark)
        return self.parse_block_mapping_key()

    def parse_block_mapping_key(self):
        """Parse a mapping key or close the mapping at BLOCK-END."""
        if self.check_token(KeyToken):
            token = self.get_token()
            if not self.check_token(KeyToken, ValueToken, BlockEndToken):
                self.states.append(self.parse_block_mapping_value)
                return self.parse_block_node_or_indentless_sequence()
            else:
                # '?' with no key node: the key is an empty scalar.
                self.state = self.parse_block_mapping_value
                return self.process_empty_scalar(token.end_mark)
        if not self.check_token(BlockEndToken):
            token = self.peek_token()
            raise ParserError("while parsing a block mapping", self.marks[-1],
                    "expected <block end>, but found %r" % token.id, token.start_mark)
        token = self.get_token()
        event = MappingEndEvent(token.start_mark, token.end_mark)
        self.state = self.states.pop()
        self.marks.pop()
        return event

    def parse_block_mapping_value(self):
        """Parse a mapping value; a missing value becomes an empty scalar."""
        if self.check_token(ValueToken):
            token = self.get_token()
            if not self.check_token(KeyToken, ValueToken, BlockEndToken):
                self.states.append(self.parse_block_mapping_key)
                return self.parse_block_node_or_indentless_sequence()
            else:
                self.state = self.parse_block_mapping_key
                return self.process_empty_scalar(token.end_mark)
        else:
            self.state = self.parse_block_mapping_key
            token = self.peek_token()
            return self.process_empty_scalar(token.start_mark)

    # flow_sequence     ::= FLOW-SEQUENCE-START
    #                       (flow_sequence_entry FLOW-ENTRY)*
    #                       flow_sequence_entry?
    #                       FLOW-SEQUENCE-END
    # flow_sequence_entry   ::= flow_node | KEY flow_node? (VALUE flow_node?)?
    #
    # Note that while production rules for both flow_sequence_entry and
    # flow_mapping_entry are equal, their interpretations are different.
    # For `flow_sequence_entry`, the part `KEY flow_node? (VALUE flow_node?)?`
    # generate an inline mapping (set syntax).

    def parse_flow_sequence_first_entry(self):
        """Consume FLOW-SEQUENCE-START, remember its mark, parse entries."""
        token = self.get_token()
        self.marks.append(token.start_mark)
        return self.parse_flow_sequence_entry(first=True)

    def parse_flow_sequence_entry(self, first=False):
        """Parse one flow sequence entry or close the sequence at ']'.

        first=True suppresses the requirement for a leading ','.
        """
        if not self.check_token(FlowSequenceEndToken):
            if not first:
                if self.check_token(FlowEntryToken):
                    self.get_token()
                else:
                    token = self.peek_token()
                    raise ParserError("while parsing a flow sequence", self.marks[-1],
                            "expected ',' or ']', but got %r" % token.id, token.start_mark)

            if self.check_token(KeyToken):
                # 'key: value' inside a flow sequence opens an inline
                # single-pair mapping.
                token = self.peek_token()
                event = MappingStartEvent(None, None, True,
                        token.start_mark, token.end_mark,
                        flow_style=True)
                self.state = self.parse_flow_sequence_entry_mapping_key
                return event
            elif not self.check_token(FlowSequenceEndToken):
                self.states.append(self.parse_flow_sequence_entry)
                return self.parse_flow_node()
        token = self.get_token()
        event = SequenceEndEvent(token.start_mark, token.end_mark)
        self.state = self.states.pop()
        self.marks.pop()
        return event

    def parse_flow_sequence_entry_mapping_key(self):
        """Parse the key of an inline mapping inside a flow sequence."""
        token = self.get_token()
        if not self.check_token(ValueToken,
                FlowEntryToken, FlowSequenceEndToken):
            self.states.append(self.parse_flow_sequence_entry_mapping_value)
            return self.parse_flow_node()
        else:
            self.state = self.parse_flow_sequence_entry_mapping_value
            return self.process_empty_scalar(token.end_mark)

    def parse_flow_sequence_entry_mapping_value(self):
        """Parse the value of an inline mapping inside a flow sequence."""
        if self.check_token(ValueToken):
            token = self.get_token()
            if not self.check_token(FlowEntryToken, FlowSequenceEndToken):
                self.states.append(self.parse_flow_sequence_entry_mapping_end)
                return self.parse_flow_node()
            else:
                self.state = self.parse_flow_sequence_entry_mapping_end
                return self.process_empty_scalar(token.end_mark)
        else:
            self.state = self.parse_flow_sequence_entry_mapping_end
            token = self.peek_token()
            return self.process_empty_scalar(token.start_mark)

    def parse_flow_sequence_entry_mapping_end(self):
        """Close the inline mapping and resume parsing sequence entries."""
        self.state = self.parse_flow_sequence_entry
        token = self.peek_token()
        return MappingEndEvent(token.start_mark, token.start_mark)

    # flow_mapping  ::= FLOW-MAPPING-START
    #                   (flow_mapping_entry FLOW-ENTRY)*
    #                   flow_mapping_entry?
    #                   FLOW-MAPPING-END
    # flow_mapping_entry    ::= flow_node | KEY flow_node? (VALUE flow_node?)?

    def parse_flow_mapping_first_key(self):
        """Consume FLOW-MAPPING-START, remember its mark, parse keys."""
        token = self.get_token()
        self.marks.append(token.start_mark)
        return self.parse_flow_mapping_key(first=True)

    def parse_flow_mapping_key(self, first=False):
        """Parse one flow mapping key or close the mapping at '}'.

        first=True suppresses the requirement for a leading ','.
        """
        if not self.check_token(FlowMappingEndToken):
            if not first:
                if self.check_token(FlowEntryToken):
                    self.get_token()
                else:
                    token = self.peek_token()
                    raise ParserError("while parsing a flow mapping", self.marks[-1],
                            "expected ',' or '}', but got %r" % token.id, token.start_mark)
            if self.check_token(KeyToken):
                token = self.get_token()
                if not self.check_token(ValueToken,
                        FlowEntryToken, FlowMappingEndToken):
                    self.states.append(self.parse_flow_mapping_value)
                    return self.parse_flow_node()
                else:
                    self.state = self.parse_flow_mapping_value
                    return self.process_empty_scalar(token.end_mark)
            elif not self.check_token(FlowMappingEndToken):
                # A bare node used as a key: its value will be empty.
                self.states.append(self.parse_flow_mapping_empty_value)
                return self.parse_flow_node()
        token = self.get_token()
        event = MappingEndEvent(token.start_mark, token.end_mark)
        self.state = self.states.pop()
        self.marks.pop()
        return event

    def parse_flow_mapping_value(self):
        """Parse a flow mapping value; absent values become empty scalars."""
        if self.check_token(ValueToken):
            token = self.get_token()
            if not self.check_token(FlowEntryToken, FlowMappingEndToken):
                self.states.append(self.parse_flow_mapping_key)
                return self.parse_flow_node()
            else:
                self.state = self.parse_flow_mapping_key
                return self.process_empty_scalar(token.end_mark)
        else:
            self.state = self.parse_flow_mapping_key
            token = self.peek_token()
            return self.process_empty_scalar(token.start_mark)

    def parse_flow_mapping_empty_value(self):
        """Emit the implicit empty value for a key given without ':'."""
        self.state = self.parse_flow_mapping_key
        return self.process_empty_scalar(self.peek_token().start_mark)

    def process_empty_scalar(self, mark):
        """Return a zero-width plain ScalarEvent anchored at *mark*."""
        return ScalarEvent(None, None, (True, False), '', mark, mark)
589