comparison: planemo/lib/python3.7/site-packages/ruamel/yaml/scanner.py @ 1:56ad4e20f292 (draft)

"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"

| author | guerler |
|---|---|
| date | Fri, 31 Jul 2020 00:32:28 -0400 |
| parents | |
| children | |
changesets compared: 0:d30785e31577 → 1:56ad4e20f292
# coding: utf-8

from __future__ import print_function, absolute_import, division, unicode_literals

# Scanner produces tokens of the following types:
# STREAM-START
# STREAM-END
# DIRECTIVE(name, value)
# DOCUMENT-START
# DOCUMENT-END
# BLOCK-SEQUENCE-START
# BLOCK-MAPPING-START
# BLOCK-END
# FLOW-SEQUENCE-START
# FLOW-MAPPING-START
# FLOW-SEQUENCE-END
# FLOW-MAPPING-END
# BLOCK-ENTRY
# FLOW-ENTRY
# KEY
# VALUE
# ALIAS(value)
# ANCHOR(value)
# TAG(value)
# SCALAR(value, plain, style)
#
# RoundTripScanner
# COMMENT(value)
#
# Read comments in the Scanner code for more details.
#

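# A hedged usage sketch of the token stream described above (illustrative
# only, not part of the vendored module; it assumes the PyYAML-compatible
# ``scan`` helper that ruamel.yaml exports, imported lazily so this can sit
# ahead of this module's own imports):
def _example_token_stream():  # pragma: no cover - illustrative sketch
    import ruamel.yaml

    # For '- a\n- b: c\n' this prints, in order: StreamStartToken,
    # BlockSequenceStartToken, BlockEntryToken, ScalarToken('a'),
    # BlockEntryToken, BlockMappingStartToken, KeyToken, ScalarToken('b'),
    # ValueToken, ScalarToken('c'), BlockEndToken (twice), StreamEndToken.
    for token in ruamel.yaml.scan('- a\n- b: c\n'):
        print(type(token).__name__)
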
from ruamel.yaml.error import MarkedYAMLError
from ruamel.yaml.tokens import *  # NOQA
from ruamel.yaml.compat import utf8, unichr, PY3, check_anchorname_char, nprint  # NOQA

if False:  # MYPY
    from typing import Any, Dict, Optional, List, Union, Text  # NOQA
    from ruamel.yaml.compat import VersionType  # NOQA

__all__ = ['Scanner', 'RoundTripScanner', 'ScannerError']


_THE_END = '\n\0\r\x85\u2028\u2029'
_THE_END_SPACE_TAB = ' \n\0\t\r\x85\u2028\u2029'
_SPACE_TAB = ' \t'


class ScannerError(MarkedYAMLError):
    pass


class SimpleKey(object):
    # See the simple keys treatment below.

    def __init__(self, token_number, required, index, line, column, mark):
        # type: (Any, Any, int, int, int, Any) -> None
        self.token_number = token_number
        self.required = required
        self.index = index
        self.line = line
        self.column = column
        self.mark = mark


class Scanner(object):
    def __init__(self, loader=None):
        # type: (Any) -> None
        """Initialize the scanner."""
        # It is assumed that Scanner and Reader will have a common descendant.
        # Reader does the dirty work of checking for BOM and converting the
        # input data to Unicode. It also adds NUL to the end.
        #
        # Reader supports the following methods
        #   self.peek(i=0)     # peek the next i-th character
        #   self.prefix(l=1)   # peek the next l characters
        #   self.forward(l=1)  # read the next l characters and move the pointer

        self.loader = loader
        if self.loader is not None and getattr(self.loader, '_scanner', None) is None:
            self.loader._scanner = self
        self.reset_scanner()
        self.first_time = False

    @property
    def flow_level(self):
        # type: () -> int
        return len(self.flow_context)

    def reset_scanner(self):
        # type: () -> None
        # Have we reached the end of the stream?
        self.done = False

        # flow_context is an expanding/shrinking list consisting of '{' and '['
        # for each unclosed flow context. An empty list means block context.
        self.flow_context = []  # type: List[Text]

        # List of processed tokens that are not yet emitted.
        self.tokens = []  # type: List[Any]

        # Add the STREAM-START token.
        self.fetch_stream_start()

        # Number of tokens that were emitted through the `get_token` method.
        self.tokens_taken = 0

        # The current indentation level.
        self.indent = -1

        # Past indentation levels.
        self.indents = []  # type: List[int]

        # Variables related to simple keys treatment.

        # A simple key is a key that is not denoted by the '?' indicator.
        # Example of simple keys:
        #   ---
        #   block simple key: value
        #   ? not a simple key:
        #   : { flow simple key: value }
        # We emit the KEY token before all keys, so when we find a potential
        # simple key, we try to locate the corresponding ':' indicator.
        # Simple keys should be limited to a single line and 1024 characters.

        # Can a simple key start at the current position? A simple key may
        # start:
        # - at the beginning of the line, not counting indentation spaces
        #   (in block context),
        # - after '{', '[', ',' (in the flow context),
        # - after '?', ':', '-' (in the block context).
        # In the block context, this flag also signifies if a block collection
        # may start at the current position.
        self.allow_simple_key = True

        # Keep track of possible simple keys. This is a dictionary. The key
        # is `flow_level`; there can be no more than one possible simple key
        # for each level. The value is a SimpleKey record:
        #   (token_number, required, index, line, column, mark)
        # A simple key may start with ALIAS, ANCHOR, TAG, SCALAR(flow),
        # '[', or '{' tokens.
        self.possible_simple_keys = {}  # type: Dict[Any, Any]

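    # A hedged, runnable illustration of the simple-key rules above (not used
    # by the scanner itself; it assumes the package-level ``scan`` helper and
    # ``YAMLError`` re-export, both part of ruamel.yaml's PyYAML-compatible
    # surface):
    @staticmethod
    def _example_simple_key_limit():  # pragma: no cover - illustrative sketch
        import ruamel.yaml

        # A short plain key keeps its "possible simple key" slot until the
        # ':' arrives, so this scans cleanly.
        list(ruamel.yaml.scan('k' * 100 + ': v\n'))
        # A key longer than 1024 characters loses that slot (see
        # stale_possible_simple_keys), so its ':' is rejected.
        try:
            list(ruamel.yaml.scan('k' * 2000 + ': v\n'))
        except ruamel.yaml.YAMLError as exc:
            print('over-long simple key rejected:', exc)
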
    @property
    def reader(self):
        # type: () -> Any
        try:
            return self._scanner_reader  # type: ignore
        except AttributeError:
            if hasattr(self.loader, 'typ'):
                self._scanner_reader = self.loader.reader
            else:
                self._scanner_reader = self.loader._reader
            return self._scanner_reader

    @property
    def scanner_processing_version(self):  # prefix until un-composited
        # type: () -> VersionType
        if hasattr(self.loader, 'typ'):
            return self.loader.resolver.processing_version  # type: ignore
        return self.loader.processing_version  # type: ignore

    # Public methods.

    def check_token(self, *choices):
        # type: (Any) -> bool
        # Check if the next token is one of the given types.
        while self.need_more_tokens():
            self.fetch_more_tokens()
        if bool(self.tokens):
            if not choices:
                return True
            for choice in choices:
                if isinstance(self.tokens[0], choice):
                    return True
        return False

    def peek_token(self):
        # type: () -> Any
        # Return the next token, but do not delete it from the queue.
        while self.need_more_tokens():
            self.fetch_more_tokens()
        if bool(self.tokens):
            return self.tokens[0]

    def get_token(self):
        # type: () -> Any
        # Return the next token.
        while self.need_more_tokens():
            self.fetch_more_tokens()
        if bool(self.tokens):
            self.tokens_taken += 1
            return self.tokens.pop(0)

    # Private methods.

    def need_more_tokens(self):
        # type: () -> bool
        if self.done:
            return False
        if not self.tokens:
            return True
        # The current token may be a potential simple key, so we
        # need to look further.
        self.stale_possible_simple_keys()
        if self.next_possible_simple_key() == self.tokens_taken:
            return True
        return False

    def fetch_comment(self, comment):
        # type: (Any) -> None
        raise NotImplementedError

    def fetch_more_tokens(self):
        # type: () -> Any
        # Eat whitespaces and comments until we reach the next token.
        comment = self.scan_to_next_token()
        if comment is not None:  # never happens for base scanner
            return self.fetch_comment(comment)
        # Remove obsolete possible simple keys.
        self.stale_possible_simple_keys()

        # Compare the current indentation and column. It may add some tokens
        # and decrease the current indentation level.
        self.unwind_indent(self.reader.column)

        # Peek the next character.
        ch = self.reader.peek()

        # Is it the end of stream?
        if ch == '\0':
            return self.fetch_stream_end()

        # Is it a directive?
        if ch == '%' and self.check_directive():
            return self.fetch_directive()

        # Is it the document start?
        if ch == '-' and self.check_document_start():
            return self.fetch_document_start()

        # Is it the document end?
        if ch == '.' and self.check_document_end():
            return self.fetch_document_end()

        # TODO: support for BOM within a stream.
        # if ch == u'\uFEFF':
        #     return self.fetch_bom()  <-- issue BOMToken

        # Note: the order of the following checks is NOT significant.

        # Is it the flow sequence start indicator?
        if ch == '[':
            return self.fetch_flow_sequence_start()

        # Is it the flow mapping start indicator?
        if ch == '{':
            return self.fetch_flow_mapping_start()

        # Is it the flow sequence end indicator?
        if ch == ']':
            return self.fetch_flow_sequence_end()

        # Is it the flow mapping end indicator?
        if ch == '}':
            return self.fetch_flow_mapping_end()

        # Is it the flow entry indicator?
        if ch == ',':
            return self.fetch_flow_entry()

        # Is it the block entry indicator?
        if ch == '-' and self.check_block_entry():
            return self.fetch_block_entry()

        # Is it the key indicator?
        if ch == '?' and self.check_key():
            return self.fetch_key()

        # Is it the value indicator?
        if ch == ':' and self.check_value():
            return self.fetch_value()

        # Is it an alias?
        if ch == '*':
            return self.fetch_alias()

        # Is it an anchor?
        if ch == '&':
            return self.fetch_anchor()

        # Is it a tag?
        if ch == '!':
            return self.fetch_tag()

        # Is it a literal scalar?
        if ch == '|' and not self.flow_level:
            return self.fetch_literal()

        # Is it a folded scalar?
        if ch == '>' and not self.flow_level:
            return self.fetch_folded()

        # Is it a single quoted scalar?
        if ch == "'":
            return self.fetch_single()

        # Is it a double quoted scalar?
        if ch == '"':
            return self.fetch_double()

        # It must be a plain scalar then.
        if self.check_plain():
            return self.fetch_plain()

        # No? It's an error. Let's produce a nice error message.
        raise ScannerError(
            'while scanning for the next token',
            None,
            'found character %r that cannot start any token' % utf8(ch),
            self.reader.get_mark(),
        )

    # Simple keys treatment.

    def next_possible_simple_key(self):
        # type: () -> Any
        # Return the number of the nearest possible simple key. Actually we
        # don't need to loop through the whole dictionary. We may replace it
        # with the following code:
        #   if not self.possible_simple_keys:
        #       return None
        #   return self.possible_simple_keys[
        #       min(self.possible_simple_keys.keys())].token_number
        min_token_number = None
        for level in self.possible_simple_keys:
            key = self.possible_simple_keys[level]
            if min_token_number is None or key.token_number < min_token_number:
                min_token_number = key.token_number
        return min_token_number

    def stale_possible_simple_keys(self):
        # type: () -> None
        # Remove entries that are no longer possible simple keys. According to
        # the YAML specification, simple keys
        # - should be limited to a single line,
        # - should be no longer than 1024 characters.
        # Disabling this procedure will allow simple keys of any length and
        # height (may cause problems if indentation is broken though).
        for level in list(self.possible_simple_keys):
            key = self.possible_simple_keys[level]
            if key.line != self.reader.line or self.reader.index - key.index > 1024:
                if key.required:
                    raise ScannerError(
                        'while scanning a simple key',
                        key.mark,
                        "could not find expected ':'",
                        self.reader.get_mark(),
                    )
                del self.possible_simple_keys[level]

    def save_possible_simple_key(self):
        # type: () -> None
        # The next token may start a simple key. We check if it's possible
        # and save its position. This function is called for
        # ALIAS, ANCHOR, TAG, SCALAR(flow), '[', and '{'.

        # Check if a simple key is required at the current position.
        required = not self.flow_level and self.indent == self.reader.column

        # The next token might be a simple key. Let's save its number and
        # position.
        if self.allow_simple_key:
            self.remove_possible_simple_key()
            token_number = self.tokens_taken + len(self.tokens)
            key = SimpleKey(
                token_number,
                required,
                self.reader.index,
                self.reader.line,
                self.reader.column,
                self.reader.get_mark(),
            )
            self.possible_simple_keys[self.flow_level] = key

    def remove_possible_simple_key(self):
        # type: () -> None
        # Remove the saved possible key position at the current flow level.
        if self.flow_level in self.possible_simple_keys:
            key = self.possible_simple_keys[self.flow_level]

            if key.required:
                raise ScannerError(
                    'while scanning a simple key',
                    key.mark,
                    "could not find expected ':'",
                    self.reader.get_mark(),
                )

            del self.possible_simple_keys[self.flow_level]

    # Indentation functions.

    def unwind_indent(self, column):
        # type: (Any) -> None
        # In flow context, tokens should respect indentation.
        # Actually the condition should be `self.indent >= column` according to
        # the spec. But this condition will prohibit intuitively correct
        # constructions such as
        #   key : {
        #   }
        # ####
        # if self.flow_level and self.indent > column:
        #     raise ScannerError(None, None,
        #                        "invalid indentation or unclosed '[' or '{'",
        #                        self.reader.get_mark())

        # In the flow context, indentation is ignored. We make the scanner less
        # restrictive than the specification requires.
        if bool(self.flow_level):
            return

        # In block context, we may need to issue the BLOCK-END tokens.
        while self.indent > column:
            mark = self.reader.get_mark()
            self.indent = self.indents.pop()
            self.tokens.append(BlockEndToken(mark, mark))

    def add_indent(self, column):
        # type: (int) -> bool
        # Check if we need to increase indentation.
        if self.indent < column:
            self.indents.append(self.indent)
            self.indent = column
            return True
        return False

    # Fetchers.

    def fetch_stream_start(self):
        # type: () -> None
        # We always add STREAM-START as the first token and STREAM-END as the
        # last token.
        # Read the token.
        mark = self.reader.get_mark()
        # Add STREAM-START.
        self.tokens.append(StreamStartToken(mark, mark, encoding=self.reader.encoding))

    def fetch_stream_end(self):
        # type: () -> None
        # Set the current indentation to -1.
        self.unwind_indent(-1)
        # Reset simple keys.
        self.remove_possible_simple_key()
        self.allow_simple_key = False
        self.possible_simple_keys = {}  # type: Dict[Any, Any]
        # Read the token.
        mark = self.reader.get_mark()
        # Add STREAM-END.
        self.tokens.append(StreamEndToken(mark, mark))
        # The stream is finished.
        self.done = True

    def fetch_directive(self):
        # type: () -> None
        # Set the current indentation to -1.
        self.unwind_indent(-1)

        # Reset simple keys.
        self.remove_possible_simple_key()
        self.allow_simple_key = False

        # Scan and add DIRECTIVE.
        self.tokens.append(self.scan_directive())

    def fetch_document_start(self):
        # type: () -> None
        self.fetch_document_indicator(DocumentStartToken)

    def fetch_document_end(self):
        # type: () -> None
        self.fetch_document_indicator(DocumentEndToken)

    def fetch_document_indicator(self, TokenClass):
        # type: (Any) -> None
        # Set the current indentation to -1.
        self.unwind_indent(-1)

        # Reset simple keys. Note that there could not be a block collection
        # after '---'.
        self.remove_possible_simple_key()
        self.allow_simple_key = False

        # Add DOCUMENT-START or DOCUMENT-END.
        start_mark = self.reader.get_mark()
        self.reader.forward(3)
        end_mark = self.reader.get_mark()
        self.tokens.append(TokenClass(start_mark, end_mark))

    def fetch_flow_sequence_start(self):
        # type: () -> None
        self.fetch_flow_collection_start(FlowSequenceStartToken, to_push='[')

    def fetch_flow_mapping_start(self):
        # type: () -> None
        self.fetch_flow_collection_start(FlowMappingStartToken, to_push='{')

    def fetch_flow_collection_start(self, TokenClass, to_push):
        # type: (Any, Text) -> None
        # '[' and '{' may start a simple key.
        self.save_possible_simple_key()
        # Increase the flow level.
        self.flow_context.append(to_push)
        # Simple keys are allowed after '[' and '{'.
        self.allow_simple_key = True
        # Add FLOW-SEQUENCE-START or FLOW-MAPPING-START.
        start_mark = self.reader.get_mark()
        self.reader.forward()
        end_mark = self.reader.get_mark()
        self.tokens.append(TokenClass(start_mark, end_mark))

    def fetch_flow_sequence_end(self):
        # type: () -> None
        self.fetch_flow_collection_end(FlowSequenceEndToken)

    def fetch_flow_mapping_end(self):
        # type: () -> None
        self.fetch_flow_collection_end(FlowMappingEndToken)

    def fetch_flow_collection_end(self, TokenClass):
        # type: (Any) -> None
        # Reset possible simple key on the current level.
        self.remove_possible_simple_key()
        # Decrease the flow level.
        try:
            popped = self.flow_context.pop()  # NOQA
        except IndexError:
            # We must not be in a list or object.
            # Defer error handling to the parser.
            pass
        # No simple keys after ']' or '}'.
        self.allow_simple_key = False
        # Add FLOW-SEQUENCE-END or FLOW-MAPPING-END.
        start_mark = self.reader.get_mark()
        self.reader.forward()
        end_mark = self.reader.get_mark()
        self.tokens.append(TokenClass(start_mark, end_mark))

    def fetch_flow_entry(self):
        # type: () -> None
        # Simple keys are allowed after ','.
        self.allow_simple_key = True
        # Reset possible simple key on the current level.
        self.remove_possible_simple_key()
        # Add FLOW-ENTRY.
        start_mark = self.reader.get_mark()
        self.reader.forward()
        end_mark = self.reader.get_mark()
        self.tokens.append(FlowEntryToken(start_mark, end_mark))

    def fetch_block_entry(self):
        # type: () -> None
        # Block context needs additional checks.
        if not self.flow_level:
            # Are we allowed to start a new entry?
            if not self.allow_simple_key:
                raise ScannerError(
                    None, None, 'sequence entries are not allowed here', self.reader.get_mark()
                )
            # We may need to add BLOCK-SEQUENCE-START.
            if self.add_indent(self.reader.column):
                mark = self.reader.get_mark()
                self.tokens.append(BlockSequenceStartToken(mark, mark))
        # It's an error for the block entry to occur in the flow context,
        # but we let the parser detect this.
        else:
            pass
        # Simple keys are allowed after '-'.
        self.allow_simple_key = True
        # Reset possible simple key on the current level.
        self.remove_possible_simple_key()

        # Add BLOCK-ENTRY.
        start_mark = self.reader.get_mark()
        self.reader.forward()
        end_mark = self.reader.get_mark()
        self.tokens.append(BlockEntryToken(start_mark, end_mark))

    def fetch_key(self):
        # type: () -> None
        # Block context needs additional checks.
        if not self.flow_level:

            # Are we allowed to start a key (not necessarily a simple one)?
            if not self.allow_simple_key:
                raise ScannerError(
                    None, None, 'mapping keys are not allowed here', self.reader.get_mark()
                )

            # We may need to add BLOCK-MAPPING-START.
            if self.add_indent(self.reader.column):
                mark = self.reader.get_mark()
                self.tokens.append(BlockMappingStartToken(mark, mark))

        # Simple keys are allowed after '?' in the block context.
        self.allow_simple_key = not self.flow_level

        # Reset possible simple key on the current level.
        self.remove_possible_simple_key()

        # Add KEY.
        start_mark = self.reader.get_mark()
        self.reader.forward()
        end_mark = self.reader.get_mark()
        self.tokens.append(KeyToken(start_mark, end_mark))

    def fetch_value(self):
        # type: () -> None
        # Do we determine a simple key?
        if self.flow_level in self.possible_simple_keys:
            # Add KEY.
            key = self.possible_simple_keys[self.flow_level]
            del self.possible_simple_keys[self.flow_level]
            self.tokens.insert(
                key.token_number - self.tokens_taken, KeyToken(key.mark, key.mark)
            )

            # If this key starts a new block mapping, we need to add
            # BLOCK-MAPPING-START.
            if not self.flow_level:
                if self.add_indent(key.column):
                    self.tokens.insert(
                        key.token_number - self.tokens_taken,
                        BlockMappingStartToken(key.mark, key.mark),
                    )

            # There cannot be two simple keys one after another.
            self.allow_simple_key = False

        # It must be a part of a complex key.
        else:

            # Block context needs additional checks.
            # (Do we really need them? They will be caught by the parser
            # anyway.)
            if not self.flow_level:

                # We are allowed to start a complex value if and only if
                # we can start a simple key.
                if not self.allow_simple_key:
                    raise ScannerError(
                        None,
                        None,
                        'mapping values are not allowed here',
                        self.reader.get_mark(),
                    )

            # If this value starts a new block mapping, we need to add
            # BLOCK-MAPPING-START. It will be detected as an error later by
            # the parser.
            if not self.flow_level:
                if self.add_indent(self.reader.column):
                    mark = self.reader.get_mark()
                    self.tokens.append(BlockMappingStartToken(mark, mark))

        # Simple keys are allowed after ':' in the block context.
        self.allow_simple_key = not self.flow_level

        # Reset possible simple key on the current level.
        self.remove_possible_simple_key()

        # Add VALUE.
        start_mark = self.reader.get_mark()
        self.reader.forward()
        end_mark = self.reader.get_mark()
        self.tokens.append(ValueToken(start_mark, end_mark))

    def fetch_alias(self):
        # type: () -> None
        # ALIAS could be a simple key.
        self.save_possible_simple_key()
        # No simple keys after ALIAS.
        self.allow_simple_key = False
        # Scan and add ALIAS.
        self.tokens.append(self.scan_anchor(AliasToken))

    def fetch_anchor(self):
        # type: () -> None
        # ANCHOR could start a simple key.
        self.save_possible_simple_key()
        # No simple keys after ANCHOR.
        self.allow_simple_key = False
        # Scan and add ANCHOR.
        self.tokens.append(self.scan_anchor(AnchorToken))

    def fetch_tag(self):
        # type: () -> None
        # TAG could start a simple key.
        self.save_possible_simple_key()
        # No simple keys after TAG.
        self.allow_simple_key = False
        # Scan and add TAG.
        self.tokens.append(self.scan_tag())

    def fetch_literal(self):
        # type: () -> None
        self.fetch_block_scalar(style='|')

    def fetch_folded(self):
        # type: () -> None
        self.fetch_block_scalar(style='>')

    def fetch_block_scalar(self, style):
        # type: (Any) -> None
        # A simple key may follow a block scalar.
        self.allow_simple_key = True
        # Reset possible simple key on the current level.
        self.remove_possible_simple_key()
        # Scan and add SCALAR.
        self.tokens.append(self.scan_block_scalar(style))

    def fetch_single(self):
        # type: () -> None
        self.fetch_flow_scalar(style="'")

    def fetch_double(self):
        # type: () -> None
        self.fetch_flow_scalar(style='"')

    def fetch_flow_scalar(self, style):
        # type: (Any) -> None
        # A flow scalar could be a simple key.
        self.save_possible_simple_key()
        # No simple keys after flow scalars.
        self.allow_simple_key = False
        # Scan and add SCALAR.
        self.tokens.append(self.scan_flow_scalar(style))

    def fetch_plain(self):
        # type: () -> None
        # A plain scalar could be a simple key.
        self.save_possible_simple_key()
        # No simple keys after plain scalars. But note that `scan_plain` will
        # change this flag if the scan is finished at the beginning of the
        # line.
        self.allow_simple_key = False
        # Scan and add SCALAR. May change `allow_simple_key`.
        self.tokens.append(self.scan_plain())

    # Checkers.

    def check_directive(self):
        # type: () -> Any
        # DIRECTIVE: ^ '%' ...
        # The '%' indicator is already checked.
        if self.reader.column == 0:
            return True
        return None

    def check_document_start(self):
        # type: () -> Any
        # DOCUMENT-START: ^ '---' (' '|'\n')
        if self.reader.column == 0:
            if self.reader.prefix(3) == '---' and self.reader.peek(3) in _THE_END_SPACE_TAB:
                return True
        return None

    def check_document_end(self):
        # type: () -> Any
        # DOCUMENT-END: ^ '...' (' '|'\n')
        if self.reader.column == 0:
            if self.reader.prefix(3) == '...' and self.reader.peek(3) in _THE_END_SPACE_TAB:
                return True
        return None

    def check_block_entry(self):
        # type: () -> Any
        # BLOCK-ENTRY: '-' (' '|'\n')
        return self.reader.peek(1) in _THE_END_SPACE_TAB

    def check_key(self):
        # type: () -> Any
        # KEY(flow context): '?'
        if bool(self.flow_level):
            return True
        # KEY(block context): '?' (' '|'\n')
        return self.reader.peek(1) in _THE_END_SPACE_TAB

    def check_value(self):
        # type: () -> Any
        # VALUE(flow context): ':'
        if self.scanner_processing_version == (1, 1):
            if bool(self.flow_level):
                return True
        else:
            if bool(self.flow_level):
                if self.flow_context[-1] == '[':
                    if self.reader.peek(1) not in _THE_END_SPACE_TAB:
                        return False
                elif self.tokens and isinstance(self.tokens[-1], ValueToken):
                    # mapping flow context scanning a value token
                    if self.reader.peek(1) not in _THE_END_SPACE_TAB:
                        return False
                return True
        # VALUE(block context): ':' (' '|'\n')
        return self.reader.peek(1) in _THE_END_SPACE_TAB

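    # A hedged sketch of the YAML 1.2 branch above (illustrative only,
    # assuming ruamel.yaml's PyYAML-compatible ``safe_load``): inside a flow
    # sequence, a ':' not followed by a space is not a value indicator, so
    # 'a:1' stays one plain scalar.
    @staticmethod
    def _example_check_value():  # pragma: no cover - illustrative sketch
        import ruamel.yaml

        print(ruamel.yaml.safe_load('[a:1]'))   # ['a:1']
        print(ruamel.yaml.safe_load('{a: 1}'))  # {'a': 1}
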
    def check_plain(self):
        # type: () -> Any
        # A plain scalar may start with any non-space character except:
        #   '-', '?', ':', ',', '[', ']', '{', '}',
        #   '#', '&', '*', '!', '|', '>', '\'', '\"',
        #   '%', '@', '`'.
        #
        # It may also start with
        #   '-', '?', ':'
        # if it is followed by a non-space character.
        #
        # Note that we limit the last rule to the block context (except the
        # '-' character) because we want the flow context to be space
        # independent.
        srp = self.reader.peek
        ch = srp()
        if self.scanner_processing_version == (1, 1):
            return ch not in '\0 \t\r\n\x85\u2028\u2029-?:,[]{}#&*!|>\'"%@`' or (
                srp(1) not in _THE_END_SPACE_TAB
                and (ch == '-' or (not self.flow_level and ch in '?:'))
            )
        # YAML 1.2
        if ch not in '\0 \t\r\n\x85\u2028\u2029-?:,[]{}#&*!|>\'"%@`':
            # ###################                ^ ???
            return True
        ch1 = srp(1)
        if ch == '-' and ch1 not in _THE_END_SPACE_TAB:
            return True
        if ch == ':' and bool(self.flow_level) and ch1 not in _SPACE_TAB:
            return True

        return srp(1) not in _THE_END_SPACE_TAB and (
            ch == '-' or (not self.flow_level and ch in '?:')
        )

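    # A hedged illustration of the plain-scalar start rules above
    # (illustrative only, assuming ``safe_load`` from this package):
    @staticmethod
    def _example_check_plain():  # pragma: no cover - illustrative sketch
        import ruamel.yaml

        # '-' followed by a non-space starts a plain scalar ...
        print(ruamel.yaml.safe_load('- -1'))   # [-1]
        # ... while '-' followed by a space is a block entry indicator.
        print(ruamel.yaml.safe_load('- - 1'))  # [[1]]
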
    # Scanners.

    def scan_to_next_token(self):
        # type: () -> Any
        # We ignore spaces, line breaks and comments.
        # If we find a line break in the block context, we set the flag
        # `allow_simple_key` on.
        # The byte order mark is stripped if it's the first character in the
        # stream. We do not yet support BOM inside the stream as the
        # specification requires. Any such mark will be considered as a part
        # of the document.
        #
        # TODO: We need to make tab handling rules more sane. A good rule is
        #   Tabs cannot precede tokens
        #   BLOCK-SEQUENCE-START, BLOCK-MAPPING-START, BLOCK-END,
        #   KEY(block), VALUE(block), BLOCK-ENTRY
        # So the checking code is
        #   if <TAB>:
        #       self.allow_simple_keys = False
        # We also need to add the check for `allow_simple_keys == True` to
        # `unwind_indent` before issuing BLOCK-END.
        # Scanners for block, flow, and plain scalars need to be modified.
        srp = self.reader.peek
        srf = self.reader.forward
        if self.reader.index == 0 and srp() == '\uFEFF':
            srf()
        found = False
        _the_end = _THE_END
        while not found:
            while srp() == ' ':
                srf()
            if srp() == '#':
                while srp() not in _the_end:
                    srf()
            if self.scan_line_break():
                if not self.flow_level:
                    self.allow_simple_key = True
            else:
                found = True
        return None

    def scan_directive(self):
        # type: () -> Any
        # See the specification for details.
        srp = self.reader.peek
        srf = self.reader.forward
        start_mark = self.reader.get_mark()
        srf()
        name = self.scan_directive_name(start_mark)
        value = None
        if name == 'YAML':
            value = self.scan_yaml_directive_value(start_mark)
            end_mark = self.reader.get_mark()
        elif name == 'TAG':
            value = self.scan_tag_directive_value(start_mark)
            end_mark = self.reader.get_mark()
        else:
            end_mark = self.reader.get_mark()
            while srp() not in _THE_END:
                srf()
        self.scan_directive_ignored_line(start_mark)
        return DirectiveToken(name, value, start_mark, end_mark)

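    # A hedged sketch of the directive handling above (illustrative only,
    # assuming the package-level ``scan`` helper): '%YAML 1.1' at column 0
    # yields DirectiveToken('YAML', (1, 1), ...) ahead of DOCUMENT-START.
    @staticmethod
    def _example_directive_tokens():  # pragma: no cover - illustrative sketch
        import ruamel.yaml

        for token in ruamel.yaml.scan('%YAML 1.1\n--- a\n'):
            print(type(token).__name__)
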
    def scan_directive_name(self, start_mark):
        # type: (Any) -> Any
        # See the specification for details.
        length = 0
        srp = self.reader.peek
        ch = srp(length)
        while '0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' or ch in '-_:.':
            length += 1
            ch = srp(length)
        if not length:
            raise ScannerError(
                'while scanning a directive',
                start_mark,
                'expected alphabetic or numeric character, but found %r' % utf8(ch),
                self.reader.get_mark(),
            )
        value = self.reader.prefix(length)
        self.reader.forward(length)
        ch = srp()
        if ch not in '\0 \r\n\x85\u2028\u2029':
            raise ScannerError(
                'while scanning a directive',
                start_mark,
                'expected alphabetic or numeric character, but found %r' % utf8(ch),
                self.reader.get_mark(),
            )
        return value

    def scan_yaml_directive_value(self, start_mark):
        # type: (Any) -> Any
        # See the specification for details.
        srp = self.reader.peek
        srf = self.reader.forward
        while srp() == ' ':
            srf()
        major = self.scan_yaml_directive_number(start_mark)
        if srp() != '.':
            raise ScannerError(
                'while scanning a directive',
                start_mark,
                "expected a digit or '.', but found %r" % utf8(srp()),
                self.reader.get_mark(),
            )
        srf()
        minor = self.scan_yaml_directive_number(start_mark)
        if srp() not in '\0 \r\n\x85\u2028\u2029':
            raise ScannerError(
                'while scanning a directive',
                start_mark,
                "expected a digit or ' ', but found %r" % utf8(srp()),
                self.reader.get_mark(),
            )
        return (major, minor)

    def scan_yaml_directive_number(self, start_mark):
        # type: (Any) -> Any
        # See the specification for details.
        srp = self.reader.peek
        srf = self.reader.forward
        ch = srp()
        if not ('0' <= ch <= '9'):
            raise ScannerError(
                'while scanning a directive',
                start_mark,
                'expected a digit, but found %r' % utf8(ch),
                self.reader.get_mark(),
            )
        length = 0
        while '0' <= srp(length) <= '9':
            length += 1
        value = int(self.reader.prefix(length))
        srf(length)
        return value

    def scan_tag_directive_value(self, start_mark):
        # type: (Any) -> Any
        # See the specification for details.
        srp = self.reader.peek
        srf = self.reader.forward
        while srp() == ' ':
            srf()
        handle = self.scan_tag_directive_handle(start_mark)
        while srp() == ' ':
            srf()
        prefix = self.scan_tag_directive_prefix(start_mark)
        return (handle, prefix)

    def scan_tag_directive_handle(self, start_mark):
        # type: (Any) -> Any
        # See the specification for details.
        value = self.scan_tag_handle('directive', start_mark)
        ch = self.reader.peek()
        if ch != ' ':
            raise ScannerError(
                'while scanning a directive',
                start_mark,
                "expected ' ', but found %r" % utf8(ch),
                self.reader.get_mark(),
            )
        return value

    def scan_tag_directive_prefix(self, start_mark):
        # type: (Any) -> Any
        # See the specification for details.
        value = self.scan_tag_uri('directive', start_mark)
        ch = self.reader.peek()
        if ch not in '\0 \r\n\x85\u2028\u2029':
            raise ScannerError(
                'while scanning a directive',
                start_mark,
                "expected ' ', but found %r" % utf8(ch),
                self.reader.get_mark(),
            )
        return value

    def scan_directive_ignored_line(self, start_mark):
        # type: (Any) -> None
        # See the specification for details.
        srp = self.reader.peek
        srf = self.reader.forward
        while srp() == ' ':
            srf()
        if srp() == '#':
            while srp() not in _THE_END:
                srf()
        ch = srp()
        if ch not in _THE_END:
            raise ScannerError(
                'while scanning a directive',
                start_mark,
                'expected a comment or a line break, but found %r' % utf8(ch),
                self.reader.get_mark(),
            )
        self.scan_line_break()

    def scan_anchor(self, TokenClass):
        # type: (Any) -> Any
        # The specification does not restrict characters for anchors and
        # aliases. This may lead to problems, for instance, the document:
        #   [ *alias, value ]
        # can be interpreted in two ways, as
        #   [ "value" ]
        # and
        #   [ *alias , "value" ]
        # Therefore we restrict aliases to numbers and ASCII letters.
        srp = self.reader.peek
        start_mark = self.reader.get_mark()
        indicator = srp()
        if indicator == '*':
            name = 'alias'
        else:
            name = 'anchor'
        self.reader.forward()
        length = 0
        ch = srp(length)
        # while u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' or u'a' <= ch <= u'z' \
        #         or ch in u'-_':
        while check_anchorname_char(ch):
            length += 1
            ch = srp(length)
        if not length:
            raise ScannerError(
                'while scanning an %s' % (name,),
                start_mark,
                'expected alphabetic or numeric character, but found %r' % utf8(ch),
                self.reader.get_mark(),
            )
        value = self.reader.prefix(length)
        self.reader.forward(length)
        # ch1 = ch
        # ch = srp()  # no need to peek, ch is already set
        # assert ch1 == ch
        if ch not in '\0 \t\r\n\x85\u2028\u2029?:,[]{}%@`':
            raise ScannerError(
                'while scanning an %s' % (name,),
                start_mark,
                'expected alphabetic or numeric character, but found %r' % utf8(ch),
                self.reader.get_mark(),
            )
        end_mark = self.reader.get_mark()
        return TokenClass(value, start_mark, end_mark)

    def scan_tag(self):
        # type: () -> Any
        # See the specification for details.
        srp = self.reader.peek
        start_mark = self.reader.get_mark()
        ch = srp(1)
        if ch == '<':
            handle = None
            self.reader.forward(2)
            suffix = self.scan_tag_uri('tag', start_mark)
            if srp() != '>':
                raise ScannerError(
                    'while parsing a tag',
                    start_mark,
                    "expected '>', but found %r" % utf8(srp()),
                    self.reader.get_mark(),
                )
            self.reader.forward()
        elif ch in _THE_END_SPACE_TAB:
            handle = None
            suffix = '!'
            self.reader.forward()
        else:
            length = 1
            use_handle = False
            while ch not in '\0 \r\n\x85\u2028\u2029':
                if ch == '!':
                    use_handle = True
                    break
                length += 1
                ch = srp(length)
            handle = '!'
            if use_handle:
                handle = self.scan_tag_handle('tag', start_mark)
            else:
                handle = '!'
                self.reader.forward()
            suffix = self.scan_tag_uri('tag', start_mark)
        ch = srp()
        if ch not in '\0 \r\n\x85\u2028\u2029':
            raise ScannerError(
                'while scanning a tag',
                start_mark,
                "expected ' ', but found %r" % utf8(ch),
                self.reader.get_mark(),
            )
        value = (handle, suffix)
        end_mark = self.reader.get_mark()
        return TagToken(value, start_mark, end_mark)

    def scan_block_scalar(self, style, rt=False):
        # type: (Any, Optional[bool]) -> Any
        # See the specification for details.
        srp = self.reader.peek
        if style == '>':
            folded = True
        else:
            folded = False

        chunks = []  # type: List[Any]
        start_mark = self.reader.get_mark()

        # Scan the header.
        self.reader.forward()
        chomping, increment = self.scan_block_scalar_indicators(start_mark)
        # block scalar comment e.g. : |+  # comment text
        block_scalar_comment = self.scan_block_scalar_ignored_line(start_mark)

        # Determine the indentation level and go to the first non-empty line.
        min_indent = self.indent + 1
        if increment is None:
            # no increment and top level, min_indent could be 0
            if min_indent < 1 and (
                style not in '|>'
                or (self.scanner_processing_version == (1, 1))
                and getattr(
                    self.loader, 'top_level_block_style_scalar_no_indent_error_1_1', False
                )
            ):
                min_indent = 1
            breaks, max_indent, end_mark = self.scan_block_scalar_indentation()
            indent = max(min_indent, max_indent)
        else:
            if min_indent < 1:
                min_indent = 1
            indent = min_indent + increment - 1
            breaks, end_mark = self.scan_block_scalar_breaks(indent)
        line_break = ""

        # Scan the inner part of the block scalar.
        while self.reader.column == indent and srp() != '\0':
            chunks.extend(breaks)
            leading_non_space = srp() not in ' \t'
            length = 0
            while srp(length) not in _THE_END:
                length += 1
            chunks.append(self.reader.prefix(length))
            self.reader.forward(length)
            line_break = self.scan_line_break()
            breaks, end_mark = self.scan_block_scalar_breaks(indent)
            if style in '|>' and min_indent == 0:
                # at the beginning of a line, if in block style see if
                # end of document/start_new_document
                if self.check_document_start() or self.check_document_end():
                    break
            if self.reader.column == indent and srp() != '\0':

                # Unfortunately, folding rules are ambiguous.
                #
                # This is the folding according to the specification:

                if rt and folded and line_break == '\n':
                    chunks.append('\a')
                if folded and line_break == '\n' and leading_non_space and srp() not in ' \t':
                    if not breaks:
                        chunks.append(' ')
                else:
                    chunks.append(line_break)

                # This is Clark Evans's interpretation (also in the spec
                # examples):
                #
                # if folded and line_break == u'\n':
                #     if not breaks:
                #         if srp() not in ' \t':
                #             chunks.append(u' ')
                #         else:
                #             chunks.append(line_break)
                # else:
                #     chunks.append(line_break)
            else:
                break

        # Process trailing line breaks. The 'chomping' setting determines
        # whether they are included in the value.
        trailing = []  # type: List[Any]
        if chomping in [None, True]:
            chunks.append(line_break)
        if chomping is True:
            chunks.extend(breaks)
        elif chomping in [None, False]:
            trailing.extend(breaks)

        # We are done.
        token = ScalarToken("".join(chunks), False, start_mark, end_mark, style)
        if block_scalar_comment is not None:
            token.add_pre_comments([block_scalar_comment])
        if len(trailing) > 0:
            # nprint('trailing 1', trailing)  # XXXXX
            # Eat whitespaces and comments until we reach the next token.
            comment = self.scan_to_next_token()
            while comment:
                trailing.append(' ' * comment[1].column + comment[0])
                comment = self.scan_to_next_token()

            # Keep track of the trailing whitespace and following comments
            # as a comment token, if it isn't all included in the actual value.
            comment_end_mark = self.reader.get_mark()
            comment = CommentToken("".join(trailing), end_mark, comment_end_mark)
            token.add_post_comment(comment)
        return token

    def scan_block_scalar_indicators(self, start_mark):
        # type: (Any) -> Any
        # See the specification for details.
        srp = self.reader.peek
        chomping = None
        increment = None
        ch = srp()
        if ch in '+-':
            if ch == '+':
                chomping = True
            else:
                chomping = False
            self.reader.forward()
            ch = srp()
            if ch in '0123456789':
                increment = int(ch)
                if increment == 0:
                    raise ScannerError(
                        'while scanning a block scalar',
                        start_mark,
                        'expected indentation indicator in the range 1-9, but found 0',
                        self.reader.get_mark(),
                    )
                self.reader.forward()
        elif ch in '0123456789':
            increment = int(ch)
            if increment == 0:
                raise ScannerError(
                    'while scanning a block scalar',
                    start_mark,
                    'expected indentation indicator in the range 1-9, but found 0',
                    self.reader.get_mark(),
                )
            self.reader.forward()
            ch = srp()
            if ch in '+-':
                if ch == '+':
                    chomping = True
                else:
                    chomping = False
                self.reader.forward()
        ch = srp()
        if ch not in '\0 \r\n\x85\u2028\u2029':
            raise ScannerError(
                'while scanning a block scalar',
                start_mark,
                'expected chomping or indentation indicators, but found %r' % utf8(ch),
                self.reader.get_mark(),
            )
        return chomping, increment

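    # A hedged illustration of the chomping/indentation headers parsed above
    # (illustrative only, assuming ``safe_load`` from this package):
    @staticmethod
    def _example_block_scalar_headers():  # pragma: no cover - illustrative
        import ruamel.yaml

        print(repr(ruamel.yaml.safe_load('k: |+\n  text\n\n')['k']))  # 'text\n\n' (keep)
        print(repr(ruamel.yaml.safe_load('k: |-\n  text\n')['k']))    # 'text' (strip)
        print(repr(ruamel.yaml.safe_load('k: |2\n   text\n')['k']))   # ' text\n' (indent 2)
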
    def scan_block_scalar_ignored_line(self, start_mark):
        # type: (Any) -> Any
        # See the specification for details.
        srp = self.reader.peek
        srf = self.reader.forward
        prefix = ''
        comment = None
        while srp() == ' ':
            prefix += srp()
            srf()
        if srp() == '#':
            comment = prefix
            while srp() not in _THE_END:
                comment += srp()
                srf()
        ch = srp()
        if ch not in _THE_END:
            raise ScannerError(
                'while scanning a block scalar',
                start_mark,
                'expected a comment or a line break, but found %r' % utf8(ch),
                self.reader.get_mark(),
            )
        self.scan_line_break()
        return comment

    def scan_block_scalar_indentation(self):
        # type: () -> Any
        # See the specification for details.
        srp = self.reader.peek
        srf = self.reader.forward
        chunks = []
        max_indent = 0
        end_mark = self.reader.get_mark()
        while srp() in ' \r\n\x85\u2028\u2029':
            if srp() != ' ':
                chunks.append(self.scan_line_break())
                end_mark = self.reader.get_mark()
            else:
                srf()
                if self.reader.column > max_indent:
                    max_indent = self.reader.column
        return chunks, max_indent, end_mark

    def scan_block_scalar_breaks(self, indent):
        # type: (int) -> Any
        # See the specification for details.
        chunks = []
        srp = self.reader.peek
        srf = self.reader.forward
        end_mark = self.reader.get_mark()
        while self.reader.column < indent and srp() == ' ':
            srf()
        while srp() in '\r\n\x85\u2028\u2029':
            chunks.append(self.scan_line_break())
            end_mark = self.reader.get_mark()
            while self.reader.column < indent and srp() == ' ':
                srf()
        return chunks, end_mark

    def scan_flow_scalar(self, style):
        # type: (Any) -> Any
        # See the specification for details.
        # Note that we loosen indentation rules for quoted scalars. Quoted
        # scalars don't need to adhere to indentation because " and ' clearly
        # mark the beginning and the end of them. Therefore we are less
        # restrictive than the specification requires. We only need to check
        # that document separators are not included in scalars.
        if style == '"':
            double = True
        else:
            double = False
        srp = self.reader.peek
        chunks = []  # type: List[Any]
        start_mark = self.reader.get_mark()
        quote = srp()
        self.reader.forward()
        chunks.extend(self.scan_flow_scalar_non_spaces(double, start_mark))
        while srp() != quote:
            chunks.extend(self.scan_flow_scalar_spaces(double, start_mark))
            chunks.extend(self.scan_flow_scalar_non_spaces(double, start_mark))
        self.reader.forward()
        end_mark = self.reader.get_mark()
        return ScalarToken("".join(chunks), False, start_mark, end_mark, style)

    ESCAPE_REPLACEMENTS = {
        '0': '\0',
        'a': '\x07',
        'b': '\x08',
        't': '\x09',
        '\t': '\x09',
        'n': '\x0A',
        'v': '\x0B',
        'f': '\x0C',
        'r': '\x0D',
        'e': '\x1B',
        ' ': '\x20',
        '"': '"',
        '/': '/',  # as per http://www.json.org/
        '\\': '\\',
        'N': '\x85',
        '_': '\xA0',
        'L': '\u2028',
        'P': '\u2029',
    }

    ESCAPE_CODES = {'x': 2, 'u': 4, 'U': 8}

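    # A hedged sketch of the escape tables above (illustrative only, assuming
    # ``safe_load`` from this package): double-quoted scalars expand named
    # escapes and \x/\u/\U hex forms; single-quoted scalars only fold ''.
    @staticmethod
    def _example_escapes():  # pragma: no cover - illustrative sketch
        import ruamel.yaml

        print(repr(ruamel.yaml.safe_load('"a\\tb\\x41\\u00e9"')))  # 'a\tbA\xe9'
        print(repr(ruamel.yaml.safe_load("'it''s'")))              # "it's"
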
    def scan_flow_scalar_non_spaces(self, double, start_mark):
        # type: (Any, Any) -> Any
        # See the specification for details.
        chunks = []  # type: List[Any]
        srp = self.reader.peek
        srf = self.reader.forward
        while True:
            length = 0
            while srp(length) not in ' \n\'"\\\0\t\r\x85\u2028\u2029':
                length += 1
            if length != 0:
                chunks.append(self.reader.prefix(length))
                srf(length)
            ch = srp()
            if not double and ch == "'" and srp(1) == "'":
                chunks.append("'")
                srf(2)
            elif (double and ch == "'") or (not double and ch in '"\\'):
                chunks.append(ch)
                srf()
            elif double and ch == '\\':
                srf()
                ch = srp()
                if ch in self.ESCAPE_REPLACEMENTS:
                    chunks.append(self.ESCAPE_REPLACEMENTS[ch])
                    srf()
                elif ch in self.ESCAPE_CODES:
                    length = self.ESCAPE_CODES[ch]
                    srf()
                    for k in range(length):
                        if srp(k) not in '0123456789ABCDEFabcdef':
                            raise ScannerError(
                                'while scanning a double-quoted scalar',
                                start_mark,
                                'expected escape sequence of %d hexadecimal '
                                'numbers, but found %r' % (length, utf8(srp(k))),
                                self.reader.get_mark(),
                            )
                    code = int(self.reader.prefix(length), 16)
                    chunks.append(unichr(code))
                    srf(length)
                elif ch in '\n\r\x85\u2028\u2029':
                    self.scan_line_break()
                    chunks.extend(self.scan_flow_scalar_breaks(double, start_mark))
                else:
                    raise ScannerError(
                        'while scanning a double-quoted scalar',
                        start_mark,
                        'found unknown escape character %r' % utf8(ch),
                        self.reader.get_mark(),
                    )
            else:
                return chunks

    def scan_flow_scalar_spaces(self, double, start_mark):
        # type: (Any, Any) -> Any
        # See the specification for details.
        srp = self.reader.peek
        chunks = []
        length = 0
        while srp(length) in ' \t':
            length += 1
        whitespaces = self.reader.prefix(length)
        self.reader.forward(length)
        ch = srp()
        if ch == '\0':
            raise ScannerError(
                'while scanning a quoted scalar',
                start_mark,
                'found unexpected end of stream',
                self.reader.get_mark(),
            )
        elif ch in '\r\n\x85\u2028\u2029':
            line_break = self.scan_line_break()
            breaks = self.scan_flow_scalar_breaks(double, start_mark)
            if line_break != '\n':
                chunks.append(line_break)
            elif not breaks:
                chunks.append(' ')
            chunks.extend(breaks)
        else:
            chunks.append(whitespaces)
        return chunks

    def scan_flow_scalar_breaks(self, double, start_mark):
        # type: (Any, Any) -> Any
        # See the specification for details.
        chunks = []  # type: List[Any]
        srp = self.reader.peek
        srf = self.reader.forward
        while True:
            # Instead of checking indentation, we check for document
            # separators.
            prefix = self.reader.prefix(3)
            if (prefix == '---' or prefix == '...') and srp(3) in _THE_END_SPACE_TAB:
                raise ScannerError(
                    'while scanning a quoted scalar',
                    start_mark,
                    'found unexpected document separator',
                    self.reader.get_mark(),
                )
            while srp() in ' \t':
                srf()
            if srp() in '\r\n\x85\u2028\u2029':
                chunks.append(self.scan_line_break())
            else:
                return chunks

| 1517 def scan_plain(self): | |
| 1518 # type: () -> Any | |
| 1519 # See the specification for details. | |
| 1520 # We add an additional restriction for the flow context: | |
| 1521 # plain scalars in the flow context cannot contain ',', ': ' and '?'. | |
| 1522 # We also keep track of the `allow_simple_key` flag here. | |
| 1523 # Indentation rules are loosed for the flow context. | |
| 1524 srp = self.reader.peek | |
| 1525 srf = self.reader.forward | |
| 1526 chunks = [] # type: List[Any] | |
| 1527 start_mark = self.reader.get_mark() | |
| 1528 end_mark = start_mark | |
| 1529 indent = self.indent + 1 | |
| 1530 # We allow zero indentation for scalars, but then we need to check for | |
| 1531 # document separators at the beginning of the line. | |
| 1532 # if indent == 0: | |
| 1533 # indent = 1 | |
| 1534 spaces = [] # type: List[Any] | |
| 1535 while True: | |
| 1536 length = 0 | |
| 1537 if srp() == '#': | |
| 1538 break | |
| 1539 while True: | |
| 1540 ch = srp(length) | |
| 1541 if ch == ':' and srp(length + 1) not in _THE_END_SPACE_TAB: | |
| 1542 pass | |
| 1543 elif ch == '?' and self.scanner_processing_version != (1, 1): | |
| 1544 pass | |
| 1545 elif ( | |
| 1546 ch in _THE_END_SPACE_TAB | |
| 1547 or ( | |
| 1548 not self.flow_level | |
| 1549 and ch == ':' | |
| 1550 and srp(length + 1) in _THE_END_SPACE_TAB | |
| 1551 ) | |
| 1552 or (self.flow_level and ch in ',:?[]{}') | |
| 1553 ): | |
| 1554 break | |
| 1555 length += 1 | |
| 1556 # It's not clear what we should do with ':' in the flow context. | |
| 1557 if ( | |
| 1558 self.flow_level | |
| 1559 and ch == ':' | |
| 1560 and srp(length + 1) not in '\0 \t\r\n\x85\u2028\u2029,[]{}' | |
| 1561 ): | |
| 1562 srf(length) | |
| 1563 raise ScannerError( | |
| 1564 'while scanning a plain scalar', | |
| 1565 start_mark, | |
| 1566 "found unexpected ':'", | |
| 1567 self.reader.get_mark(), | |
| 1568 'Please check ' | |
| 1569 'http://pyyaml.org/wiki/YAMLColonInFlowContext ' | |
| 1570 'for details.', | |
| 1571 ) | |
| 1572 if length == 0: | |
| 1573 break | |
| 1574 self.allow_simple_key = False | |
| 1575 chunks.extend(spaces) | |
| 1576 chunks.append(self.reader.prefix(length)) | |
| 1577 srf(length) | |
| 1578 end_mark = self.reader.get_mark() | |
| 1579 spaces = self.scan_plain_spaces(indent, start_mark) | |
| 1580 if ( | |
| 1581 not spaces | |
| 1582 or srp() == '#' | |
| 1583 or (not self.flow_level and self.reader.column < indent) | |
| 1584 ): | |
| 1585 break | |
| 1586 | |
| 1587 token = ScalarToken("".join(chunks), True, start_mark, end_mark) | |
| 1588 if spaces and spaces[0] == '\n': | |
| 1589 # Create a comment token to preserve the trailing line breaks. | |
| 1590 comment = CommentToken("".join(spaces) + '\n', start_mark, end_mark) | |
| 1591 token.add_post_comment(comment) | |
| 1592 return token | |
| 1593 | |
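| # Editor's sketch of the ':' rule above: in 'key: value' the colon | |
| # is followed by a space and therefore ends the plain scalar 'key', | |
| # whereas in 'a:b' it is not, so all of 'a:b' scans as one plain | |
| # scalar. | |
| | |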
| 1594 def scan_plain_spaces(self, indent, start_mark): | |
| 1595 # type: (Any, Any) -> Any | |
| 1596 # See the specification for details. | |
| 1597 # The specification is really confusing about tabs in plain scalars. | |
| 1598 # We just forbid them completely. Do not use tabs in YAML! | |
| 1599 srp = self.reader.peek | |
| 1600 srf = self.reader.forward | |
| 1601 chunks = [] | |
| 1602 length = 0 | |
| 1603 while srp(length) in ' ': | |
| 1604 length += 1 | |
| 1605 whitespaces = self.reader.prefix(length) | |
| 1606 self.reader.forward(length) | |
| 1607 ch = srp() | |
| 1608 if ch in '\r\n\x85\u2028\u2029': | |
| 1609 line_break = self.scan_line_break() | |
| 1610 self.allow_simple_key = True | |
| 1611 prefix = self.reader.prefix(3) | |
| 1612 if (prefix == '---' or prefix == '...') and srp(3) in _THE_END_SPACE_TAB: | |
| 1613 return | |
| 1614 breaks = [] | |
| 1615 while srp() in ' \r\n\x85\u2028\u2029': | |
| 1616 if srp() == ' ': | |
| 1617 srf() | |
| 1618 else: | |
| 1619 breaks.append(self.scan_line_break()) | |
| 1620 prefix = self.reader.prefix(3) | |
| 1621 if (prefix == '---' or prefix == '...') and srp(3) in _THE_END_SPACE_TAB: | |
| 1622 return | |
| 1623 if line_break != '\n': | |
| 1624 chunks.append(line_break) | |
| 1625 elif not breaks: | |
| 1626 chunks.append(' ') | |
| 1627 chunks.extend(breaks) | |
| 1628 elif whitespaces: | |
| 1629 chunks.append(whitespaces) | |
| 1630 return chunks | |
| 1631 | |
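| # Editor's sketch: the same folding applies to multi-line plain | |
| # scalars, e.g. 'first\n second' (one plain scalar over two lines) | |
| # folds to 'first second', while a blank line in between survives | |
| # as a real '\n'. | |
| | |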
| 1632 def scan_tag_handle(self, name, start_mark): | |
| 1633 # type: (Any, Any) -> Any | |
| 1634 # See the specification for details. | |
| 1635 # For some strange reason, the specification does not allow '_' in | |
| 1636 # tag handles. I have allowed it anyway. | |
| 1637 srp = self.reader.peek | |
| 1638 ch = srp() | |
| 1639 if ch != '!': | |
| 1640 raise ScannerError( | |
| 1641 'while scanning a %s' % (name,), | |
| 1642 start_mark, | |
| 1643 "expected '!', but found %r" % utf8(ch), | |
| 1644 self.reader.get_mark(), | |
| 1645 ) | |
| 1646 length = 1 | |
| 1647 ch = srp(length) | |
| 1648 if ch != ' ': | |
| 1649 while '0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' or ch in '-_': | |
| 1650 length += 1 | |
| 1651 ch = srp(length) | |
| 1652 if ch != '!': | |
| 1653 self.reader.forward(length) | |
| 1654 raise ScannerError( | |
| 1655 'while scanning a %s' % (name,), | |
| 1656 start_mark, | |
| 1657 "expected '!', but found %r" % utf8(ch), | |
| 1658 self.reader.get_mark(), | |
| 1659 ) | |
| 1660 length += 1 | |
| 1661 value = self.reader.prefix(length) | |
| 1662 self.reader.forward(length) | |
| 1663 return value | |
| 1664 | |
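| # Editor's example: for the handle '!!' this returns '!!', for a | |
| # named handle such as '!e!' it returns '!e!', and a bare '!' | |
| # followed by a space is the primary handle, returned as just '!'. | |
| | |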
| 1665 def scan_tag_uri(self, name, start_mark): | |
| 1666 # type: (Any, Any) -> Any | |
| 1667 # See the specification for details. | |
| 1668 # Note: we do not check whether the URI is well-formed. | |
| 1669 srp = self.reader.peek | |
| 1670 chunks = [] | |
| 1671 length = 0 | |
| 1672 ch = srp(length) | |
| 1673 while ( | |
| 1674 '0' <= ch <= '9' | |
| 1675 or 'A' <= ch <= 'Z' | |
| 1676 or 'a' <= ch <= 'z' | |
| 1677 or ch in "-;/?:@&=+$,_.!~*'()[]%" | |
| 1678 ): | |
| 1679 if ch == '%': | |
| 1680 chunks.append(self.reader.prefix(length)) | |
| 1681 self.reader.forward(length) | |
| 1682 length = 0 | |
| 1683 chunks.append(self.scan_uri_escapes(name, start_mark)) | |
| 1684 else: | |
| 1685 length += 1 | |
| 1686 ch = srp(length) | |
| 1687 if length != 0: | |
| 1688 chunks.append(self.reader.prefix(length)) | |
| 1689 self.reader.forward(length) | |
| 1690 length = 0 | |
| 1691 if not chunks: | |
| 1692 raise ScannerError( | |
| 1693 'while scanning a %s' % (name,), | |
| 1694 start_mark, | |
| 1695 'expected URI, but found %r' % utf8(ch), | |
| 1696 self.reader.get_mark(), | |
| 1697 ) | |
| 1698 return "".join(chunks) | |
| 1699 | |
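| # Editor's sketch: for a suffix like 'tag:yaml.org,2002:str' every | |
| # character is in the allowed URI set, so the whole run is consumed | |
| # as one chunk; only '%' interrupts the run and is routed through | |
| # scan_uri_escapes below. | |
| | |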
| 1700 def scan_uri_escapes(self, name, start_mark): | |
| 1701 # type: (Any, Any) -> Any | |
| 1702 # See the specification for details. | |
| 1703 srp = self.reader.peek | |
| 1704 srf = self.reader.forward | |
| 1705 code_bytes = [] # type: List[Any] | |
| 1706 mark = self.reader.get_mark() | |
| 1707 while srp() == '%': | |
| 1708 srf() | |
| 1709 for k in range(2): | |
| 1710 if srp(k) not in '0123456789ABCDEFabcdef': | |
| 1711 raise ScannerError( | |
| 1712 'while scanning a %s' % (name,), | |
| 1713 start_mark, | |
| 1714 'expected URI escape sequence of 2 hexadecimal digits,' | |
| 1715 ' but found %r' % utf8(srp(k)), | |
| 1716 self.reader.get_mark(), | |
| 1717 ) | |
| 1718 if PY3: | |
| 1719 code_bytes.append(int(self.reader.prefix(2), 16)) | |
| 1720 else: | |
| 1721 code_bytes.append(chr(int(self.reader.prefix(2), 16))) | |
| 1722 srf(2) | |
| 1723 try: | |
| 1724 if PY3: | |
| 1725 value = bytes(code_bytes).decode('utf-8') | |
| 1726 else: | |
| 1727 value = unicode(b"".join(code_bytes), 'utf-8') | |
| 1728 except UnicodeDecodeError as exc: | |
| 1729 raise ScannerError('while scanning a %s' % (name,), start_mark, str(exc), mark) | |
| 1730 return value | |
| 1731 | |
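| # Editor's example: the percent-escape '%C3%A9' collects the bytes | |
| # 0xC3 0xA9 and decodes them as UTF-8 to u'\xe9' ('é'); a lone | |
| # '%C3' would fail to decode and surface as the ScannerError above. | |
| | |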
| 1732 def scan_line_break(self): | |
| 1733 # type: () -> Any | |
| 1734 # Transforms: | |
| 1735 # '\r\n' : '\n' | |
| 1736 # '\r' : '\n' | |
| 1737 # '\n' : '\n' | |
| 1738 # '\x85' : '\n' | |
| 1739 # '\u2028' : '\u2028' | |
| 1740 # '\u2029' : '\u2029' | |
| 1741 # default : '' | |
| 1742 ch = self.reader.peek() | |
| 1743 if ch in '\r\n\x85': | |
| 1744 if self.reader.prefix(2) == '\r\n': | |
| 1745 self.reader.forward(2) | |
| 1746 else: | |
| 1747 self.reader.forward() | |
| 1748 return '\n' | |
| 1749 elif ch in '\u2028\u2029': | |
| 1750 self.reader.forward() | |
| 1751 return ch | |
| 1752 return "" | |
| 1753 | |
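| # Usage sketch (editor's addition; assumes the public ruamel.yaml | |
| # API shipped in this tree): the RoundTripScanner below is what lets | |
| # round-trip mode keep comments attached to tokens, e.g. | |
| # | |
| # from ruamel.yaml import YAML | |
| # yaml = YAML() # typ='rt' (round-trip) is the default | |
| # data = yaml.load('a: 1 # keep me\n') | |
| # # the comment is stored with the node and survives yaml.dump() | |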
| 1754 | |
| 1755 class RoundTripScanner(Scanner): | |
| 1756 def check_token(self, *choices): | |
| 1757 # type: (Any) -> bool | |
| 1758 # Check if the next token is one of the given types. | |
| 1759 while self.need_more_tokens(): | |
| 1760 self.fetch_more_tokens() | |
| 1761 self._gather_comments() | |
| 1762 if bool(self.tokens): | |
| 1763 if not choices: | |
| 1764 return True | |
| 1765 for choice in choices: | |
| 1766 if isinstance(self.tokens[0], choice): | |
| 1767 return True | |
| 1768 return False | |
| 1769 | |
| 1770 def peek_token(self): | |
| 1771 # type: () -> Any | |
| 1772 # Return the next token, but do not delete it from the queue. | |
| 1773 while self.need_more_tokens(): | |
| 1774 self.fetch_more_tokens() | |
| 1775 self._gather_comments() | |
| 1776 if bool(self.tokens): | |
| 1777 return self.tokens[0] | |
| 1778 return None | |
| 1779 | |
| 1780 def _gather_comments(self): | |
| 1781 # type: () -> Any | |
| 1782 """combine multiple comment lines""" | |
| 1783 comments = [] # type: List[Any] | |
| 1784 if not self.tokens: | |
| 1785 return comments | |
| 1786 if isinstance(self.tokens[0], CommentToken): | |
| 1787 comment = self.tokens.pop(0) | |
| 1788 self.tokens_taken += 1 | |
| 1789 comments.append(comment) | |
| 1790 while self.need_more_tokens(): | |
| 1791 self.fetch_more_tokens() | |
| 1792 if not self.tokens: | |
| 1793 return comments | |
| 1794 if isinstance(self.tokens[0], CommentToken): | |
| 1795 self.tokens_taken += 1 | |
| 1796 comment = self.tokens.pop(0) | |
| 1797 # nprint('dropping2', comment) | |
| 1798 comments.append(comment) | |
| 1799 if len(comments) >= 1: | |
| 1800 self.tokens[0].add_pre_comments(comments) | |
| 1801 # pull in post comment on e.g. ':' | |
| 1802 if not self.done and len(self.tokens) < 2: | |
| 1803 self.fetch_more_tokens() | |
| 1804 | |
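| # Editor's sketch: given '# one\n# two\nkey: value\n', the two | |
| # CommentTokens are popped above and attached to the next real token | |
| # with add_pre_comments, so they re-emerge above 'key' on dump. | |
| | |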
| 1805 def get_token(self): | |
| 1806 # type: () -> Any | |
| 1807 # Return the next token. | |
| 1808 while self.need_more_tokens(): | |
| 1809 self.fetch_more_tokens() | |
| 1810 self._gather_comments() | |
| 1811 if bool(self.tokens): | |
| 1812 # nprint('tk', self.tokens) | |
| 1813 # only add a post comment to single-line tokens: | |
| 1814 # scalar, value token, FlowXEndToken; otherwise | |
| 1815 # hidden stream tokens could get them (leave them and they will | |
| 1816 # become pre comments for the next map/seq) | |
| 1817 if ( | |
| 1818 len(self.tokens) > 1 | |
| 1819 and isinstance( | |
| 1820 self.tokens[0], | |
| 1821 (ScalarToken, ValueToken, FlowSequenceEndToken, FlowMappingEndToken), | |
| 1822 ) | |
| 1823 and isinstance(self.tokens[1], CommentToken) | |
| 1824 and self.tokens[0].end_mark.line == self.tokens[1].start_mark.line | |
| 1825 ): | |
| 1826 self.tokens_taken += 1 | |
| 1827 c = self.tokens.pop(1) | |
| 1828 self.fetch_more_tokens() | |
| 1829 while len(self.tokens) > 1 and isinstance(self.tokens[1], CommentToken): | |
| 1830 self.tokens_taken += 1 | |
| 1831 c1 = self.tokens.pop(1) | |
| 1832 c.value = c.value + (' ' * c1.start_mark.column) + c1.value | |
| 1833 self.fetch_more_tokens() | |
| 1834 self.tokens[0].add_post_comment(c) | |
| 1835 elif ( | |
| 1836 len(self.tokens) > 1 | |
| 1837 and isinstance(self.tokens[0], ScalarToken) | |
| 1838 and isinstance(self.tokens[1], CommentToken) | |
| 1839 and self.tokens[0].end_mark.line != self.tokens[1].start_mark.line | |
| 1840 ): | |
| 1841 self.tokens_taken += 1 | |
| 1842 c = self.tokens.pop(1) | |
| 1843 c.value = ( | |
| 1844 '\n' * (c.start_mark.line - self.tokens[0].end_mark.line) | |
| 1845 + (' ' * c.start_mark.column) | |
| 1846 + c.value | |
| 1847 ) | |
| 1848 self.tokens[0].add_post_comment(c) | |
| 1849 self.fetch_more_tokens() | |
| 1850 while len(self.tokens) > 1 and isinstance(self.tokens[1], CommentToken): | |
| 1851 self.tokens_taken += 1 | |
| 1852 c1 = self.tokens.pop(1) | |
| 1853 c.value = c.value + (' ' * c1.start_mark.column) + c1.value | |
| 1854 self.fetch_more_tokens() | |
| 1855 self.tokens_taken += 1 | |
| 1856 return self.tokens.pop(0) | |
| 1857 return None | |
| 1858 | |
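| # Editor's sketch: for 'a: 1 # x\n' the CommentToken starts on the | |
| # same line the ScalarToken for '1' ends on, so the first branch | |
| # above attaches it via add_post_comment; a comment on a later line | |
| # takes the second branch and is prefixed with the intervening | |
| # newlines and indentation. | |
| | |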
| 1859 def fetch_comment(self, comment): | |
| 1860 # type: (Any) -> None | |
| 1861 value, start_mark, end_mark = comment | |
| 1862 while value and value[-1] == ' ': | |
| 1863 # empty line within indented key context | |
| 1864 # no need to update end-mark, that is not used | |
| 1865 value = value[:-1] | |
| 1866 self.tokens.append(CommentToken(value, start_mark, end_mark)) | |
| 1867 | |
| 1868 # scanner | |
| 1869 | |
| 1870 def scan_to_next_token(self): | |
| 1871 # type: () -> Any | |
| 1872 # We ignore spaces, line breaks and comments. | |
| 1873 # If we find a line break in the block context, we set the flag | |
| 1874 # `allow_simple_key` on. | |
| 1875 # The byte order mark is stripped if it's the first character in the | |
| 1876 # stream. We do not yet support BOM inside the stream as the | |
| 1877 # specification requires. Any such mark will be considered part | |
| 1878 # of the document. | |
| 1879 # | |
| 1880 # TODO: We need to make the tab handling rules saner. A good rule is: | |
| 1881 # Tabs cannot precede the tokens | |
| 1882 # BLOCK-SEQUENCE-START, BLOCK-MAPPING-START, BLOCK-END, | |
| 1883 # KEY(block), VALUE(block), BLOCK-ENTRY. | |
| 1884 # So the checking code is: | |
| 1885 # if <TAB>: | |
| 1886 # self.allow_simple_keys = False | |
| 1887 # We also need to add the check for `allow_simple_keys == True` to | |
| 1888 # `unwind_indent` before issuing BLOCK-END. | |
| 1889 # Scanners for block, flow, and plain scalars need to be modified. | |
| 1890 | |
| 1891 srp = self.reader.peek | |
| 1892 srf = self.reader.forward | |
| 1893 if self.reader.index == 0 and srp() == '\uFEFF': | |
| 1894 srf() | |
| 1895 found = False | |
| 1896 while not found: | |
| 1897 while srp() == ' ': | |
| 1898 srf() | |
| 1899 ch = srp() | |
| 1900 if ch == '#': | |
| 1901 start_mark = self.reader.get_mark() | |
| 1902 comment = ch | |
| 1903 srf() | |
| 1904 while ch not in _THE_END: | |
| 1905 ch = srp() | |
| 1906 if ch == '\0': # don't gobble the end-of-stream character | |
| 1907 # but add an explicit newline, as "YAML processors should terminate | |
| 1908 # the stream with an explicit line break" | |
| 1909 # https://yaml.org/spec/1.2/spec.html#id2780069 | |
| 1910 comment += '\n' | |
| 1911 break | |
| 1912 comment += ch | |
| 1913 srf() | |
| 1914 # gather any blank lines following the comment too | |
| 1915 ch = self.scan_line_break() | |
| 1916 while len(ch) > 0: | |
| 1917 comment += ch | |
| 1918 ch = self.scan_line_break() | |
| 1919 end_mark = self.reader.get_mark() | |
| 1920 if not self.flow_level: | |
| 1921 self.allow_simple_key = True | |
| 1922 return comment, start_mark, end_mark | |
| 1923 if bool(self.scan_line_break()): | |
| 1924 start_mark = self.reader.get_mark() | |
| 1925 if not self.flow_level: | |
| 1926 self.allow_simple_key = True | |
| 1927 ch = srp() | |
| 1928 if ch == '\n': # empty top-level lines | |
| 1929 start_mark = self.reader.get_mark() | |
| 1930 comment = "" | |
| 1931 while ch: | |
| 1932 ch = self.scan_line_break(empty_line=True) | |
| 1933 comment += ch | |
| 1934 if srp() == '#': | |
| 1935 # empty line followed by indented real comment | |
| 1936 comment = comment.rsplit('\n', 1)[0] + '\n' | |
| 1937 end_mark = self.reader.get_mark() | |
| 1938 return comment, start_mark, end_mark | |
| 1939 else: | |
| 1940 found = True | |
| 1941 return None | |
| 1942 | |
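| # Editor's sketch: positioned at '# note\n\nkey: ...', the loop | |
| # above accumulates the comment text plus the trailing blank line | |
| # and returns ('# note\n\n', start_mark, end_mark), which | |
| # fetch_comment wraps in a CommentToken. | |
| | |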
| 1943 def scan_line_break(self, empty_line=False): | |
| 1944 # type: (bool) -> Text | |
| 1945 # Transforms: | |
| 1946 # '\r\n' : '\n' | |
| 1947 # '\r' : '\n' | |
| 1948 # '\n' : '\n' | |
| 1949 # '\x85' : '\n' | |
| 1950 # '\u2028' : '\u2028' | |
| 1951 # '\u2029' : '\u2029' | |
| 1952 # default : '' | |
| 1953 ch = self.reader.peek() # type: Text | |
| 1954 if ch in '\r\n\x85': | |
| 1955 if self.reader.prefix(2) == '\r\n': | |
| 1956 self.reader.forward(2) | |
| 1957 else: | |
| 1958 self.reader.forward() | |
| 1959 return '\n' | |
| 1960 elif ch in '\u2028\u2029': | |
| 1961 self.reader.forward() | |
| 1962 return ch | |
| 1963 elif empty_line and ch in '\t ': | |
| 1964 self.reader.forward() | |
| 1965 return ch | |
| 1966 return "" | |
| 1967 | |
| 1968 def scan_block_scalar(self, style, rt=True): | |
| 1969 # type: (Any, Optional[bool]) -> Any | |
| 1970 return Scanner.scan_block_scalar(self, style, rt=rt) | |
| 1971 | |
| 1972 | |
| 1973 # try: | |
| 1974 # import psyco | |
| 1975 # psyco.bind(Scanner) | |
| 1976 # except ImportError: | |
| 1977 # pass |
