Mercurial > repos > shellac > guppy_basecaller
comparison env/lib/python3.7/site-packages/yaml/emitter.py @ 0:26e78fe6e8c4 draft
"planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
author | shellac |
---|---|
date | Sat, 02 May 2020 07:14:21 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:26e78fe6e8c4 |
---|---|
1 | |
2 # Emitter expects events obeying the following grammar: | |
3 # stream ::= STREAM-START document* STREAM-END | |
4 # document ::= DOCUMENT-START node DOCUMENT-END | |
5 # node ::= SCALAR | sequence | mapping | |
6 # sequence ::= SEQUENCE-START node* SEQUENCE-END | |
7 # mapping ::= MAPPING-START (node node)* MAPPING-END | |
8 | |
9 __all__ = ['Emitter', 'EmitterError'] | |
10 | |
11 from .error import YAMLError | |
12 from .events import * | |
13 | |
class EmitterError(YAMLError):
    """Error raised by the emitter for unexpected events or invalid input."""
16 | |
class ScalarAnalysis:
    """Plain record of the properties computed for one scalar value.

    Every constructor argument is stored unchanged under the attribute of
    the same name.
    """

    def __init__(self, scalar, empty, multiline,
            allow_flow_plain, allow_block_plain,
            allow_single_quoted, allow_double_quoted,
            allow_block):
        # The analysed text and its basic shape.
        self.scalar, self.empty, self.multiline = scalar, empty, multiline
        # Plain styles, in flow and in block context.
        self.allow_flow_plain, self.allow_block_plain = (
            allow_flow_plain, allow_block_plain)
        # Quoted styles.
        self.allow_single_quoted, self.allow_double_quoted = (
            allow_single_quoted, allow_double_quoted)
        # Block (literal / folded) styles.
        self.allow_block = allow_block
30 | |
31 class Emitter: | |
32 | |
# Built-in tag prefixes, each mapped to the handle that abbreviates it.
DEFAULT_TAG_PREFIXES = {'!': '!', 'tag:yaml.org,2002:': '!!'}
37 | |
38 def __init__(self, stream, canonical=None, indent=None, width=None, | |
39 allow_unicode=None, line_break=None): | |
40 | |
41 # The stream should have the methods `write` and possibly `flush`. | |
42 self.stream = stream | |
43 | |
44 # Encoding can be overridden by STREAM-START. | |
45 self.encoding = None | |
46 | |
47 # Emitter is a state machine with a stack of states to handle nested | |
48 # structures. | |
49 self.states = [] | |
50 self.state = self.expect_stream_start | |
51 | |
52 # Current event and the event queue. | |
53 self.events = [] | |
54 self.event = None | |
55 | |
56 # The current indentation level and the stack of previous indents. | |
57 self.indents = [] | |
58 self.indent = None | |
59 | |
60 # Flow level. | |
61 self.flow_level = 0 | |
62 | |
63 # Contexts. | |
64 self.root_context = False | |
65 self.sequence_context = False | |
66 self.mapping_context = False | |
67 self.simple_key_context = False | |
68 | |
69 # Characteristics of the last emitted character: | |
70 # - current position. | |
71 # - is it a whitespace? | |
72 # - is it an indention character | |
73 # (indentation space, '-', '?', or ':')? | |
74 self.line = 0 | |
75 self.column = 0 | |
76 self.whitespace = True | |
77 self.indention = True | |
78 | |
79 # Whether the document requires an explicit document indicator | |
80 self.open_ended = False | |
81 | |
82 # Formatting details. | |
83 self.canonical = canonical | |
84 self.allow_unicode = allow_unicode | |
85 self.best_indent = 2 | |
86 if indent and 1 < indent < 10: | |
87 self.best_indent = indent | |
88 self.best_width = 80 | |
89 if width and width > self.best_indent*2: | |
90 self.best_width = width | |
91 self.best_line_break = '\n' | |
92 if line_break in ['\r', '\n', '\r\n']: | |
93 self.best_line_break = line_break | |
94 | |
95 # Tag prefixes. | |
96 self.tag_prefixes = None | |
97 | |
98 # Prepared anchor and tag. | |
99 self.prepared_anchor = None | |
100 self.prepared_tag = None | |
101 | |
102 # Scalar analysis and style. | |
103 self.analysis = None | |
104 self.style = None | |
105 | |
def dispose(self):
    """Reset the state attributes so they no longer reference the emitter."""
    # Both attributes hold handler references; clearing them removes the
    # self-references.
    self.state = None
    self.states = []
110 | |
def emit(self, event):
    """Queue *event* and process queued events while enough are available."""
    self.events.append(event)
    while True:
        # Stop as soon as the head of the queue needs more lookahead.
        if self.need_more_events():
            return
        self.event = self.events.pop(0)
        self.state()
        self.event = None
117 | |
118 # In some cases, we wait for a few next events before emitting. | |
119 | |
def need_more_events(self):
    """Tell whether the head of the queue must wait for further events.

    An empty queue always needs more.  Document, sequence and mapping
    starts wait for 1, 2 and 3 following events respectively (see
    need_events); every other event can be handled at once.
    """
    if not self.events:
        return True
    head = self.events[0]
    lookahead_by_type = (
        (DocumentStartEvent, 1),
        (SequenceStartEvent, 2),
        (MappingStartEvent, 3),
    )
    for event_type, lookahead in lookahead_by_type:
        if isinstance(head, event_type):
            return self.need_events(lookahead)
    return False
132 | |
def need_events(self, count):
    """Tell whether fewer than *count* events follow the queue head.

    Returns False early when the structure opened by the head event is
    already closed within the queue, or when the stream end is queued.
    """
    depth = 0
    for queued in self.events[1:]:
        if isinstance(queued, (DocumentStartEvent, CollectionStartEvent)):
            depth += 1
        elif isinstance(queued, (DocumentEndEvent, CollectionEndEvent)):
            depth -= 1
        elif isinstance(queued, StreamEndEvent):
            depth = -1
        if depth < 0:
            return False
    queued_total = len(self.events)
    return queued_total < count + 1
145 | |
def increase_indent(self, flow=False, indentless=False):
    """Push the current indent and compute the indent for a nested node.

    With no indent yet, the new one is `best_indent` for flow context and
    0 otherwise.  Otherwise `best_indent` is added unless *indentless*.
    """
    previous = self.indent
    self.indents.append(previous)
    if previous is None:
        self.indent = self.best_indent if flow else 0
    elif not indentless:
        self.indent = previous + self.best_indent
155 | |
156 # States. | |
157 | |
158 # Stream handlers. | |
159 | |
def expect_stream_start(self):
    """Handle the StreamStartEvent that must open every stream.

    Raises EmitterError for any other event.
    """
    if not isinstance(self.event, StreamStartEvent):
        raise EmitterError("expected StreamStartEvent, but got %s"
                % self.event)
    # The event's encoding is used only when the stream does not declare
    # an `encoding` attribute of its own.
    requested = self.event.encoding
    if requested and not hasattr(self.stream, 'encoding'):
        self.encoding = requested
    self.write_stream_start()
    self.state = self.expect_first_document_start
169 | |
def expect_nothing(self):
    """State entered after the stream end: any further event is an error."""
    message = "expected nothing, but got %s" % self.event
    raise EmitterError(message)
172 | |
173 # Document handlers. | |
174 | |
def expect_first_document_start(self):
    """Handle the first document start by delegating with first=True."""
    result = self.expect_document_start(first=True)
    return result
177 | |
def expect_document_start(self, first=False):
    """Handle the start of a document, or the end of the stream.

    DocumentStartEvent: write the %YAML / %TAG directives requested by
    the event, rebuild the tag prefix table, write '---' unless the
    document may start implicitly, then move to the document root.
    StreamEndEvent: close an open-ended document with '...', finish the
    stream and accept nothing further.
    Any other event raises EmitterError.
    """
    event = self.event

    if isinstance(event, DocumentStartEvent):
        # Directives cannot follow an open-ended document directly.
        if (event.version or event.tags) and self.open_ended:
            self.write_indicator('...', True)
            self.write_indent()
        if event.version:
            self.write_version_directive(self.prepare_version(event.version))
        self.tag_prefixes = self.DEFAULT_TAG_PREFIXES.copy()
        if event.tags:
            for handle in sorted(event.tags.keys()):
                prefix = event.tags[handle]
                self.tag_prefixes[prefix] = handle
                self.write_tag_directive(self.prepare_tag_handle(handle),
                        self.prepare_tag_prefix(prefix))
        # '---' may be omitted only for a first document that is not
        # explicit, not canonical, has no directives and is not empty.
        explicit_start = (not first or event.explicit or self.canonical
                or event.version or event.tags
                or self.check_empty_document())
        if explicit_start:
            self.write_indent()
            self.write_indicator('---', True)
            if self.canonical:
                self.write_indent()
        self.state = self.expect_document_root
        return

    if isinstance(event, StreamEndEvent):
        if self.open_ended:
            self.write_indicator('...', True)
            self.write_indent()
        self.write_stream_end()
        self.state = self.expect_nothing
        return

    raise EmitterError("expected DocumentStartEvent, but got %s"
            % self.event)
213 | |
def expect_document_end(self):
    """Handle a DocumentEndEvent, writing '...' when it is explicit.

    Raises EmitterError for any other event.
    """
    if not isinstance(self.event, DocumentEndEvent):
        raise EmitterError("expected DocumentEndEvent, but got %s"
                % self.event)
    self.write_indent()
    if self.event.explicit:
        self.write_indicator('...', True)
        self.write_indent()
    self.flush_stream()
    self.state = self.expect_document_start
225 | |
def expect_document_root(self):
    """Emit the root node; the document end handler runs after it."""
    after_root = self.expect_document_end
    self.states.append(after_root)
    self.expect_node(root=True)
229 | |
230 # Node handlers. | |
231 | |
def expect_node(self, root=False, sequence=False, mapping=False,
        simple_key=False):
    """Record the context flags and dispatch on the current node event.

    Aliases go to expect_alias.  Scalars and collections first get their
    anchor and tag written; collections are then emitted in flow style
    when already inside a flow collection, in canonical mode, when the
    event asks for it, or when the collection is empty, and in block
    style otherwise.  Any other event raises EmitterError.
    """
    self.root_context = root
    self.sequence_context = sequence
    self.mapping_context = mapping
    self.simple_key_context = simple_key

    event = self.event
    if isinstance(event, AliasEvent):
        self.expect_alias()
        return
    if not isinstance(event, (ScalarEvent, CollectionStartEvent)):
        raise EmitterError("expected NodeEvent, but got %s" % self.event)

    self.process_anchor('&')
    self.process_tag()
    if isinstance(event, ScalarEvent):
        self.expect_scalar()
    elif isinstance(event, SequenceStartEvent):
        use_flow = (self.flow_level or self.canonical or event.flow_style
                or self.check_empty_sequence())
        if use_flow:
            self.expect_flow_sequence()
        else:
            self.expect_block_sequence()
    elif isinstance(event, MappingStartEvent):
        use_flow = (self.flow_level or self.canonical or event.flow_style
                or self.check_empty_mapping())
        if use_flow:
            self.expect_flow_mapping()
        else:
            self.expect_block_mapping()
259 | |
def expect_alias(self):
    """Write the current alias as '*anchor' and return to the saved state.

    Raises EmitterError when the alias event carries no anchor.
    """
    anchor_missing = self.event.anchor is None
    if anchor_missing:
        raise EmitterError("anchor is not specified for alias")
    self.process_anchor('*')
    self.state = self.states.pop()
265 | |
def expect_scalar(self):
    """Write the current scalar, then restore the indent and saved state."""
    # The indent is raised only for the duration of the scalar itself.
    self.increase_indent(flow=True)
    self.process_scalar()
    restored_indent = self.indents.pop()
    self.indent = restored_indent
    self.state = self.states.pop()
271 | |
272 # Flow sequence handlers. | |
273 | |
def expect_flow_sequence(self):
    """Open a flow sequence: write '[', go one flow level deeper, indent."""
    self.write_indicator('[', True, whitespace=True)
    self.flow_level = self.flow_level + 1
    self.increase_indent(flow=True)
    self.state = self.expect_first_flow_sequence_item
279 | |
def expect_first_flow_sequence_item(self):
    """Handle the first item of a flow sequence, or its immediate end."""
    if isinstance(self.event, SequenceEndEvent):
        # Empty sequence: undo the indent and flow level, then close.
        self.indent = self.indents.pop()
        self.flow_level -= 1
        self.write_indicator(']', False)
        self.state = self.states.pop()
        return
    if self.canonical or self.column > self.best_width:
        self.write_indent()
    self.states.append(self.expect_flow_sequence_item)
    self.expect_node(sequence=True)
291 | |
def expect_flow_sequence_item(self):
    """Handle a later item of a flow sequence, or the end of the sequence."""
    if isinstance(self.event, SequenceEndEvent):
        self.indent = self.indents.pop()
        self.flow_level -= 1
        # Canonical output puts a trailing comma and a fresh line before
        # the closing bracket.
        if self.canonical:
            self.write_indicator(',', False)
            self.write_indent()
        self.write_indicator(']', False)
        self.state = self.states.pop()
        return
    self.write_indicator(',', False)
    if self.canonical or self.column > self.best_width:
        self.write_indent()
    self.states.append(self.expect_flow_sequence_item)
    self.expect_node(sequence=True)
307 | |
308 # Flow mapping handlers. | |
309 | |
def expect_flow_mapping(self):
    """Open a flow mapping: write '{', go one flow level deeper, indent."""
    self.write_indicator('{', True, whitespace=True)
    self.flow_level = self.flow_level + 1
    self.increase_indent(flow=True)
    self.state = self.expect_first_flow_mapping_key
315 | |
def expect_first_flow_mapping_key(self):
    """Handle the first key of a flow mapping, or its immediate end."""
    if isinstance(self.event, MappingEndEvent):
        # Empty mapping: undo the indent and flow level, then close.
        self.indent = self.indents.pop()
        self.flow_level -= 1
        self.write_indicator('}', False)
        self.state = self.states.pop()
        return
    if self.canonical or self.column > self.best_width:
        self.write_indent()
    if not self.canonical and self.check_simple_key():
        self.states.append(self.expect_flow_mapping_simple_value)
        self.expect_node(mapping=True, simple_key=True)
        return
    # Complex key: introduce it explicitly with '?'.
    self.write_indicator('?', True)
    self.states.append(self.expect_flow_mapping_value)
    self.expect_node(mapping=True)
332 | |
def expect_flow_mapping_key(self):
    """Handle a later key of a flow mapping, or the end of the mapping."""
    if isinstance(self.event, MappingEndEvent):
        self.indent = self.indents.pop()
        self.flow_level -= 1
        # Canonical output puts a trailing comma and a fresh line before
        # the closing brace.
        if self.canonical:
            self.write_indicator(',', False)
            self.write_indent()
        self.write_indicator('}', False)
        self.state = self.states.pop()
        return
    self.write_indicator(',', False)
    if self.canonical or self.column > self.best_width:
        self.write_indent()
    if not self.canonical and self.check_simple_key():
        self.states.append(self.expect_flow_mapping_simple_value)
        self.expect_node(mapping=True, simple_key=True)
        return
    # Complex key: introduce it explicitly with '?'.
    self.write_indicator('?', True)
    self.states.append(self.expect_flow_mapping_value)
    self.expect_node(mapping=True)
353 | |
def expect_flow_mapping_simple_value(self):
    """Write ':' right after a simple flow key, then emit the value."""
    next_state = self.expect_flow_mapping_key
    self.write_indicator(':', False)
    self.states.append(next_state)
    self.expect_node(mapping=True)
358 | |
def expect_flow_mapping_value(self):
    """Write ':' after a complex flow key, then emit the value."""
    needs_new_line = self.canonical or self.column > self.best_width
    if needs_new_line:
        self.write_indent()
    self.write_indicator(':', True)
    self.states.append(self.expect_flow_mapping_key)
    self.expect_node(mapping=True)
365 | |
366 # Block sequence handlers. | |
367 | |
def expect_block_sequence(self):
    """Start a block sequence, choosing whether it gets its own indent."""
    # No extra indent when in mapping context and the last character
    # written was not an indention character.
    skip_indent = (self.mapping_context and not self.indention)
    self.increase_indent(flow=False, indentless=skip_indent)
    self.state = self.expect_first_block_sequence_item
372 | |
def expect_first_block_sequence_item(self):
    """Handle the first block sequence item by delegating with first=True."""
    result = self.expect_block_sequence_item(first=True)
    return result
375 | |
def expect_block_sequence_item(self, first=False):
    """Write '- ' and emit one block sequence item, or end the sequence.

    The end of the sequence is only recognised when *first* is false.
    """
    if first or not isinstance(self.event, SequenceEndEvent):
        self.write_indent()
        self.write_indicator('-', True, indention=True)
        self.states.append(self.expect_block_sequence_item)
        self.expect_node(sequence=True)
        return
    self.indent = self.indents.pop()
    self.state = self.states.pop()
385 | |
386 # Block mapping handlers. | |
387 | |
def expect_block_mapping(self):
    """Start a block mapping: raise the indent and wait for the first key."""
    first_key_state = self.expect_first_block_mapping_key
    self.increase_indent(flow=False)
    self.state = first_key_state
391 | |
def expect_first_block_mapping_key(self):
    """Handle the first block mapping key by delegating with first=True."""
    result = self.expect_block_mapping_key(first=True)
    return result
394 | |
def expect_block_mapping_key(self, first=False):
    """Emit one block mapping key, or end the mapping.

    The end of the mapping is only recognised when *first* is false.
    Simple keys are written inline; other keys are introduced with '?'.
    """
    if not first and isinstance(self.event, MappingEndEvent):
        self.indent = self.indents.pop()
        self.state = self.states.pop()
        return
    self.write_indent()
    if self.check_simple_key():
        self.states.append(self.expect_block_mapping_simple_value)
        self.expect_node(mapping=True, simple_key=True)
        return
    self.write_indicator('?', True, indention=True)
    self.states.append(self.expect_block_mapping_value)
    self.expect_node(mapping=True)
408 | |
def expect_block_mapping_simple_value(self):
    """Write ':' right after a simple block key, then emit the value."""
    next_state = self.expect_block_mapping_key
    self.write_indicator(':', False)
    self.states.append(next_state)
    self.expect_node(mapping=True)
413 | |
def expect_block_mapping_value(self):
    """Write ':' on its own indented line after a complex key, then the value."""
    next_state = self.expect_block_mapping_key
    self.write_indent()
    self.write_indicator(':', True, indention=True)
    self.states.append(next_state)
    self.expect_node(mapping=True)
419 | |
420 # Checkers. | |
421 | |
def check_empty_sequence(self):
    """Truthy when the current sequence start is followed by its end."""
    if not isinstance(self.event, SequenceStartEvent):
        return False
    # With an empty queue the (falsy) queue itself is the result.
    return self.events and isinstance(self.events[0], SequenceEndEvent)
425 | |
def check_empty_mapping(self):
    """Truthy when the current mapping start is followed by its end."""
    if not isinstance(self.event, MappingStartEvent):
        return False
    # With an empty queue the (falsy) queue itself is the result.
    return self.events and isinstance(self.events[0], MappingEndEvent)
429 | |
def check_empty_document(self):
    """Truthy when the document consists of one bare, empty scalar.

    That is: the current event is a document start and the next queued
    event is an implicit scalar with no anchor, no tag and value ''.
    """
    if not isinstance(self.event, DocumentStartEvent) or not self.events:
        return False
    upcoming = self.events[0]
    if not isinstance(upcoming, ScalarEvent):
        return False
    return (upcoming.anchor is None and upcoming.tag is None
            and upcoming.implicit and upcoming.value == '')
436 | |
def check_simple_key(self):
    """Decide whether the current event can be written as a simple key.

    The prepared anchor, prepared tag and scalar analysis are computed
    here when missing and cached on the emitter.  Their combined length
    must stay below 128, and the event must be an alias, a non-empty
    single-line scalar, or an empty collection.
    """
    event = self.event
    total = 0
    if isinstance(event, NodeEvent) and event.anchor is not None:
        if self.prepared_anchor is None:
            self.prepared_anchor = self.prepare_anchor(event.anchor)
        total += len(self.prepared_anchor)
    if (isinstance(event, (ScalarEvent, CollectionStartEvent))
            and event.tag is not None):
        if self.prepared_tag is None:
            self.prepared_tag = self.prepare_tag(event.tag)
        total += len(self.prepared_tag)
    if isinstance(event, ScalarEvent):
        if self.analysis is None:
            self.analysis = self.analyze_scalar(event.value)
        total += len(self.analysis.scalar)
    if not total < 128:
        return False
    return (isinstance(event, AliasEvent)
        or (isinstance(event, ScalarEvent)
                and not self.analysis.empty and not self.analysis.multiline)
        or self.check_empty_sequence() or self.check_empty_mapping())
456 | |
457 # Anchor, Tag, and Scalar processors. | |
458 | |
def process_anchor(self, indicator):
    """Write the current event's anchor, prefixed with *indicator*.

    The cached prepared anchor is used when present and is always cleared
    afterwards.  Nothing is written when the event has no anchor.
    """
    anchor = self.event.anchor
    if anchor is not None:
        if self.prepared_anchor is None:
            self.prepared_anchor = self.prepare_anchor(anchor)
        if self.prepared_anchor:
            self.write_indicator(indicator+self.prepared_anchor, True)
    self.prepared_anchor = None
468 | |
def process_tag(self):
    """Write the tag of the current event unless it may be left implicit.

    For scalars the style is chosen first (and cached); the tag is
    omitted when the matching `implicit` flag allows it.  For other
    events a truthy `implicit` allows omission.  In canonical mode a tag
    that is present is never omitted.  Raises EmitterError when a tag is
    required but missing.
    """
    event = self.event
    tag = event.tag
    may_omit = not self.canonical or tag is None
    if isinstance(event, ScalarEvent):
        if self.style is None:
            self.style = self.choose_scalar_style()
        # implicit[0] applies to plain style, implicit[1] to the others.
        if may_omit and (event.implicit[0] if self.style == ''
                else event.implicit[1]):
            self.prepared_tag = None
            return
        if event.implicit[0] and tag is None:
            tag = '!'
            self.prepared_tag = None
    elif may_omit and event.implicit:
        self.prepared_tag = None
        return
    if tag is None:
        raise EmitterError("tag is not specified")
    if self.prepared_tag is None:
        self.prepared_tag = self.prepare_tag(tag)
    if self.prepared_tag:
        self.write_indicator(self.prepared_tag, True)
    self.prepared_tag = None
493 | |
def choose_scalar_style(self):
    """Pick the style for the current scalar.

    Returns '' (plain), "'" (single quoted), '|' or '>' (block), or '"'
    (double quoted).  The requested style is used when the analysis and
    context allow it; double quotes are the fallback.
    """
    if self.analysis is None:
        self.analysis = self.analyze_scalar(self.event.value)
    analysis = self.analysis
    requested = self.event.style
    as_key = self.simple_key_context

    if requested == '"' or self.canonical:
        return '"'

    # Plain: only without a requested style, and not for an empty or
    # multiline simple key.
    if not requested and self.event.implicit[0]:
        unsuitable_key = as_key and (analysis.empty or analysis.multiline)
        if self.flow_level:
            plain_allowed = analysis.allow_flow_plain
        else:
            plain_allowed = analysis.allow_block_plain
        if not unsuitable_key and plain_allowed:
            return ''

    # Block styles: only outside flow collections and simple keys.
    if requested and requested in '|>':
        if not self.flow_level and not as_key and analysis.allow_block:
            return requested

    # Single quotes: the default quoted style when permitted.
    if not requested or requested == '\'':
        if (analysis.allow_single_quoted
                and not (as_key and analysis.multiline)):
            return '\''

    return '"'
514 | |
def process_scalar(self):
    """Write the current scalar in its chosen style and clear the caches.

    The analysis and style are computed when not already cached.  Quoted
    and plain scalars may be split across lines unless the scalar is a
    simple key.
    """
    if self.analysis is None:
        self.analysis = self.analyze_scalar(self.event.value)
    if self.style is None:
        self.style = self.choose_scalar_style()
    may_split = (not self.simple_key_context)
    text = self.analysis.scalar
    style = self.style
    if style == '"':
        self.write_double_quoted(text, may_split)
    elif style == '\'':
        self.write_single_quoted(text, may_split)
    elif style == '>':
        self.write_folded(text)
    elif style == '|':
        self.write_literal(text)
    else:
        self.write_plain(text, may_split)
    # The cached values belong to this event only.
    self.analysis = None
    self.style = None
536 | |
537 # Analyzers. | |
538 | |
def prepare_version(self, version):
    """Format a (major, minor) version pair as 'major.minor'.

    Raises EmitterError when the major version is not 1.
    """
    major, minor = version
    if major == 1:
        return '%d.%d' % (major, minor)
    raise EmitterError("unsupported YAML version: %d.%d" % (major, minor))
544 | |
def prepare_tag_handle(self, handle):
    """Validate a tag handle and return it unchanged.

    The handle must be non-empty, start and end with '!', and contain
    only ASCII letters, digits, '-' and '_' in between.  Raises
    EmitterError otherwise.
    """
    if not handle:
        raise EmitterError("tag handle must not be empty")
    if not (handle[0] == '!' and handle[-1] == '!'):
        raise EmitterError("tag handle must start and end with '!': %r" % handle)
    for ch in handle[1:-1]:
        is_word_char = ('0' <= ch <= '9' or 'A' <= ch <= 'Z'
                or 'a' <= ch <= 'z' or ch in '-_')
        if not is_word_char:
            raise EmitterError("invalid character %r in the tag handle: %r"
                    % (ch, handle))
    return handle
556 | |
def prepare_tag_prefix(self, prefix):
    """Return *prefix* with every disallowed character percent-escaped.

    ASCII letters, digits and the characters -;/?!:@&=+$,_.~*'()[] are
    kept as they are.  Any other character is replaced by the '%XX'
    escapes of its UTF-8 bytes.  Raises EmitterError for an empty prefix.
    """
    if not prefix:
        raise EmitterError("tag prefix must not be empty")
    pieces = []
    for ch in prefix:
        if ('0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z'
                or ch in '-;/?!:@&=+$,_.~*\'()[]'):
            pieces.append(ch)
        else:
            # Iterating over bytes yields ints, so each byte is formatted
            # directly; calling ord() on it would raise TypeError.
            pieces.extend('%%%02X' % byte for byte in ch.encode('utf-8'))
    return ''.join(pieces)
579 | |
def prepare_tag(self, tag):
    """Return the text to write for *tag*.

    The tag is shortened with a handle from `self.tag_prefixes` when one
    of the known prefixes matches; prefixes are tried in sorted order and
    the last match wins.  Characters that are not allowed are replaced by
    the '%XX' escapes of their UTF-8 bytes.  Without a handle the result
    is the verbatim form '!<...>'.  Raises EmitterError for an empty tag.
    """
    if not tag:
        raise EmitterError("tag must not be empty")
    if tag == '!':
        return tag
    handle = None
    suffix = tag
    for prefix in sorted(self.tag_prefixes.keys()):
        if tag.startswith(prefix)   \
                and (prefix == '!' or len(prefix) < len(tag)):
            handle = self.tag_prefixes[prefix]
            suffix = tag[len(prefix):]
    pieces = []
    for ch in suffix:
        # '!' is kept only when the handle is not '!'.
        if ('0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z'
                or ch in '-;/?:@&=+$,_.~*\'()[]'
                or (ch == '!' and handle != '!')):
            pieces.append(ch)
        else:
            pieces.extend('%%%02X' % byte for byte in ch.encode('utf-8'))
    suffix_text = ''.join(pieces)
    if handle:
        return '%s%s' % (handle, suffix_text)
    return '!<%s>' % suffix_text
615 | |
def prepare_anchor(self, anchor):
    """Validate an anchor name and return it unchanged.

    The anchor must be non-empty and consist only of ASCII letters,
    digits, '-' and '_'.  Raises EmitterError otherwise.
    """
    if not anchor:
        raise EmitterError("anchor must not be empty")
    for ch in anchor:
        is_word_char = ('0' <= ch <= '9' or 'A' <= ch <= 'Z'
                or 'a' <= ch <= 'z' or ch in '-_')
        if not is_word_char:
            raise EmitterError("invalid character %r in the anchor: %r"
                    % (ch, anchor))
    return anchor
625 | |
def analyze_scalar(self, scalar):
    """Inspect *scalar* and report which output styles may represent it.

    Returns a ScalarAnalysis holding the scalar itself, whether it is
    empty or contains line breaks, and one flag per style (flow plain,
    block plain, single quoted, double quoted, block).  Reads
    `self.allow_unicode` to decide whether non-ASCII printable characters
    count as special.
    """

    # Empty scalar is a special case.
    if not scalar:
        return ScalarAnalysis(scalar=scalar, empty=True, multiline=False,
                allow_flow_plain=False, allow_block_plain=True,
                allow_single_quoted=True, allow_double_quoted=True,
                allow_block=False)

    whitespace_chars = '\0 \t\r\n\x85\u2028\u2029'
    break_chars = '\n\x85\u2028\u2029'
    last_index = len(scalar) - 1

    # Indicators and special characters.
    block_indicators = False
    flow_indicators = False
    line_breaks = False
    special_characters = False

    # Important whitespace combinations.
    leading_space = False
    leading_break = False
    trailing_space = False
    trailing_break = False
    break_space = False
    space_break = False

    # Check document indicators.
    if scalar.startswith('---') or scalar.startswith('...'):
        block_indicators = True
        flow_indicators = True

    # Whether the previous character was a space / a break.
    previous_space = False
    previous_break = False

    for index, ch in enumerate(scalar):
        # First character or preceded by a whitespace.
        preceded_by_whitespace = (index == 0
                or scalar[index-1] in whitespace_chars)
        # Last character or followed by a whitespace.
        followed_by_whitespace = (index == last_index
                or scalar[index+1] in whitespace_chars)

        # Check for indicators.
        if index == 0:
            # Leading indicators are special characters.
            if ch in '#,[]{}&*!|>\'\"%@`':
                flow_indicators = True
                block_indicators = True
            if ch in '?:':
                flow_indicators = True
                if followed_by_whitespace:
                    block_indicators = True
            if ch == '-' and followed_by_whitespace:
                flow_indicators = True
                block_indicators = True
        else:
            # Some indicators cannot appear within a scalar as well.
            if ch in ',?[]{}':
                flow_indicators = True
            if ch == ':':
                flow_indicators = True
                if followed_by_whitespace:
                    block_indicators = True
            if ch == '#' and preceded_by_whitespace:
                flow_indicators = True
                block_indicators = True

        # Check for line breaks, special, and unicode characters.
        if ch in break_chars:
            line_breaks = True
        if not (ch == '\n' or '\x20' <= ch <= '\x7E'):
            if (ch == '\x85' or '\xA0' <= ch <= '\uD7FF'
                    or '\uE000' <= ch <= '\uFFFD'
                    or '\U00010000' <= ch < '\U0010ffff') and ch != '\uFEFF':
                # Printable non-ASCII: special only if unicode is not
                # allowed in the output.
                if not self.allow_unicode:
                    special_characters = True
            else:
                special_characters = True

        # Detect important whitespace combinations.
        if ch == ' ':
            if index == 0:
                leading_space = True
            if index == last_index:
                trailing_space = True
            if previous_break:
                break_space = True
            previous_space = True
            previous_break = False
        elif ch in break_chars:
            if index == 0:
                leading_break = True
            if index == last_index:
                trailing_break = True
            if previous_space:
                space_break = True
            previous_space = False
            previous_break = True
        else:
            previous_space = False
            previous_break = False

    # Let's decide what styles are allowed.
    allow_flow_plain = True
    allow_block_plain = True
    allow_single_quoted = True
    allow_double_quoted = True
    allow_block = True

    # Leading and trailing whitespaces are bad for plain scalars.
    if (leading_space or leading_break
            or trailing_space or trailing_break):
        allow_flow_plain = allow_block_plain = False

    # We do not permit trailing spaces for block scalars.
    if trailing_space:
        allow_block = False

    # Spaces at the beginning of a new line are only acceptable for block
    # scalars.
    if break_space:
        allow_flow_plain = allow_block_plain = allow_single_quoted = False

    # Spaces followed by breaks, as well as special character are only
    # allowed for double quoted scalars.
    if space_break or special_characters:
        allow_flow_plain = allow_block_plain =  \
        allow_single_quoted = allow_block = False

    # Although the plain scalar writer supports breaks, we never emit
    # multiline plain scalars.
    if line_breaks:
        allow_flow_plain = allow_block_plain = False

    # Flow indicators are forbidden for flow plain scalars.
    if flow_indicators:
        allow_flow_plain = False

    # Block indicators are forbidden for block plain scalars.
    if block_indicators:
        allow_block_plain = False

    return ScalarAnalysis(scalar=scalar,
            empty=False, multiline=line_breaks,
            allow_flow_plain=allow_flow_plain,
            allow_block_plain=allow_block_plain,
            allow_single_quoted=allow_single_quoted,
            allow_double_quoted=allow_double_quoted,
            allow_block=allow_block)
785 | |
786 # Writers. | |
787 | |
def flush_stream(self):
    """Flush the output stream when it offers a `flush` method."""
    if not hasattr(self.stream, 'flush'):
        return
    self.stream.flush()
791 | |
def write_stream_start(self):
    """Write a byte order mark when the output encoding is a UTF-16 one."""
    encoding = self.encoding
    if not encoding or not encoding.startswith('utf-16'):
        return
    # Write BOM if needed.
    self.stream.write('\uFEFF'.encode(encoding))
796 | |
def write_stream_end(self):
    """Finish the stream; the only work is flushing the output."""
    self.flush_stream()
799 | |
def write_indicator(self, indicator, need_whitespace,
        whitespace=False, indention=False):
    """Write *indicator*, preceded by a space when one is required.

    need_whitespace -- a separating space is written first unless the
                       last character written was whitespace.
    whitespace      -- becomes the new `self.whitespace` flag.
    indention       -- `self.indention` stays set only if it was set and
                       this argument is true.
    """
    separator_needed = need_whitespace and not self.whitespace
    data = ' '+indicator if separator_needed else indicator
    self.whitespace = whitespace
    self.indention = self.indention and indention
    self.column += len(data)
    self.open_ended = False
    encoding = self.encoding
    if encoding:
        data = data.encode(encoding)
    self.stream.write(data)
813 | |
def write_indent(self):
    """Move to the current indent, starting a new line when necessary.

    A line break is written first when the last character was not an
    indention character, when the column is already past the indent, or
    when it is exactly at the indent but not after whitespace.
    """
    target = self.indent or 0
    past_indent = self.column > target
    at_indent_after_text = self.column == target and not self.whitespace
    if not self.indention or past_indent or at_indent_after_text:
        self.write_line_break()
    if self.column < target:
        self.whitespace = True
        padding = ' '*(target-self.column)
        self.column = target
        if self.encoding:
            padding = padding.encode(self.encoding)
        self.stream.write(padding)
826 | |
def write_line_break(self, data=None):
    """Write a line break and reset the position tracking.

    data -- the break to write; defaults to `self.best_line_break`.
    """
    line_break = self.best_line_break if data is None else data
    self.whitespace = True
    self.indention = True
    self.line += 1
    self.column = 0
    if self.encoding:
        line_break = line_break.encode(self.encoding)
    self.stream.write(line_break)
837 | |
def write_version_directive(self, version_text):
    """Write a '%YAML <version>' directive followed by a line break."""
    directive = '%%YAML %s' % version_text
    encoding = self.encoding
    if encoding:
        directive = directive.encode(encoding)
    self.stream.write(directive)
    self.write_line_break()
844 | |
def write_tag_directive(self, handle_text, prefix_text):
    """Write a '%TAG <handle> <prefix>' directive followed by a line break."""
    directive = '%%TAG %s %s' % (handle_text, prefix_text)
    encoding = self.encoding
    if encoding:
        directive = directive.encode(encoding)
    self.stream.write(directive)
    self.write_line_break()
851 | |
852 # Scalar streams. | |
853 | |
def write_single_quoted(self, text, split=True):
    """Write *text* as a single-quoted scalar.

    The text is scanned as runs of spaces, runs of line breaks and runs
    of other characters; one position past the end is visited with no
    character so the final run gets written.  Each quote character in the
    text is written doubled.  With *split* true, a single interior space
    may be replaced by a line break plus indent once the column exceeds
    `self.best_width`.
    """
    break_chars = '\n\x85\u2028\u2029'
    length = len(text)

    def put(data):
        # Account for the columns used, encode if needed, then write.
        self.column += len(data)
        if self.encoding:
            data = data.encode(self.encoding)
        self.stream.write(data)

    self.write_indicator('\'', True)
    in_spaces = False
    in_breaks = False
    start = 0
    for end in range(length + 1):
        ch = text[end] if end < length else None
        if in_spaces:
            # A run of spaces ends here.
            if ch != ' ':
                if start+1 == end and self.column > self.best_width and split   \
                        and start != 0 and end != length:
                    self.write_indent()
                else:
                    put(text[start:end])
                start = end
        elif in_breaks:
            # A run of line breaks ends here.
            if ch is None or ch not in break_chars:
                if text[start] == '\n':
                    self.write_line_break()
                for br in text[start:end]:
                    if br == '\n':
                        self.write_line_break()
                    else:
                        self.write_line_break(br)
                self.write_indent()
                start = end
        else:
            # A run of ordinary characters ends at a space, a break, a
            # quote, or the end of the text.
            if ch is None or ch in ' \n\x85\u2028\u2029' or ch == '\'':
                if start < end:
                    put(text[start:end])
                    start = end
        if ch == '\'':
            put('\'\'')
            start = end + 1
        if ch is not None:
            in_spaces = (ch == ' ')
            in_breaks = (ch in break_chars)
    self.write_indicator('\'', False)
907 | |
# Short escape codes for double-quoted scalars.  write_double_quoted looks a
# character up here and, when found, writes a backslash followed by the
# mapped value; characters not listed fall back to \xXX, \uXXXX or \UXXXXXXXX.
ESCAPE_REPLACEMENTS = {
    '\0': '0',        # NUL
    '\x07': 'a',      # bell
    '\x08': 'b',      # backspace
    '\x09': 't',      # horizontal tab
    '\x0A': 'n',      # line feed
    '\x0B': 'v',      # vertical tab
    '\x0C': 'f',      # form feed
    '\x0D': 'r',      # carriage return
    '\x1B': 'e',      # escape
    '\"': '\"',       # double quote
    '\\': '\\',       # backslash
    '\x85': 'N',      # next line (NEL)
    '\xA0': '_',      # no-break space
    '\u2028': 'L',    # line separator
    '\u2029': 'P',    # paragraph separator
}
925 | |
def write_double_quoted(self, text, split=True):
    """Write `text` as a double-quoted scalar, escaping where required.

    text  -- the scalar content.
    split -- when true, the scalar may be folded with a trailing backslash
             and `write_indent()` once the pending output would exceed
             `self.best_width`.

    A character is escaped when it is one of `"`, backslash, U+0085,
    U+2028, U+2029, U+FEFF, or when it lies outside U+0020..U+007E and
    (if `self.allow_unicode` is set) outside U+00A0..U+D7FF and
    U+E000..U+FFFD.  The escape comes from `self.ESCAPE_REPLACEMENTS` when
    the character is listed there, otherwise it is \\xXX, \\uXXXX or
    \\UXXXXXXXX depending on the code point.
    """
    def emit(chunk):
        # Count the unencoded length, then write (encoded if required).
        self.column += len(chunk)
        if self.encoding:
            chunk = chunk.encode(self.encoding)
        self.stream.write(chunk)

    self.write_indicator('"', True)
    begin = 0
    size = len(text)
    # One extra iteration (ch is None) flushes whatever is still pending.
    for pos in range(size + 1):
        ch = text[pos] if pos < size else None
        if ch is None:
            stop_run = True
        elif ch in '"\\\x85\u2028\u2029\uFEFF':
            stop_run = True
        else:
            writable = ('\x20' <= ch <= '\x7E'
                        or (self.allow_unicode
                            and ('\xA0' <= ch <= '\uD7FF'
                                 or '\uE000' <= ch <= '\uFFFD')))
            stop_run = not writable
        if stop_run:
            # Flush the literal run collected so far.
            if begin < pos:
                emit(text[begin:pos])
                begin = pos
            if ch is not None:
                if ch in self.ESCAPE_REPLACEMENTS:
                    escaped = '\\'+self.ESCAPE_REPLACEMENTS[ch]
                elif ch <= '\xFF':
                    escaped = '\\x%02X' % ord(ch)
                elif ch <= '\uFFFF':
                    escaped = '\\u%04X' % ord(ch)
                else:
                    escaped = '\\U%08X' % ord(ch)
                emit(escaped)
                begin = pos + 1
        too_wide = (0 < pos < size-1 and (ch == ' ' or begin >= pos)
                    and self.column+(pos-begin) > self.best_width and split)
        if too_wide:
            # Fold: finish the line with a backslash and continue indented.
            pending = text[begin:pos]+'\\'
            if begin < pos:
                begin = pos
            emit(pending)
            self.write_indent()
            self.whitespace = False
            self.indention = False
            if text[begin] == ' ':
                # Extra backslash written before a continuation that
                # starts with a space.
                emit('\\')
    self.write_indicator('"', False)
979 | |
def determine_block_hints(self, text):
    """Return the hint string placed after a block scalar indicator.

    text -- the scalar content.

    The result is the concatenation of:
      * `str(self.best_indent)` when the text starts with a space or a
        line break;
      * '-' when the text does not end with a line break, or '+' when the
        text is a single line break or ends with two line breaks.
    Empty text yields an empty string.
    """
    if not text:
        return ''
    line_breaks = '\n\x85\u2028\u2029'
    parts = []
    if text[0] in ' \n\x85\u2028\u2029':
        parts.append(str(self.best_indent))
    if text[-1] not in line_breaks:
        parts.append('-')
    elif len(text) == 1 or text[-2] in line_breaks:
        parts.append('+')
    return ''.join(parts)
990 | |
def write_folded(self, text):
    """Write `text` as a folded block scalar (the '>' indicator).

    text -- the scalar content.

    The header is '>' plus the hints from `determine_block_hints`; when
    the hints end in '+', `self.open_ended` is set.  Runs of line breaks
    are re-emitted with `write_line_break`, with one extra break when the
    run starts with '\\n' and sits between two lines that do not start
    with a space.  A single space between words is replaced by
    `write_indent()` once the column exceeds `self.best_width`.
    """
    line_breaks = '\n\x85\u2028\u2029'

    def emit(chunk):
        # Count the unencoded length, then write (encoded if required).
        self.column += len(chunk)
        if self.encoding:
            chunk = chunk.encode(self.encoding)
        self.stream.write(chunk)

    hints = self.determine_block_hints(text)
    self.write_indicator('>'+hints, True)
    if hints[-1:] == '+':
        self.open_ended = True
    self.write_line_break()
    leading_space = True
    in_spaces = False
    in_breaks = True
    begin = 0
    size = len(text)
    # One extra iteration (ch is None) flushes whatever is still pending.
    for pos in range(size + 1):
        ch = text[pos] if pos < size else None
        if in_breaks:
            if ch is None or ch not in line_breaks:
                # The pending run of line breaks ends here.
                if (not leading_space and ch is not None and ch != ' '
                        and text[begin] == '\n'):
                    self.write_line_break()
                leading_space = (ch == ' ')
                for br in text[begin:pos]:
                    if br == '\n':
                        self.write_line_break()
                    else:
                        self.write_line_break(br)
                if ch is not None:
                    self.write_indent()
                begin = pos
        elif in_spaces:
            if ch != ' ':
                # The pending run of spaces ends here.
                if begin + 1 == pos and self.column > self.best_width:
                    self.write_indent()
                else:
                    emit(text[begin:pos])
                begin = pos
        elif ch is None or ch in ' \n\x85\u2028\u2029':
            # The pending run of ordinary characters ends here.
            emit(text[begin:pos])
            if ch is None:
                self.write_line_break()
            begin = pos
        if ch is not None:
            in_breaks = (ch in line_breaks)
            in_spaces = (ch == ' ')
1044 | |
def write_literal(self, text):
    """Write `text` as a literal block scalar (the '|' indicator).

    text -- the scalar content.

    The header is '|' plus the hints from `determine_block_hints`; when
    the hints end in '+', `self.open_ended` is set.  Each run of line
    breaks is re-emitted through `write_line_break`, and each following
    line is preceded by `write_indent()`.  Line content is written to the
    stream without touching `self.column`.
    """
    line_breaks = '\n\x85\u2028\u2029'
    hints = self.determine_block_hints(text)
    self.write_indicator('|'+hints, True)
    if hints[-1:] == '+':
        self.open_ended = True
    self.write_line_break()
    in_breaks = True
    begin = 0
    size = len(text)
    # One extra iteration (ch is None) flushes whatever is still pending.
    for pos in range(size + 1):
        ch = text[pos] if pos < size else None
        if in_breaks:
            if ch is None or ch not in line_breaks:
                # The pending run of line breaks ends here.
                for br in text[begin:pos]:
                    if br == '\n':
                        self.write_line_break()
                    else:
                        self.write_line_break(br)
                if ch is not None:
                    self.write_indent()
                begin = pos
        elif ch is None or ch in line_breaks:
            # The pending line content ends here.
            line_text = text[begin:pos]
            if self.encoding:
                line_text = line_text.encode(self.encoding)
            self.stream.write(line_text)
            if ch is None:
                self.write_line_break()
            begin = pos
        if ch is not None:
            in_breaks = (ch in line_breaks)
1079 | |
def write_plain(self, text, split=True):
    """Write `text` as a plain (unquoted) scalar.

    text  -- the scalar content; nothing is written when it is empty.
    split -- when true, a single space between words may be replaced by
             `write_indent()` once the column exceeds `self.best_width`.

    When `self.root_context` is set, `self.open_ended` is set (even for
    empty text).  A separating space is written first unless the emitter
    is already at whitespace.  Runs of line breaks are re-emitted through
    `write_line_break` (with one extra break when the run starts with
    '\\n') followed by `write_indent()`.

    A text that ends in a line break is handled like any other run of
    breaks, as in `write_single_quoted`.  Previously the end-of-text
    sentinel (None) was tested with `not in` against a string and raised
    TypeError.
    """
    line_breaks = '\n\x85\u2028\u2029'

    def emit(chunk):
        # Count the unencoded length, then write (encoded if required).
        self.column += len(chunk)
        if self.encoding:
            chunk = chunk.encode(self.encoding)
        self.stream.write(chunk)

    if self.root_context:
        self.open_ended = True
    if not text:
        return
    if not self.whitespace:
        emit(' ')
    self.whitespace = False
    self.indention = False
    in_spaces = False
    in_breaks = False
    begin = 0
    size = len(text)
    # One extra iteration (ch is None) flushes whatever is still pending.
    for pos in range(size + 1):
        ch = text[pos] if pos < size else None
        if in_spaces:
            if ch != ' ':
                # The pending run of spaces ends here.
                if begin + 1 == pos and self.column > self.best_width and split:
                    self.write_indent()
                    self.whitespace = False
                    self.indention = False
                else:
                    emit(text[begin:pos])
                begin = pos
        elif in_breaks:
            # `ch is None` must be checked first: `None not in str` raises
            # TypeError.
            if ch is None or ch not in line_breaks:
                if text[begin] == '\n':
                    self.write_line_break()
                for br in text[begin:pos]:
                    if br == '\n':
                        self.write_line_break()
                    else:
                        self.write_line_break(br)
                self.write_indent()
                self.whitespace = False
                self.indention = False
                begin = pos
        elif ch is None or ch in ' \n\x85\u2028\u2029':
            # The pending run of ordinary characters ends here.
            emit(text[begin:pos])
            begin = pos
        if ch is not None:
            in_spaces = (ch == ' ')
            in_breaks = (ch in line_breaks)