Mercurial > repos > guerler > springsuite
comparison planemo/lib/python3.7/site-packages/webencodings/tests.py @ 1:56ad4e20f292 draft
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
| author | guerler |
|---|---|
| date | Fri, 31 Jul 2020 00:32:28 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 0:d30785e31577 | 1:56ad4e20f292 |
|---|---|
| 1 # coding: utf-8 | |
| 2 """ | |
| 3 | |
| 4 webencodings.tests | |
| 5 ~~~~~~~~~~~~~~~~~~ | |
| 6 | |
| 7 A basic test suite for Encoding. | |
| 8 | |
| 9 :copyright: Copyright 2012 by Simon Sapin | |
| 10 :license: BSD, see LICENSE for details. | |
| 11 | |
| 12 """ | |
| 13 | |
| 14 from __future__ import unicode_literals | |
| 15 | |
| 16 from . import (lookup, LABELS, decode, encode, iter_decode, iter_encode, | |
| 17 IncrementalDecoder, IncrementalEncoder, UTF8) | |
| 18 | |
| 19 | |
def assert_raises(exception, function, *args, **kwargs):
    """Assert that ``function(*args, **kwargs)`` raises ``exception``.

    :param exception: The expected exception class, or a tuple of classes.
    :param function: The callable to invoke.
    :param args: Positional arguments forwarded to ``function``.
    :param kwargs: Keyword arguments forwarded to ``function``.
    :raises AssertionError: If the call returns without raising.

    An exception that does not match ``exception`` propagates unchanged.
    """
    try:
        function(*args, **kwargs)
    except exception:
        return
    else:  # pragma: no cover
        # Format a 1-tuple: a bare tuple of exception classes would be
        # unpacked as several % arguments and raise TypeError instead.
        raise AssertionError('Did not raise %s.' % (exception,))
| 27 | |
| 28 | |
def test_labels():
    """Check how ``lookup`` maps labels to encoding names.

    Labels are expected to match case-insensitively (ASCII letters only)
    and to ignore surrounding ASCII whitespace. Unknown labels are
    expected to yield ``None``.
    """
    utf8_labels = ('utf-8', 'Utf-8', 'UTF-8', 'utf8', 'utf8 ', ' \r\nutf8\t')
    for label in utf8_labels:
        assert lookup(label).name == 'utf-8', label
    assert lookup('u8') is None  # Python label.
    # Spelled as an escape so the no-break space cannot be confused with
    # (or silently converted to) an ordinary ASCII space.
    assert lookup('utf-8\u00a0') is None  # Non-ASCII white space.

    windows_1252_labels = ('US-ASCII', 'iso-8859-1', 'latin1', 'LATIN1')
    for label in windows_1252_labels:
        assert lookup(label).name == 'windows-1252', label
    assert lookup('latin-1') is None
    assert lookup('LATİN1') is None  # ASCII-only case insensitivity.
| 46 | |
| 47 | |
def test_all_labels():
    """Run empty input through every API for each label in ``LABELS``.

    Also checks that every encoding name found in ``LABELS`` is itself a
    label that looks up to the same name.
    """
    for label in LABELS:
        # One-shot helpers.
        assert decode(b'', label) == ('', lookup(label))
        assert encode('', label) == b''

        # Iterator helpers: empty chunks must produce no output at all.
        for count in (0, 1, 12):
            empty_bytes = [b''] * count
            empty_text = [''] * count
            chunks, _ = iter_decode(empty_bytes, label)
            assert list(chunks) == []
            assert list(iter_encode(empty_text, label)) == []

        # Incremental objects, with and without the final flag.
        incremental_decoder = IncrementalDecoder(label)
        assert incremental_decoder.decode(b'') == ''
        assert incremental_decoder.decode(b'', final=True) == ''

        incremental_encoder = IncrementalEncoder(label)
        assert incremental_encoder.encode('') == b''
        assert incremental_encoder.encode('', final=True) == b''

    # All encoding names are valid labels too:
    canonical_names = set(LABELS.values())
    for canonical in canonical_names:
        assert lookup(canonical).name == canonical
| 65 | |
| 66 | |
def test_invalid_label():
    """Every entry point must raise ``LookupError`` for an unknown label."""
    unknown = 'invalid'
    attempts = (
        (decode, b'\xEF\xBB\xBF\xc3\xa9'),
        (encode, 'é'),
        (iter_decode, []),
        (iter_encode, []),
    )
    for entry_point, payload in attempts:
        assert_raises(LookupError, entry_point, payload, unknown)

    # The incremental classes take the label as their only argument.
    for incremental_class in (IncrementalDecoder, IncrementalEncoder):
        assert_raises(LookupError, incremental_class, unknown)
| 74 | |
| 75 | |
def test_decode():
    """Check ``decode`` with fallback labels, ``Encoding`` objects and BOMs.

    ``decode`` is expected to return a ``(text, encoding)`` pair. The
    assertions require a leading BOM to win over the fallback encoding
    and to be absent from the returned text.
    """
    assert decode(b'\x80', 'latin1') == ('€', lookup('latin1'))
    assert decode(b'\x80', lookup('latin1')) == ('€', lookup('latin1'))
    assert decode(b'\xc3\xa9', 'utf8') == ('é', lookup('utf8'))
    assert decode(b'\xc3\xa9', UTF8) == ('é', lookup('utf8'))
    # No BOM, so the 'ascii' fallback (windows-1252) applies: the two
    # UTF-8 bytes decode to two separate characters, U+00C3 and U+00A9.
    # Escapes are used so the expected mojibake cannot be "repaired".
    assert decode(b'\xc3\xa9', 'ascii') == ('\u00c3\u00a9', lookup('ascii'))
    # UTF-8 with BOM
    assert decode(b'\xEF\xBB\xBF\xc3\xa9', 'ascii') == ('é', lookup('utf8'))

    # UTF-16-BE with BOM
    assert decode(b'\xFE\xFF\x00\xe9', 'ascii') == ('é', lookup('utf-16be'))
    # UTF-16-LE with BOM
    assert decode(b'\xFF\xFE\xe9\x00', 'ascii') == ('é', lookup('utf-16le'))
    # Same BOMs followed by the payload bytes in the opposite order.
    assert decode(b'\xFE\xFF\xe9\x00', 'ascii') == ('\ue900', lookup('utf-16be'))
    assert decode(b'\xFF\xFE\x00\xe9', 'ascii') == ('\ue900', lookup('utf-16le'))

    # Without a BOM the label decides; plain 'UTF-16' means little-endian.
    assert decode(b'\x00\xe9', 'UTF-16BE') == ('é', lookup('utf-16be'))
    assert decode(b'\xe9\x00', 'UTF-16LE') == ('é', lookup('utf-16le'))
    assert decode(b'\xe9\x00', 'UTF-16') == ('é', lookup('utf-16le'))

    assert decode(b'\xe9\x00', 'UTF-16BE') == ('\ue900', lookup('utf-16be'))
    assert decode(b'\x00\xe9', 'UTF-16LE') == ('\ue900', lookup('utf-16le'))
    assert decode(b'\x00\xe9', 'UTF-16') == ('\ue900', lookup('utf-16le'))
| 96 | |
| 97 | |
def test_encode():
    """Check ``encode`` output for 'é' under several labels.

    The expected bytes carry no BOM, and the plain 'utf-16' label is
    expected to produce little-endian output.
    """
    expected_by_label = (
        ('latin1', b'\xe9'),
        ('utf8', b'\xc3\xa9'),
        ('utf-16', b'\xe9\x00'),
        ('utf-16le', b'\xe9\x00'),
        ('utf-16be', b'\x00\xe9'),
    )
    for label, expected in expected_by_label:
        assert encode('é', label) == expected, label
| 105 | |
| 106 | |
def test_iter_decode():
    """Check ``iter_decode`` on chunked input, including BOMs split
    across chunks and truncated multi-byte sequences.
    """
    def iter_decode_to_string(chunks, fallback_encoding):
        # Join the decoded pieces; the detected encoding is not needed here.
        output, _encoding = iter_decode(chunks, fallback_encoding)
        return ''.join(output)

    assert iter_decode_to_string([], 'latin1') == ''
    assert iter_decode_to_string([b''], 'latin1') == ''
    assert iter_decode_to_string([b'\xe9'], 'latin1') == 'é'
    assert iter_decode_to_string([b'hello'], 'latin1') == 'hello'
    assert iter_decode_to_string([b'he', b'llo'], 'latin1') == 'hello'
    assert iter_decode_to_string([b'hell', b'o'], 'latin1') == 'hello'
    # No BOM: the latin1 fallback decodes each byte on its own, giving
    # U+00C3 U+00A9 rather than a single 'é'.
    assert iter_decode_to_string([b'\xc3\xa9'], 'latin1') == '\u00c3\u00a9'
    # With a UTF-8 BOM the fallback is ignored, however the input is chunked.
    assert iter_decode_to_string([b'\xEF\xBB\xBF\xc3\xa9'], 'latin1') == 'é'
    assert iter_decode_to_string([
        b'\xEF\xBB\xBF', b'\xc3', b'\xa9'], 'latin1') == 'é'
    # A truncated UTF-8 sequence at the end becomes U+FFFD.
    assert iter_decode_to_string([
        b'\xEF\xBB\xBF', b'a', b'\xc3'], 'latin1') == 'a\uFFFD'
    assert iter_decode_to_string([
        b'', b'\xEF', b'', b'', b'\xBB\xBF\xc3', b'\xa9'], 'latin1') == 'é'
    assert iter_decode_to_string([b'\xEF\xBB\xBF'], 'latin1') == ''
    # An incomplete BOM is ordinary data for the fallback encoding.
    assert iter_decode_to_string([b'\xEF\xBB'], 'latin1') == 'ï»'
    assert iter_decode_to_string([b'\xFE\xFF\x00\xe9'], 'latin1') == 'é'
    assert iter_decode_to_string([b'\xFF\xFE\xe9\x00'], 'latin1') == 'é'
    assert iter_decode_to_string([
        b'', b'\xFF', b'', b'', b'\xFE\xe9', b'\x00'], 'latin1') == 'é'
    assert iter_decode_to_string([
        b'', b'h\xe9', b'llo'], 'x-user-defined') == 'h\uF7E9llo'
| 133 | |
| 134 | |
def test_iter_encode():
    """Check ``iter_encode`` on chunked text, including empty chunks."""
    def encoded_bytes(chunks, label):
        # Concatenate everything the encoder yields.
        return b''.join(iter_encode(chunks, label))

    assert encoded_bytes([], 'latin1') == b''
    assert encoded_bytes([''], 'latin1') == b''
    assert encoded_bytes(['é'], 'latin1') == b'\xe9'

    assert encoded_bytes(['', 'é', '', ''], 'latin1') == b'\xe9'
    assert encoded_bytes(['', 'é', '', ''], 'utf-16') == b'\xe9\x00'
    assert encoded_bytes(['', 'é', '', ''], 'utf-16le') == b'\xe9\x00'
    assert encoded_bytes(['', 'é', '', ''], 'utf-16be') == b'\x00\xe9'

    assert encoded_bytes(
        ['', 'h\uF7E9', '', 'llo'], 'x-user-defined') == b'h\xe9llo'
| 145 | |
| 146 | |
def test_x_user_defined():
    """Round-trip data through the 'x-user-defined' encoding.

    Two fixtures are checked: one mixing ASCII with high bytes, whose
    expected text uses U+F7xx code points, and one that is pure ASCII.
    The first fixture used to be overwritten before any assertion ran.
    """
    samples = (
        (
            b'2,\x0c\x0b\x1aO\xd9#\xcb\x0f\xc9\xbbt\xcf\xa8\xca',
            '2,\x0c\x0b\x1aO\uf7d9#\uf7cb\x0f\uf7c9\uf7bbt\uf7cf\uf7a8\uf7ca',
        ),
        (b'aa', 'aa'),
    )
    for encoded, decoded in samples:
        assert decode(encoded, 'x-user-defined') == (
            decoded, lookup('x-user-defined'))
        assert encode(decoded, 'x-user-defined') == encoded
