comparison env/lib/python3.7/site-packages/chardet/mbcssm.py @ 5:9b1c78e6ba9c draft default tip

"planemo upload commit 6c0a8142489327ece472c84e558c47da711a9142"
author shellac
date Mon, 01 Jun 2020 08:59:25 -0400
parents 79f47841a781
children
comparison
equal deleted inserted replaced
4:79f47841a781 5:9b1c78e6ba9c
1 ######################## BEGIN LICENSE BLOCK ########################
2 # The Original Code is mozilla.org code.
3 #
4 # The Initial Developer of the Original Code is
5 # Netscape Communications Corporation.
6 # Portions created by the Initial Developer are Copyright (C) 1998
7 # the Initial Developer. All Rights Reserved.
8 #
9 # Contributor(s):
10 # Mark Pilgrim - port to Python
11 #
12 # This library is free software; you can redistribute it and/or
13 # modify it under the terms of the GNU Lesser General Public
14 # License as published by the Free Software Foundation; either
15 # version 2.1 of the License, or (at your option) any later version.
16 #
17 # This library is distributed in the hope that it will be useful,
18 # but WITHOUT ANY WARRANTY; without even the implied warranty of
19 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 # Lesser General Public License for more details.
21 #
22 # You should have received a copy of the GNU Lesser General Public
23 # License along with this library; if not, write to the Free Software
24 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
25 # 02110-1301 USA
26 ######################### END LICENSE BLOCK #########################
27
28 from .enums import MachineState
29
30 # BIG5
31
# Big5 byte-class table: 256 entries, one per byte value 0x00-0xff, eight
# per row. The trailing comment on each row is the hex byte range it covers.
# Each entry is the class number (0-4 in this table) assigned to that byte.
32 BIG5_CLS = (
33 1,1,1,1,1,1,1,1, # 00 - 07 #allow 0x00 as legal value
34 1,1,1,1,1,1,0,0, # 08 - 0f
35 1,1,1,1,1,1,1,1, # 10 - 17
36 1,1,1,0,1,1,1,1, # 18 - 1f
37 1,1,1,1,1,1,1,1, # 20 - 27
38 1,1,1,1,1,1,1,1, # 28 - 2f
39 1,1,1,1,1,1,1,1, # 30 - 37
40 1,1,1,1,1,1,1,1, # 38 - 3f
41 2,2,2,2,2,2,2,2, # 40 - 47
42 2,2,2,2,2,2,2,2, # 48 - 4f
43 2,2,2,2,2,2,2,2, # 50 - 57
44 2,2,2,2,2,2,2,2, # 58 - 5f
45 2,2,2,2,2,2,2,2, # 60 - 67
46 2,2,2,2,2,2,2,2, # 68 - 6f
47 2,2,2,2,2,2,2,2, # 70 - 77
48 2,2,2,2,2,2,2,1, # 78 - 7f
49 4,4,4,4,4,4,4,4, # 80 - 87
50 4,4,4,4,4,4,4,4, # 88 - 8f
51 4,4,4,4,4,4,4,4, # 90 - 97
52 4,4,4,4,4,4,4,4, # 98 - 9f
53 4,3,3,3,3,3,3,3, # a0 - a7
54 3,3,3,3,3,3,3,3, # a8 - af
55 3,3,3,3,3,3,3,3, # b0 - b7
56 3,3,3,3,3,3,3,3, # b8 - bf
57 3,3,3,3,3,3,3,3, # c0 - c7
58 3,3,3,3,3,3,3,3, # c8 - cf
59 3,3,3,3,3,3,3,3, # d0 - d7
60 3,3,3,3,3,3,3,3, # d8 - df
61 3,3,3,3,3,3,3,3, # e0 - e7
62 3,3,3,3,3,3,3,3, # e8 - ef
63 3,3,3,3,3,3,3,3, # f0 - f7
64 3,3,3,3,3,3,3,0 # f8 - ff
65 )
66
# Big5 state-transition table, stored flat. Entries are MachineState members
# or bare integers; the #NN-NN comment on each row is the hex range of flat
# indices that row covers.
# NOTE(review): the code that reads this table is not in this file. Presumably
# a bare integer is an intermediate state number and the entry for
# (state, class) sits at index state * class_factor + class -- confirm.
67 BIG5_ST = (
68 MachineState.ERROR,MachineState.START,MachineState.START, 3,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#00-07
69 MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,#08-0f
70 MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START#10-17
71 )
72
# One entry per class (5 entries, matching 'class_factor' below).
# NOTE(review): presumably the byte length of a character whose first byte
# belongs to that class -- confirm against the consumer.
73 BIG5_CHAR_LEN_TABLE = (0, 1, 1, 2, 0)
74
# Bundles the Big5 tables, the class count and the encoding name under fixed
# string keys.
75 BIG5_SM_MODEL = {'class_table': BIG5_CLS,
76 'class_factor': 5,
77 'state_table': BIG5_ST,
78 'char_len_table': BIG5_CHAR_LEN_TABLE,
79 'name': 'Big5'}
80
81 # CP949
82
# CP949 byte-class table: 256 entries, one per byte value 0x00-0xff. Unlike
# the other class tables in this file it is laid out sixteen per row; the
# trailing comment on each row is the hex byte range it covers.
# Each entry is the class number (0-9 in this table) assigned to that byte.
83 CP949_CLS = (
84 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,0,0, # 00 - 0f
85 1,1,1,1,1,1,1,1, 1,1,1,0,1,1,1,1, # 10 - 1f
86 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, # 20 - 2f
87 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, # 30 - 3f
88 1,4,4,4,4,4,4,4, 4,4,4,4,4,4,4,4, # 40 - 4f
89 4,4,5,5,5,5,5,5, 5,5,5,1,1,1,1,1, # 50 - 5f
90 1,5,5,5,5,5,5,5, 5,5,5,5,5,5,5,5, # 60 - 6f
91 5,5,5,5,5,5,5,5, 5,5,5,1,1,1,1,1, # 70 - 7f
92 0,6,6,6,6,6,6,6, 6,6,6,6,6,6,6,6, # 80 - 8f
93 6,6,6,6,6,6,6,6, 6,6,6,6,6,6,6,6, # 90 - 9f
94 6,7,7,7,7,7,7,7, 7,7,7,7,7,8,8,8, # a0 - af
95 7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7, # b0 - bf
96 7,7,7,7,7,7,9,2, 2,3,2,2,2,2,2,2, # c0 - cf
97 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, # d0 - df
98 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, # e0 - ef
99 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,0, # f0 - ff
100 )
101
# CP949 state-transition table. As the header and row comments show, each row
# is one previous state (START, ERROR, ITS_ME, then intermediate states 3-6)
# and each of its ten columns is one byte class (0-9); the entry is the
# next state.
102 CP949_ST = (
103 #cls= 0 1 2 3 4 5 6 7 8 9 # previous state =
104 MachineState.ERROR,MachineState.START, 3,MachineState.ERROR,MachineState.START,MachineState.START, 4, 5,MachineState.ERROR, 6, # MachineState.START
105 MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, # MachineState.ERROR
106 MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME, # MachineState.ITS_ME
107 MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START, # 3
108 MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START, # 4
109 MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START, # 5
110 MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START, # 6
111 )
112
# One entry per class (10 entries, matching 'class_factor' below).
# NOTE(review): presumably the byte length of a character whose first byte
# belongs to that class -- confirm against the consumer.
113 CP949_CHAR_LEN_TABLE = (0, 1, 2, 0, 1, 1, 2, 2, 0, 2)
114
# Bundles the CP949 tables, the class count and the encoding name under fixed
# string keys.
115 CP949_SM_MODEL = {'class_table': CP949_CLS,
116 'class_factor': 10,
117 'state_table': CP949_ST,
118 'char_len_table': CP949_CHAR_LEN_TABLE,
119 'name': 'CP949'}
120
121 # EUC-JP
122
# EUC-JP byte-class table: 256 entries, one per byte value 0x00-0xff, eight
# per row. The trailing comment on each row is the hex byte range it covers.
# Each entry is the class number (0-5 in this table) assigned to that byte.
123 EUCJP_CLS = (
124 4,4,4,4,4,4,4,4, # 00 - 07
125 4,4,4,4,4,4,5,5, # 08 - 0f
126 4,4,4,4,4,4,4,4, # 10 - 17
127 4,4,4,5,4,4,4,4, # 18 - 1f
128 4,4,4,4,4,4,4,4, # 20 - 27
129 4,4,4,4,4,4,4,4, # 28 - 2f
130 4,4,4,4,4,4,4,4, # 30 - 37
131 4,4,4,4,4,4,4,4, # 38 - 3f
132 4,4,4,4,4,4,4,4, # 40 - 47
133 4,4,4,4,4,4,4,4, # 48 - 4f
134 4,4,4,4,4,4,4,4, # 50 - 57
135 4,4,4,4,4,4,4,4, # 58 - 5f
136 4,4,4,4,4,4,4,4, # 60 - 67
137 4,4,4,4,4,4,4,4, # 68 - 6f
138 4,4,4,4,4,4,4,4, # 70 - 77
139 4,4,4,4,4,4,4,4, # 78 - 7f
140 5,5,5,5,5,5,5,5, # 80 - 87
141 5,5,5,5,5,5,1,3, # 88 - 8f
142 5,5,5,5,5,5,5,5, # 90 - 97
143 5,5,5,5,5,5,5,5, # 98 - 9f
144 5,2,2,2,2,2,2,2, # a0 - a7
145 2,2,2,2,2,2,2,2, # a8 - af
146 2,2,2,2,2,2,2,2, # b0 - b7
147 2,2,2,2,2,2,2,2, # b8 - bf
148 2,2,2,2,2,2,2,2, # c0 - c7
149 2,2,2,2,2,2,2,2, # c8 - cf
150 2,2,2,2,2,2,2,2, # d0 - d7
151 2,2,2,2,2,2,2,2, # d8 - df
152 0,0,0,0,0,0,0,0, # e0 - e7
153 0,0,0,0,0,0,0,0, # e8 - ef
154 0,0,0,0,0,0,0,0, # f0 - f7
155 0,0,0,0,0,0,0,5 # f8 - ff
156 )
157
# EUC-JP state-transition table, stored flat. Entries are MachineState members
# or bare integers; the #NN-NN comment on each row is the hex range of flat
# indices that row covers.
# NOTE(review): the code that reads this table is not in this file. Presumably
# a bare integer is an intermediate state number and the entry for
# (state, class) sits at index state * class_factor + class -- confirm.
158 EUCJP_ST = (
159 3, 4, 3, 5,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#00-07
160 MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,#08-0f
161 MachineState.ITS_ME,MachineState.ITS_ME,MachineState.START,MachineState.ERROR,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#10-17
162 MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 3,MachineState.ERROR,#18-1f
163 3,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START#20-27
164 )
165
# One entry per class (6 entries, matching 'class_factor' below).
# NOTE(review): presumably the byte length of a character whose first byte
# belongs to that class -- confirm against the consumer.
166 EUCJP_CHAR_LEN_TABLE = (2, 2, 2, 3, 1, 0)
167
# Bundles the EUC-JP tables, the class count and the encoding name under fixed
# string keys.
168 EUCJP_SM_MODEL = {'class_table': EUCJP_CLS,
169 'class_factor': 6,
170 'state_table': EUCJP_ST,
171 'char_len_table': EUCJP_CHAR_LEN_TABLE,
172 'name': 'EUC-JP'}
173
174 # EUC-KR
175
# EUC-KR byte-class table: 256 entries, one per byte value 0x00-0xff, eight
# per row. The trailing comment on each row is the hex byte range it covers.
# Each entry is the class number (0-3 in this table) assigned to that byte.
176 EUCKR_CLS = (
177 1,1,1,1,1,1,1,1, # 00 - 07
178 1,1,1,1,1,1,0,0, # 08 - 0f
179 1,1,1,1,1,1,1,1, # 10 - 17
180 1,1,1,0,1,1,1,1, # 18 - 1f
181 1,1,1,1,1,1,1,1, # 20 - 27
182 1,1,1,1,1,1,1,1, # 28 - 2f
183 1,1,1,1,1,1,1,1, # 30 - 37
184 1,1,1,1,1,1,1,1, # 38 - 3f
185 1,1,1,1,1,1,1,1, # 40 - 47
186 1,1,1,1,1,1,1,1, # 48 - 4f
187 1,1,1,1,1,1,1,1, # 50 - 57
188 1,1,1,1,1,1,1,1, # 58 - 5f
189 1,1,1,1,1,1,1,1, # 60 - 67
190 1,1,1,1,1,1,1,1, # 68 - 6f
191 1,1,1,1,1,1,1,1, # 70 - 77
192 1,1,1,1,1,1,1,1, # 78 - 7f
193 0,0,0,0,0,0,0,0, # 80 - 87
194 0,0,0,0,0,0,0,0, # 88 - 8f
195 0,0,0,0,0,0,0,0, # 90 - 97
196 0,0,0,0,0,0,0,0, # 98 - 9f
197 0,2,2,2,2,2,2,2, # a0 - a7
198 2,2,2,2,2,3,3,3, # a8 - af
199 2,2,2,2,2,2,2,2, # b0 - b7
200 2,2,2,2,2,2,2,2, # b8 - bf
201 2,2,2,2,2,2,2,2, # c0 - c7
202 2,3,2,2,2,2,2,2, # c8 - cf
203 2,2,2,2,2,2,2,2, # d0 - d7
204 2,2,2,2,2,2,2,2, # d8 - df
205 2,2,2,2,2,2,2,2, # e0 - e7
206 2,2,2,2,2,2,2,2, # e8 - ef
207 2,2,2,2,2,2,2,2, # f0 - f7
208 2,2,2,2,2,2,2,0 # f8 - ff
209 )
210
# EUC-KR state-transition table, stored flat. Entries are MachineState members
# or bare integers; the #NN-NN comment on each row is the hex range of flat
# indices that row covers.
# NOTE(review): the code that reads this table is not in this file. Presumably
# a bare integer is an intermediate state number and the entry for
# (state, class) sits at index state * class_factor + class -- confirm.
211 EUCKR_ST = (
212 MachineState.ERROR,MachineState.START, 3,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#00-07
213 MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START #08-0f
214 )
215
# One entry per class (4 entries, matching 'class_factor' below).
# NOTE(review): presumably the byte length of a character whose first byte
# belongs to that class -- confirm against the consumer.
216 EUCKR_CHAR_LEN_TABLE = (0, 1, 2, 0)
217
# Bundles the EUC-KR tables, the class count and the encoding name under fixed
# string keys.
218 EUCKR_SM_MODEL = {'class_table': EUCKR_CLS,
219 'class_factor': 4,
220 'state_table': EUCKR_ST,
221 'char_len_table': EUCKR_CHAR_LEN_TABLE,
222 'name': 'EUC-KR'}
223
224 # EUC-TW
225
# EUC-TW byte-class table: 256 entries, one per byte value 0x00-0xff, eight
# per row. The trailing comment on each row is the hex byte range it covers.
# Each entry is the class number (0-6 in this table) assigned to that byte.
226 EUCTW_CLS = (
227 2,2,2,2,2,2,2,2, # 00 - 07
228 2,2,2,2,2,2,0,0, # 08 - 0f
229 2,2,2,2,2,2,2,2, # 10 - 17
230 2,2,2,0,2,2,2,2, # 18 - 1f
231 2,2,2,2,2,2,2,2, # 20 - 27
232 2,2,2,2,2,2,2,2, # 28 - 2f
233 2,2,2,2,2,2,2,2, # 30 - 37
234 2,2,2,2,2,2,2,2, # 38 - 3f
235 2,2,2,2,2,2,2,2, # 40 - 47
236 2,2,2,2,2,2,2,2, # 48 - 4f
237 2,2,2,2,2,2,2,2, # 50 - 57
238 2,2,2,2,2,2,2,2, # 58 - 5f
239 2,2,2,2,2,2,2,2, # 60 - 67
240 2,2,2,2,2,2,2,2, # 68 - 6f
241 2,2,2,2,2,2,2,2, # 70 - 77
242 2,2,2,2,2,2,2,2, # 78 - 7f
243 0,0,0,0,0,0,0,0, # 80 - 87
244 0,0,0,0,0,0,6,0, # 88 - 8f
245 0,0,0,0,0,0,0,0, # 90 - 97
246 0,0,0,0,0,0,0,0, # 98 - 9f
247 0,3,4,4,4,4,4,4, # a0 - a7
248 5,5,1,1,1,1,1,1, # a8 - af
249 1,1,1,1,1,1,1,1, # b0 - b7
250 1,1,1,1,1,1,1,1, # b8 - bf
251 1,1,3,1,3,3,3,3, # c0 - c7
252 3,3,3,3,3,3,3,3, # c8 - cf
253 3,3,3,3,3,3,3,3, # d0 - d7
254 3,3,3,3,3,3,3,3, # d8 - df
255 3,3,3,3,3,3,3,3, # e0 - e7
256 3,3,3,3,3,3,3,3, # e8 - ef
257 3,3,3,3,3,3,3,3, # f0 - f7
258 3,3,3,3,3,3,3,0 # f8 - ff
259 )
260
# EUC-TW state-transition table, stored flat. Entries are MachineState members
# or bare integers; the #NN-NN comment on each row is the hex range of flat
# indices that row covers.
# NOTE(review): the code that reads this table is not in this file. Presumably
# a bare integer is an intermediate state number and the entry for
# (state, class) sits at index state * class_factor + class -- confirm.
261 EUCTW_ST = (
262 MachineState.ERROR,MachineState.ERROR,MachineState.START, 3, 3, 3, 4,MachineState.ERROR,#00-07
263 MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,#08-0f
264 MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.START,MachineState.ERROR,#10-17
265 MachineState.START,MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#18-1f
266 5,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.ERROR,MachineState.START,MachineState.START,#20-27
267 MachineState.START,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START #28-2f
268 )
269
# One entry per class (7 entries, matching 'class_factor' below).
# NOTE(review): presumably the byte length of a character whose first byte
# belongs to that class -- confirm against the consumer.
270 EUCTW_CHAR_LEN_TABLE = (0, 0, 1, 2, 2, 2, 3)
271
# Bundles the EUC-TW tables, the class count and the encoding name under fixed
# string keys. The reported name is the lower-case 'x-euc-tw'.
272 EUCTW_SM_MODEL = {'class_table': EUCTW_CLS,
273 'class_factor': 7,
274 'state_table': EUCTW_ST,
275 'char_len_table': EUCTW_CHAR_LEN_TABLE,
276 'name': 'x-euc-tw'}
277
278 # GB2312
279
# GB2312 byte-class table: 256 entries, one per byte value 0x00-0xff, eight
# per row. The trailing comment on each row is the hex byte range it covers.
# Each entry is the class number (0-6 in this table) assigned to that byte.
280 GB2312_CLS = (
281 1,1,1,1,1,1,1,1, # 00 - 07
282 1,1,1,1,1,1,0,0, # 08 - 0f
283 1,1,1,1,1,1,1,1, # 10 - 17
284 1,1,1,0,1,1,1,1, # 18 - 1f
285 1,1,1,1,1,1,1,1, # 20 - 27
286 1,1,1,1,1,1,1,1, # 28 - 2f
287 3,3,3,3,3,3,3,3, # 30 - 37
288 3,3,1,1,1,1,1,1, # 38 - 3f
289 2,2,2,2,2,2,2,2, # 40 - 47
290 2,2,2,2,2,2,2,2, # 48 - 4f
291 2,2,2,2,2,2,2,2, # 50 - 57
292 2,2,2,2,2,2,2,2, # 58 - 5f
293 2,2,2,2,2,2,2,2, # 60 - 67
294 2,2,2,2,2,2,2,2, # 68 - 6f
295 2,2,2,2,2,2,2,2, # 70 - 77
296 2,2,2,2,2,2,2,4, # 78 - 7f
297 5,6,6,6,6,6,6,6, # 80 - 87
298 6,6,6,6,6,6,6,6, # 88 - 8f
299 6,6,6,6,6,6,6,6, # 90 - 97
300 6,6,6,6,6,6,6,6, # 98 - 9f
301 6,6,6,6,6,6,6,6, # a0 - a7
302 6,6,6,6,6,6,6,6, # a8 - af
303 6,6,6,6,6,6,6,6, # b0 - b7
304 6,6,6,6,6,6,6,6, # b8 - bf
305 6,6,6,6,6,6,6,6, # c0 - c7
306 6,6,6,6,6,6,6,6, # c8 - cf
307 6,6,6,6,6,6,6,6, # d0 - d7
308 6,6,6,6,6,6,6,6, # d8 - df
309 6,6,6,6,6,6,6,6, # e0 - e7
310 6,6,6,6,6,6,6,6, # e8 - ef
311 6,6,6,6,6,6,6,6, # f0 - f7
312 6,6,6,6,6,6,6,0 # f8 - ff
313 )
314
# GB2312 state-transition table, stored flat. Entries are MachineState members
# or bare integers; the #NN-NN comment on each row is the hex range of flat
# indices that row covers.
# NOTE(review): the code that reads this table is not in this file. Presumably
# a bare integer is an intermediate state number and the entry for
# (state, class) sits at index state * class_factor + class -- confirm.
315 GB2312_ST = (
316 MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START, 3,MachineState.ERROR,#00-07
317 MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,#08-0f
318 MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.START,#10-17
319 4,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#18-1f
320 MachineState.ERROR,MachineState.ERROR, 5,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,#20-27
321 MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START #28-2f
322 )
323
# Character length per class: one entry per class (7 entries, matching
# 'class_factor' below). The note that follows explains why class 6 is
# recorded as 2.
324 # To be accurate, the length of class 6 can be either 2 or 4.
325 # But it is not necessary to discriminate between the two since
326 # it is used for frequency analysis only, and we are validating
327 # each code range there as well. So it is safe to set it to be
328 # 2 here.
329 GB2312_CHAR_LEN_TABLE = (0, 1, 1, 1, 1, 1, 2)
330
# Bundles the GB2312 tables, the class count and the encoding name under fixed
# string keys.
331 GB2312_SM_MODEL = {'class_table': GB2312_CLS,
332 'class_factor': 7,
333 'state_table': GB2312_ST,
334 'char_len_table': GB2312_CHAR_LEN_TABLE,
335 'name': 'GB2312'}
336
337 # Shift_JIS
338
# Shift_JIS byte-class table: 256 entries, one per byte value 0x00-0xff, eight
# per row. The trailing comment on each row is the hex byte range it covers.
# Each entry is the class number (0-4 in this table) assigned to that byte.
339 SJIS_CLS = (
340 1,1,1,1,1,1,1,1, # 00 - 07
341 1,1,1,1,1,1,0,0, # 08 - 0f
342 1,1,1,1,1,1,1,1, # 10 - 17
343 1,1,1,0,1,1,1,1, # 18 - 1f
344 1,1,1,1,1,1,1,1, # 20 - 27
345 1,1,1,1,1,1,1,1, # 28 - 2f
346 1,1,1,1,1,1,1,1, # 30 - 37
347 1,1,1,1,1,1,1,1, # 38 - 3f
348 2,2,2,2,2,2,2,2, # 40 - 47
349 2,2,2,2,2,2,2,2, # 48 - 4f
350 2,2,2,2,2,2,2,2, # 50 - 57
351 2,2,2,2,2,2,2,2, # 58 - 5f
352 2,2,2,2,2,2,2,2, # 60 - 67
353 2,2,2,2,2,2,2,2, # 68 - 6f
354 2,2,2,2,2,2,2,2, # 70 - 77
355 2,2,2,2,2,2,2,1, # 78 - 7f
356 3,3,3,3,3,2,2,3, # 80 - 87
357 3,3,3,3,3,3,3,3, # 88 - 8f
358 3,3,3,3,3,3,3,3, # 90 - 97
359 3,3,3,3,3,3,3,3, # 98 - 9f
360 # 0xa0 is illegal in the Shift_JIS encoding, but some pages do
361 # contain such bytes, so we need to be more forgiving of errors.
362 2,2,2,2,2,2,2,2, # a0 - a7
363 2,2,2,2,2,2,2,2, # a8 - af
364 2,2,2,2,2,2,2,2, # b0 - b7
365 2,2,2,2,2,2,2,2, # b8 - bf
366 2,2,2,2,2,2,2,2, # c0 - c7
367 2,2,2,2,2,2,2,2, # c8 - cf
368 2,2,2,2,2,2,2,2, # d0 - d7
369 2,2,2,2,2,2,2,2, # d8 - df
370 3,3,3,3,3,3,3,3, # e0 - e7
371 3,3,3,3,3,4,4,4, # e8 - ef
372 3,3,3,3,3,3,3,3, # f0 - f7
373 3,3,3,3,3,0,0,0) # f8 - ff
374
375
# Shift_JIS state-transition table, stored flat. Entries are MachineState
# members or bare integers; the #NN-NN comment on each row is the hex range of
# flat indices that row covers.
# NOTE(review): the code that reads this table is not in this file. Presumably
# a bare integer is an intermediate state number and the entry for
# (state, class) sits at index state * class_factor + class -- confirm.
376 SJIS_ST = (
377 MachineState.ERROR,MachineState.START,MachineState.START, 3,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#00-07
378 MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,#08-0f
379 MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START #10-17
380 )
381
# One entry per class (6 entries, matching 'class_factor' below).
# NOTE(review): SJIS_CLS only assigns classes 0-4, so the sixth entry
# (class 5) is never selected by the class table in this file.
# NOTE(review): presumably the byte length of a character whose first byte
# belongs to that class -- confirm against the consumer.
382 SJIS_CHAR_LEN_TABLE = (0, 1, 1, 2, 0, 0)
383
# Bundles the Shift_JIS tables, the class count and the encoding name under
# fixed string keys.
384 SJIS_SM_MODEL = {'class_table': SJIS_CLS,
385 'class_factor': 6,
386 'state_table': SJIS_ST,
387 'char_len_table': SJIS_CHAR_LEN_TABLE,
388 'name': 'Shift_JIS'}
389
390 # UCS2-BE
391
# UCS2-BE byte-class table: 256 entries, one per byte value 0x00-0xff, eight
# per row. The trailing comment on each row is the hex byte range it covers.
# Each entry is the class number (0-5 in this table) assigned to that byte.
# Almost every byte is class 0; only 0x0a, 0x0d, 0x1b, 0x29-0x2d, 0xfe and
# 0xff get a class of their own.
392 UCS2BE_CLS = (
393 0,0,0,0,0,0,0,0, # 00 - 07
394 0,0,1,0,0,2,0,0, # 08 - 0f
395 0,0,0,0,0,0,0,0, # 10 - 17
396 0,0,0,3,0,0,0,0, # 18 - 1f
397 0,0,0,0,0,0,0,0, # 20 - 27
398 0,3,3,3,3,3,0,0, # 28 - 2f
399 0,0,0,0,0,0,0,0, # 30 - 37
400 0,0,0,0,0,0,0,0, # 38 - 3f
401 0,0,0,0,0,0,0,0, # 40 - 47
402 0,0,0,0,0,0,0,0, # 48 - 4f
403 0,0,0,0,0,0,0,0, # 50 - 57
404 0,0,0,0,0,0,0,0, # 58 - 5f
405 0,0,0,0,0,0,0,0, # 60 - 67
406 0,0,0,0,0,0,0,0, # 68 - 6f
407 0,0,0,0,0,0,0,0, # 70 - 77
408 0,0,0,0,0,0,0,0, # 78 - 7f
409 0,0,0,0,0,0,0,0, # 80 - 87
410 0,0,0,0,0,0,0,0, # 88 - 8f
411 0,0,0,0,0,0,0,0, # 90 - 97
412 0,0,0,0,0,0,0,0, # 98 - 9f
413 0,0,0,0,0,0,0,0, # a0 - a7
414 0,0,0,0,0,0,0,0, # a8 - af
415 0,0,0,0,0,0,0,0, # b0 - b7
416 0,0,0,0,0,0,0,0, # b8 - bf
417 0,0,0,0,0,0,0,0, # c0 - c7
418 0,0,0,0,0,0,0,0, # c8 - cf
419 0,0,0,0,0,0,0,0, # d0 - d7
420 0,0,0,0,0,0,0,0, # d8 - df
421 0,0,0,0,0,0,0,0, # e0 - e7
422 0,0,0,0,0,0,0,0, # e8 - ef
423 0,0,0,0,0,0,0,0, # f0 - f7
424 0,0,0,0,0,0,4,5 # f8 - ff
425 )
426
# UCS2-BE state-transition table, stored flat. Entries are MachineState
# members or bare integers; the #NN-NN comment on each row is the hex range of
# flat indices that row covers.
# NOTE(review): the code that reads this table is not in this file. Presumably
# a bare integer is an intermediate state number and the entry for
# (state, class) sits at index state * class_factor + class -- confirm.
427 UCS2BE_ST = (
428 5, 7, 7,MachineState.ERROR, 4, 3,MachineState.ERROR,MachineState.ERROR,#00-07
429 MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,#08-0f
430 MachineState.ITS_ME,MachineState.ITS_ME, 6, 6, 6, 6,MachineState.ERROR,MachineState.ERROR,#10-17
431 6, 6, 6, 6, 6,MachineState.ITS_ME, 6, 6,#18-1f
432 6, 6, 6, 6, 5, 7, 7,MachineState.ERROR,#20-27
433 5, 8, 6, 6,MachineState.ERROR, 6, 6, 6,#28-2f
434 6, 6, 6, 6,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START #30-37
435 )
436
# One entry per class (6 entries, matching 'class_factor' below).
# NOTE(review): presumably the byte length of a character whose first byte
# belongs to that class -- confirm against the consumer.
437 UCS2BE_CHAR_LEN_TABLE = (2, 2, 2, 0, 2, 2)
438
# Bundles the UCS2-BE tables, the class count and the encoding name under
# fixed string keys. The tables are named UCS2BE, but the reported name
# is 'UTF-16BE'.
439 UCS2BE_SM_MODEL = {'class_table': UCS2BE_CLS,
440 'class_factor': 6,
441 'state_table': UCS2BE_ST,
442 'char_len_table': UCS2BE_CHAR_LEN_TABLE,
443 'name': 'UTF-16BE'}
444
445 # UCS2-LE
446
# UCS2-LE byte-class table: 256 entries, one per byte value 0x00-0xff, eight
# per row. The trailing comment on each row is the hex byte range it covers.
# Each entry is the class number (0-5 in this table) assigned to that byte.
# Almost every byte is class 0; only 0x0a, 0x0d, 0x1b, 0x29-0x2d, 0xfe and
# 0xff get a class of their own.
447 UCS2LE_CLS = (
448 0,0,0,0,0,0,0,0, # 00 - 07
449 0,0,1,0,0,2,0,0, # 08 - 0f
450 0,0,0,0,0,0,0,0, # 10 - 17
451 0,0,0,3,0,0,0,0, # 18 - 1f
452 0,0,0,0,0,0,0,0, # 20 - 27
453 0,3,3,3,3,3,0,0, # 28 - 2f
454 0,0,0,0,0,0,0,0, # 30 - 37
455 0,0,0,0,0,0,0,0, # 38 - 3f
456 0,0,0,0,0,0,0,0, # 40 - 47
457 0,0,0,0,0,0,0,0, # 48 - 4f
458 0,0,0,0,0,0,0,0, # 50 - 57
459 0,0,0,0,0,0,0,0, # 58 - 5f
460 0,0,0,0,0,0,0,0, # 60 - 67
461 0,0,0,0,0,0,0,0, # 68 - 6f
462 0,0,0,0,0,0,0,0, # 70 - 77
463 0,0,0,0,0,0,0,0, # 78 - 7f
464 0,0,0,0,0,0,0,0, # 80 - 87
465 0,0,0,0,0,0,0,0, # 88 - 8f
466 0,0,0,0,0,0,0,0, # 90 - 97
467 0,0,0,0,0,0,0,0, # 98 - 9f
468 0,0,0,0,0,0,0,0, # a0 - a7
469 0,0,0,0,0,0,0,0, # a8 - af
470 0,0,0,0,0,0,0,0, # b0 - b7
471 0,0,0,0,0,0,0,0, # b8 - bf
472 0,0,0,0,0,0,0,0, # c0 - c7
473 0,0,0,0,0,0,0,0, # c8 - cf
474 0,0,0,0,0,0,0,0, # d0 - d7
475 0,0,0,0,0,0,0,0, # d8 - df
476 0,0,0,0,0,0,0,0, # e0 - e7
477 0,0,0,0,0,0,0,0, # e8 - ef
478 0,0,0,0,0,0,0,0, # f0 - f7
479 0,0,0,0,0,0,4,5 # f8 - ff
480 )
481
# UCS2-LE state-transition table, stored flat. Entries are MachineState
# members or bare integers; the #NN-NN comment on each row is the hex range of
# flat indices that row covers.
# NOTE(review): the code that reads this table is not in this file. Presumably
# a bare integer is an intermediate state number and the entry for
# (state, class) sits at index state * class_factor + class -- confirm.
482 UCS2LE_ST = (
483 6, 6, 7, 6, 4, 3,MachineState.ERROR,MachineState.ERROR,#00-07
484 MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,#08-0f
485 MachineState.ITS_ME,MachineState.ITS_ME, 5, 5, 5,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,#10-17
486 5, 5, 5,MachineState.ERROR, 5,MachineState.ERROR, 6, 6,#18-1f
487 7, 6, 8, 8, 5, 5, 5,MachineState.ERROR,#20-27
488 5, 5, 5,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 5, 5,#28-2f
489 5, 5, 5,MachineState.ERROR, 5,MachineState.ERROR,MachineState.START,MachineState.START #30-37
490 )
491
# One entry per class (6 entries, matching 'class_factor' below); every class
# is recorded as 2.
# NOTE(review): presumably the byte length of a character whose first byte
# belongs to that class -- confirm against the consumer.
492 UCS2LE_CHAR_LEN_TABLE = (2, 2, 2, 2, 2, 2)
493
# Bundles the UCS2-LE tables, the class count and the encoding name under
# fixed string keys. The tables are named UCS2LE, but the reported name
# is 'UTF-16LE'.
494 UCS2LE_SM_MODEL = {'class_table': UCS2LE_CLS,
495 'class_factor': 6,
496 'state_table': UCS2LE_ST,
497 'char_len_table': UCS2LE_CHAR_LEN_TABLE,
498 'name': 'UTF-16LE'}
499
500 # UTF-8
501
# UTF-8 byte-class table: 256 entries, one per byte value 0x00-0xff, eight
# per row. The trailing comment on each row is the hex byte range it covers.
# Each entry is the class number (0-15 in this table) assigned to that byte.
502 UTF8_CLS = (
503 1,1,1,1,1,1,1,1, # 00 - 07 #allow 0x00 as a legal value
504 1,1,1,1,1,1,0,0, # 08 - 0f
505 1,1,1,1,1,1,1,1, # 10 - 17
506 1,1,1,0,1,1,1,1, # 18 - 1f
507 1,1,1,1,1,1,1,1, # 20 - 27
508 1,1,1,1,1,1,1,1, # 28 - 2f
509 1,1,1,1,1,1,1,1, # 30 - 37
510 1,1,1,1,1,1,1,1, # 38 - 3f
511 1,1,1,1,1,1,1,1, # 40 - 47
512 1,1,1,1,1,1,1,1, # 48 - 4f
513 1,1,1,1,1,1,1,1, # 50 - 57
514 1,1,1,1,1,1,1,1, # 58 - 5f
515 1,1,1,1,1,1,1,1, # 60 - 67
516 1,1,1,1,1,1,1,1, # 68 - 6f
517 1,1,1,1,1,1,1,1, # 70 - 77
518 1,1,1,1,1,1,1,1, # 78 - 7f
519 2,2,2,2,3,3,3,3, # 80 - 87
520 4,4,4,4,4,4,4,4, # 88 - 8f
521 4,4,4,4,4,4,4,4, # 90 - 97
522 4,4,4,4,4,4,4,4, # 98 - 9f
523 5,5,5,5,5,5,5,5, # a0 - a7
524 5,5,5,5,5,5,5,5, # a8 - af
525 5,5,5,5,5,5,5,5, # b0 - b7
526 5,5,5,5,5,5,5,5, # b8 - bf
527 0,0,6,6,6,6,6,6, # c0 - c7
528 6,6,6,6,6,6,6,6, # c8 - cf
529 6,6,6,6,6,6,6,6, # d0 - d7
530 6,6,6,6,6,6,6,6, # d8 - df
531 7,8,8,8,8,8,8,8, # e0 - e7
532 8,8,8,8,8,9,8,8, # e8 - ef
533 10,11,11,11,11,11,11,11, # f0 - f7
534 12,13,13,13,14,15,0,0 # f8 - ff
535 )
536
# UTF-8 state-transition table, stored flat. Entries are MachineState members
# or bare integers; the #NN-NN comment on each row is the hex range of flat
# indices that row covers.
# NOTE(review): the code that reads this table is not in this file. Presumably
# a bare integer is an intermediate state number and the entry for
# (state, class) sits at index state * class_factor + class -- confirm.
537 UTF8_ST = (
538 MachineState.ERROR,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 12, 10,#00-07
539 9, 11, 8, 7, 6, 5, 4, 3,#08-0f
540 MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#10-17
541 MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#18-1f
542 MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,#20-27
543 MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,#28-2f
544 MachineState.ERROR,MachineState.ERROR, 5, 5, 5, 5,MachineState.ERROR,MachineState.ERROR,#30-37
545 MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#38-3f
546 MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 5, 5, 5,MachineState.ERROR,MachineState.ERROR,#40-47
547 MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#48-4f
548 MachineState.ERROR,MachineState.ERROR, 7, 7, 7, 7,MachineState.ERROR,MachineState.ERROR,#50-57
549 MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#58-5f
550 MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 7, 7,MachineState.ERROR,MachineState.ERROR,#60-67
551 MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#68-6f
552 MachineState.ERROR,MachineState.ERROR, 9, 9, 9, 9,MachineState.ERROR,MachineState.ERROR,#70-77
553 MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#78-7f
554 MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 9,MachineState.ERROR,MachineState.ERROR,#80-87
555 MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#88-8f
556 MachineState.ERROR,MachineState.ERROR, 12, 12, 12, 12,MachineState.ERROR,MachineState.ERROR,#90-97
557 MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#98-9f
558 MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 12,MachineState.ERROR,MachineState.ERROR,#a0-a7
559 MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#a8-af
560 MachineState.ERROR,MachineState.ERROR, 12, 12, 12,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#b0-b7
561 MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#b8-bf
562 MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,#c0-c7
563 MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR #c8-cf
564 )
565
# One entry per class (16 entries, matching 'class_factor' below).
# NOTE(review): presumably the byte length of a character whose first byte
# belongs to that class -- confirm against the consumer.
566 UTF8_CHAR_LEN_TABLE = (0, 1, 0, 0, 0, 0, 2, 3, 3, 3, 4, 4, 5, 5, 6, 6)
567
# Bundles the UTF-8 tables, the class count and the encoding name under fixed
# string keys.
568 UTF8_SM_MODEL = {'class_table': UTF8_CLS,
569 'class_factor': 16,
570 'state_table': UTF8_ST,
571 'char_len_table': UTF8_CHAR_LEN_TABLE,
572 'name': 'UTF-8'}