Mercurial > repos > shellac > guppy_basecaller
comparison env/lib/python3.7/site-packages/chardet/mbcssm.py @ 0:26e78fe6e8c4 draft
"planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
author | shellac |
---|---|
date | Sat, 02 May 2020 07:14:21 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:26e78fe6e8c4 |
---|---|
1 ######################## BEGIN LICENSE BLOCK ######################## | |
2 # The Original Code is mozilla.org code. | |
3 # | |
4 # The Initial Developer of the Original Code is | |
5 # Netscape Communications Corporation. | |
6 # Portions created by the Initial Developer are Copyright (C) 1998 | |
7 # the Initial Developer. All Rights Reserved. | |
8 # | |
9 # Contributor(s): | |
10 # Mark Pilgrim - port to Python | |
11 # | |
12 # This library is free software; you can redistribute it and/or | |
13 # modify it under the terms of the GNU Lesser General Public | |
14 # License as published by the Free Software Foundation; either | |
15 # version 2.1 of the License, or (at your option) any later version. | |
16 # | |
17 # This library is distributed in the hope that it will be useful, | |
18 # but WITHOUT ANY WARRANTY; without even the implied warranty of | |
19 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
20 # Lesser General Public License for more details. | |
21 # | |
22 # You should have received a copy of the GNU Lesser General Public | |
23 # License along with this library; if not, write to the Free Software | |
24 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA | |
25 # 02110-1301 USA | |
26 ######################### END LICENSE BLOCK ######################### | |
27 | |
28 from .enums import MachineState | |
29 | |
30 # BIG5 | |
31 | |
32 BIG5_CLS = ( | |
33 1,1,1,1,1,1,1,1, # 00 - 07 # allow 0x00 as a legal value | |
34 1,1,1,1,1,1,0,0, # 08 - 0f | |
35 1,1,1,1,1,1,1,1, # 10 - 17 | |
36 1,1,1,0,1,1,1,1, # 18 - 1f | |
37 1,1,1,1,1,1,1,1, # 20 - 27 | |
38 1,1,1,1,1,1,1,1, # 28 - 2f | |
39 1,1,1,1,1,1,1,1, # 30 - 37 | |
40 1,1,1,1,1,1,1,1, # 38 - 3f | |
41 2,2,2,2,2,2,2,2, # 40 - 47 | |
42 2,2,2,2,2,2,2,2, # 48 - 4f | |
43 2,2,2,2,2,2,2,2, # 50 - 57 | |
44 2,2,2,2,2,2,2,2, # 58 - 5f | |
45 2,2,2,2,2,2,2,2, # 60 - 67 | |
46 2,2,2,2,2,2,2,2, # 68 - 6f | |
47 2,2,2,2,2,2,2,2, # 70 - 77 | |
48 2,2,2,2,2,2,2,1, # 78 - 7f | |
49 4,4,4,4,4,4,4,4, # 80 - 87 | |
50 4,4,4,4,4,4,4,4, # 88 - 8f | |
51 4,4,4,4,4,4,4,4, # 90 - 97 | |
52 4,4,4,4,4,4,4,4, # 98 - 9f | |
53 4,3,3,3,3,3,3,3, # a0 - a7 | |
54 3,3,3,3,3,3,3,3, # a8 - af | |
55 3,3,3,3,3,3,3,3, # b0 - b7 | |
56 3,3,3,3,3,3,3,3, # b8 - bf | |
57 3,3,3,3,3,3,3,3, # c0 - c7 | |
58 3,3,3,3,3,3,3,3, # c8 - cf | |
59 3,3,3,3,3,3,3,3, # d0 - d7 | |
60 3,3,3,3,3,3,3,3, # d8 - df | |
61 3,3,3,3,3,3,3,3, # e0 - e7 | |
62 3,3,3,3,3,3,3,3, # e8 - ef | |
63 3,3,3,3,3,3,3,3, # f0 - f7 | |
64 3,3,3,3,3,3,3,0 # f8 - ff | |
65 ) | |
66 | |
67 BIG5_ST = ( | |
68 MachineState.ERROR,MachineState.START,MachineState.START, 3,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#00-07 | |
69 MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,#08-0f | |
70 MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START#10-17 | |
71 ) | |
72 | |
73 BIG5_CHAR_LEN_TABLE = (0, 1, 1, 2, 0) | |
74 | |
75 BIG5_SM_MODEL = {'class_table': BIG5_CLS, | |
76 'class_factor': 5, | |
77 'state_table': BIG5_ST, | |
78 'char_len_table': BIG5_CHAR_LEN_TABLE, | |
79 'name': 'Big5'} | |
80 | |
81 # CP949 | |
82 | |
83 CP949_CLS = ( | |
84 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,0,0, # 00 - 0f | |
85 1,1,1,1,1,1,1,1, 1,1,1,0,1,1,1,1, # 10 - 1f | |
86 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, # 20 - 2f | |
87 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, # 30 - 3f | |
88 1,4,4,4,4,4,4,4, 4,4,4,4,4,4,4,4, # 40 - 4f | |
89 4,4,5,5,5,5,5,5, 5,5,5,1,1,1,1,1, # 50 - 5f | |
90 1,5,5,5,5,5,5,5, 5,5,5,5,5,5,5,5, # 60 - 6f | |
91 5,5,5,5,5,5,5,5, 5,5,5,1,1,1,1,1, # 70 - 7f | |
92 0,6,6,6,6,6,6,6, 6,6,6,6,6,6,6,6, # 80 - 8f | |
93 6,6,6,6,6,6,6,6, 6,6,6,6,6,6,6,6, # 90 - 9f | |
94 6,7,7,7,7,7,7,7, 7,7,7,7,7,8,8,8, # a0 - af | |
95 7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7, # b0 - bf | |
96 7,7,7,7,7,7,9,2, 2,3,2,2,2,2,2,2, # c0 - cf | |
97 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, # d0 - df | |
98 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, # e0 - ef | |
99 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,0, # f0 - ff | |
100 ) | |
101 | |
102 CP949_ST = ( | |
103 #cls= 0 1 2 3 4 5 6 7 8 9 # previous state = | |
104 MachineState.ERROR,MachineState.START, 3,MachineState.ERROR,MachineState.START,MachineState.START, 4, 5,MachineState.ERROR, 6, # MachineState.START | |
105 MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, # MachineState.ERROR | |
106 MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME, # MachineState.ITS_ME | |
107 MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START, # 3 | |
108 MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START, # 4 | |
109 MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START, # 5 | |
110 MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START, # 6 | |
111 ) | |
112 | |
113 CP949_CHAR_LEN_TABLE = (0, 1, 2, 0, 1, 1, 2, 2, 0, 2) | |
114 | |
115 CP949_SM_MODEL = {'class_table': CP949_CLS, | |
116 'class_factor': 10, | |
117 'state_table': CP949_ST, | |
118 'char_len_table': CP949_CHAR_LEN_TABLE, | |
119 'name': 'CP949'} | |
120 | |
121 # EUC-JP | |
122 | |
123 EUCJP_CLS = ( | |
124 4,4,4,4,4,4,4,4, # 00 - 07 | |
125 4,4,4,4,4,4,5,5, # 08 - 0f | |
126 4,4,4,4,4,4,4,4, # 10 - 17 | |
127 4,4,4,5,4,4,4,4, # 18 - 1f | |
128 4,4,4,4,4,4,4,4, # 20 - 27 | |
129 4,4,4,4,4,4,4,4, # 28 - 2f | |
130 4,4,4,4,4,4,4,4, # 30 - 37 | |
131 4,4,4,4,4,4,4,4, # 38 - 3f | |
132 4,4,4,4,4,4,4,4, # 40 - 47 | |
133 4,4,4,4,4,4,4,4, # 48 - 4f | |
134 4,4,4,4,4,4,4,4, # 50 - 57 | |
135 4,4,4,4,4,4,4,4, # 58 - 5f | |
136 4,4,4,4,4,4,4,4, # 60 - 67 | |
137 4,4,4,4,4,4,4,4, # 68 - 6f | |
138 4,4,4,4,4,4,4,4, # 70 - 77 | |
139 4,4,4,4,4,4,4,4, # 78 - 7f | |
140 5,5,5,5,5,5,5,5, # 80 - 87 | |
141 5,5,5,5,5,5,1,3, # 88 - 8f | |
142 5,5,5,5,5,5,5,5, # 90 - 97 | |
143 5,5,5,5,5,5,5,5, # 98 - 9f | |
144 5,2,2,2,2,2,2,2, # a0 - a7 | |
145 2,2,2,2,2,2,2,2, # a8 - af | |
146 2,2,2,2,2,2,2,2, # b0 - b7 | |
147 2,2,2,2,2,2,2,2, # b8 - bf | |
148 2,2,2,2,2,2,2,2, # c0 - c7 | |
149 2,2,2,2,2,2,2,2, # c8 - cf | |
150 2,2,2,2,2,2,2,2, # d0 - d7 | |
151 2,2,2,2,2,2,2,2, # d8 - df | |
152 0,0,0,0,0,0,0,0, # e0 - e7 | |
153 0,0,0,0,0,0,0,0, # e8 - ef | |
154 0,0,0,0,0,0,0,0, # f0 - f7 | |
155 0,0,0,0,0,0,0,5 # f8 - ff | |
156 ) | |
157 | |
158 EUCJP_ST = ( | |
159 3, 4, 3, 5,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#00-07 | |
160 MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,#08-0f | |
161 MachineState.ITS_ME,MachineState.ITS_ME,MachineState.START,MachineState.ERROR,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#10-17 | |
162 MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 3,MachineState.ERROR,#18-1f | |
163 3,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START#20-27 | |
164 ) | |
165 | |
166 EUCJP_CHAR_LEN_TABLE = (2, 2, 2, 3, 1, 0) | |
167 | |
168 EUCJP_SM_MODEL = {'class_table': EUCJP_CLS, | |
169 'class_factor': 6, | |
170 'state_table': EUCJP_ST, | |
171 'char_len_table': EUCJP_CHAR_LEN_TABLE, | |
172 'name': 'EUC-JP'} | |
173 | |
174 # EUC-KR | |
175 | |
176 EUCKR_CLS = ( | |
177 1,1,1,1,1,1,1,1, # 00 - 07 | |
178 1,1,1,1,1,1,0,0, # 08 - 0f | |
179 1,1,1,1,1,1,1,1, # 10 - 17 | |
180 1,1,1,0,1,1,1,1, # 18 - 1f | |
181 1,1,1,1,1,1,1,1, # 20 - 27 | |
182 1,1,1,1,1,1,1,1, # 28 - 2f | |
183 1,1,1,1,1,1,1,1, # 30 - 37 | |
184 1,1,1,1,1,1,1,1, # 38 - 3f | |
185 1,1,1,1,1,1,1,1, # 40 - 47 | |
186 1,1,1,1,1,1,1,1, # 48 - 4f | |
187 1,1,1,1,1,1,1,1, # 50 - 57 | |
188 1,1,1,1,1,1,1,1, # 58 - 5f | |
189 1,1,1,1,1,1,1,1, # 60 - 67 | |
190 1,1,1,1,1,1,1,1, # 68 - 6f | |
191 1,1,1,1,1,1,1,1, # 70 - 77 | |
192 1,1,1,1,1,1,1,1, # 78 - 7f | |
193 0,0,0,0,0,0,0,0, # 80 - 87 | |
194 0,0,0,0,0,0,0,0, # 88 - 8f | |
195 0,0,0,0,0,0,0,0, # 90 - 97 | |
196 0,0,0,0,0,0,0,0, # 98 - 9f | |
197 0,2,2,2,2,2,2,2, # a0 - a7 | |
198 2,2,2,2,2,3,3,3, # a8 - af | |
199 2,2,2,2,2,2,2,2, # b0 - b7 | |
200 2,2,2,2,2,2,2,2, # b8 - bf | |
201 2,2,2,2,2,2,2,2, # c0 - c7 | |
202 2,3,2,2,2,2,2,2, # c8 - cf | |
203 2,2,2,2,2,2,2,2, # d0 - d7 | |
204 2,2,2,2,2,2,2,2, # d8 - df | |
205 2,2,2,2,2,2,2,2, # e0 - e7 | |
206 2,2,2,2,2,2,2,2, # e8 - ef | |
207 2,2,2,2,2,2,2,2, # f0 - f7 | |
208 2,2,2,2,2,2,2,0 # f8 - ff | |
209 ) | |
210 | |
211 EUCKR_ST = ( | |
212 MachineState.ERROR,MachineState.START, 3,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#00-07 | |
213 MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START #08-0f | |
214 ) | |
215 | |
216 EUCKR_CHAR_LEN_TABLE = (0, 1, 2, 0) | |
217 | |
218 EUCKR_SM_MODEL = {'class_table': EUCKR_CLS, | |
219 'class_factor': 4, | |
220 'state_table': EUCKR_ST, | |
221 'char_len_table': EUCKR_CHAR_LEN_TABLE, | |
222 'name': 'EUC-KR'} | |
223 | |
224 # EUC-TW | |
225 | |
226 EUCTW_CLS = ( | |
227 2,2,2,2,2,2,2,2, # 00 - 07 | |
228 2,2,2,2,2,2,0,0, # 08 - 0f | |
229 2,2,2,2,2,2,2,2, # 10 - 17 | |
230 2,2,2,0,2,2,2,2, # 18 - 1f | |
231 2,2,2,2,2,2,2,2, # 20 - 27 | |
232 2,2,2,2,2,2,2,2, # 28 - 2f | |
233 2,2,2,2,2,2,2,2, # 30 - 37 | |
234 2,2,2,2,2,2,2,2, # 38 - 3f | |
235 2,2,2,2,2,2,2,2, # 40 - 47 | |
236 2,2,2,2,2,2,2,2, # 48 - 4f | |
237 2,2,2,2,2,2,2,2, # 50 - 57 | |
238 2,2,2,2,2,2,2,2, # 58 - 5f | |
239 2,2,2,2,2,2,2,2, # 60 - 67 | |
240 2,2,2,2,2,2,2,2, # 68 - 6f | |
241 2,2,2,2,2,2,2,2, # 70 - 77 | |
242 2,2,2,2,2,2,2,2, # 78 - 7f | |
243 0,0,0,0,0,0,0,0, # 80 - 87 | |
244 0,0,0,0,0,0,6,0, # 88 - 8f | |
245 0,0,0,0,0,0,0,0, # 90 - 97 | |
246 0,0,0,0,0,0,0,0, # 98 - 9f | |
247 0,3,4,4,4,4,4,4, # a0 - a7 | |
248 5,5,1,1,1,1,1,1, # a8 - af | |
249 1,1,1,1,1,1,1,1, # b0 - b7 | |
250 1,1,1,1,1,1,1,1, # b8 - bf | |
251 1,1,3,1,3,3,3,3, # c0 - c7 | |
252 3,3,3,3,3,3,3,3, # c8 - cf | |
253 3,3,3,3,3,3,3,3, # d0 - d7 | |
254 3,3,3,3,3,3,3,3, # d8 - df | |
255 3,3,3,3,3,3,3,3, # e0 - e7 | |
256 3,3,3,3,3,3,3,3, # e8 - ef | |
257 3,3,3,3,3,3,3,3, # f0 - f7 | |
258 3,3,3,3,3,3,3,0 # f8 - ff | |
259 ) | |
260 | |
261 EUCTW_ST = ( | |
262 MachineState.ERROR,MachineState.ERROR,MachineState.START, 3, 3, 3, 4,MachineState.ERROR,#00-07 | |
263 MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,#08-0f | |
264 MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.START,MachineState.ERROR,#10-17 | |
265 MachineState.START,MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#18-1f | |
266 5,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.ERROR,MachineState.START,MachineState.START,#20-27 | |
267 MachineState.START,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START #28-2f | |
268 ) | |
269 | |
270 EUCTW_CHAR_LEN_TABLE = (0, 0, 1, 2, 2, 2, 3) | |
271 | |
272 EUCTW_SM_MODEL = {'class_table': EUCTW_CLS, | |
273 'class_factor': 7, | |
274 'state_table': EUCTW_ST, | |
275 'char_len_table': EUCTW_CHAR_LEN_TABLE, | |
276 'name': 'x-euc-tw'} | |
277 | |
278 # GB2312 | |
279 | |
280 GB2312_CLS = ( | |
281 1,1,1,1,1,1,1,1, # 00 - 07 | |
282 1,1,1,1,1,1,0,0, # 08 - 0f | |
283 1,1,1,1,1,1,1,1, # 10 - 17 | |
284 1,1,1,0,1,1,1,1, # 18 - 1f | |
285 1,1,1,1,1,1,1,1, # 20 - 27 | |
286 1,1,1,1,1,1,1,1, # 28 - 2f | |
287 3,3,3,3,3,3,3,3, # 30 - 37 | |
288 3,3,1,1,1,1,1,1, # 38 - 3f | |
289 2,2,2,2,2,2,2,2, # 40 - 47 | |
290 2,2,2,2,2,2,2,2, # 48 - 4f | |
291 2,2,2,2,2,2,2,2, # 50 - 57 | |
292 2,2,2,2,2,2,2,2, # 58 - 5f | |
293 2,2,2,2,2,2,2,2, # 60 - 67 | |
294 2,2,2,2,2,2,2,2, # 68 - 6f | |
295 2,2,2,2,2,2,2,2, # 70 - 77 | |
296 2,2,2,2,2,2,2,4, # 78 - 7f | |
297 5,6,6,6,6,6,6,6, # 80 - 87 | |
298 6,6,6,6,6,6,6,6, # 88 - 8f | |
299 6,6,6,6,6,6,6,6, # 90 - 97 | |
300 6,6,6,6,6,6,6,6, # 98 - 9f | |
301 6,6,6,6,6,6,6,6, # a0 - a7 | |
302 6,6,6,6,6,6,6,6, # a8 - af | |
303 6,6,6,6,6,6,6,6, # b0 - b7 | |
304 6,6,6,6,6,6,6,6, # b8 - bf | |
305 6,6,6,6,6,6,6,6, # c0 - c7 | |
306 6,6,6,6,6,6,6,6, # c8 - cf | |
307 6,6,6,6,6,6,6,6, # d0 - d7 | |
308 6,6,6,6,6,6,6,6, # d8 - df | |
309 6,6,6,6,6,6,6,6, # e0 - e7 | |
310 6,6,6,6,6,6,6,6, # e8 - ef | |
311 6,6,6,6,6,6,6,6, # f0 - f7 | |
312 6,6,6,6,6,6,6,0 # f8 - ff | |
313 ) | |
314 | |
315 GB2312_ST = ( | |
316 MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START, 3,MachineState.ERROR,#00-07 | |
317 MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,#08-0f | |
318 MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.START,#10-17 | |
319 4,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#18-1f | |
320 MachineState.ERROR,MachineState.ERROR, 5,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,#20-27 | |
321 MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START #28-2f | |
322 ) | |
323 | |
324 # To be accurate, the length of class 6 can be either 2 or 4. | |
325 # But it is not necessary to discriminate between the two since | |
326 # it is used for frequency analysis only, and we are validating | |
327 # each code range there as well. So it is safe to set it to be | |
328 # 2 here. | |
329 GB2312_CHAR_LEN_TABLE = (0, 1, 1, 1, 1, 1, 2) | |
330 | |
331 GB2312_SM_MODEL = {'class_table': GB2312_CLS, | |
332 'class_factor': 7, | |
333 'state_table': GB2312_ST, | |
334 'char_len_table': GB2312_CHAR_LEN_TABLE, | |
335 'name': 'GB2312'} | |
336 | |
337 # Shift_JIS | |
338 | |
339 SJIS_CLS = ( | |
340 1,1,1,1,1,1,1,1, # 00 - 07 | |
341 1,1,1,1,1,1,0,0, # 08 - 0f | |
342 1,1,1,1,1,1,1,1, # 10 - 17 | |
343 1,1,1,0,1,1,1,1, # 18 - 1f | |
344 1,1,1,1,1,1,1,1, # 20 - 27 | |
345 1,1,1,1,1,1,1,1, # 28 - 2f | |
346 1,1,1,1,1,1,1,1, # 30 - 37 | |
347 1,1,1,1,1,1,1,1, # 38 - 3f | |
348 2,2,2,2,2,2,2,2, # 40 - 47 | |
349 2,2,2,2,2,2,2,2, # 48 - 4f | |
350 2,2,2,2,2,2,2,2, # 50 - 57 | |
351 2,2,2,2,2,2,2,2, # 58 - 5f | |
352 2,2,2,2,2,2,2,2, # 60 - 67 | |
353 2,2,2,2,2,2,2,2, # 68 - 6f | |
354 2,2,2,2,2,2,2,2, # 70 - 77 | |
355 2,2,2,2,2,2,2,1, # 78 - 7f | |
356 3,3,3,3,3,2,2,3, # 80 - 87 | |
357 3,3,3,3,3,3,3,3, # 88 - 8f | |
358 3,3,3,3,3,3,3,3, # 90 - 97 | |
359 3,3,3,3,3,3,3,3, # 98 - 9f | |
360 # 0xa0 is illegal in the Shift_JIS encoding, but some pages do | |
361 # contain such bytes, so we need to be more forgiving of errors. | |
362 2,2,2,2,2,2,2,2, # a0 - a7 | |
363 2,2,2,2,2,2,2,2, # a8 - af | |
364 2,2,2,2,2,2,2,2, # b0 - b7 | |
365 2,2,2,2,2,2,2,2, # b8 - bf | |
366 2,2,2,2,2,2,2,2, # c0 - c7 | |
367 2,2,2,2,2,2,2,2, # c8 - cf | |
368 2,2,2,2,2,2,2,2, # d0 - d7 | |
369 2,2,2,2,2,2,2,2, # d8 - df | |
370 3,3,3,3,3,3,3,3, # e0 - e7 | |
371 3,3,3,3,3,4,4,4, # e8 - ef | |
372 3,3,3,3,3,3,3,3, # f0 - f7 | |
373 3,3,3,3,3,0,0,0) # f8 - ff | |
374 | |
375 | |
376 SJIS_ST = ( | |
377 MachineState.ERROR,MachineState.START,MachineState.START, 3,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#00-07 | |
378 MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,#08-0f | |
379 MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START #10-17 | |
380 ) | |
381 | |
382 SJIS_CHAR_LEN_TABLE = (0, 1, 1, 2, 0, 0) | |
383 | |
384 SJIS_SM_MODEL = {'class_table': SJIS_CLS, | |
385 'class_factor': 6, | |
386 'state_table': SJIS_ST, | |
387 'char_len_table': SJIS_CHAR_LEN_TABLE, | |
388 'name': 'Shift_JIS'} | |
389 | |
390 # UCS2-BE | |
391 | |
392 UCS2BE_CLS = ( | |
393 0,0,0,0,0,0,0,0, # 00 - 07 | |
394 0,0,1,0,0,2,0,0, # 08 - 0f | |
395 0,0,0,0,0,0,0,0, # 10 - 17 | |
396 0,0,0,3,0,0,0,0, # 18 - 1f | |
397 0,0,0,0,0,0,0,0, # 20 - 27 | |
398 0,3,3,3,3,3,0,0, # 28 - 2f | |
399 0,0,0,0,0,0,0,0, # 30 - 37 | |
400 0,0,0,0,0,0,0,0, # 38 - 3f | |
401 0,0,0,0,0,0,0,0, # 40 - 47 | |
402 0,0,0,0,0,0,0,0, # 48 - 4f | |
403 0,0,0,0,0,0,0,0, # 50 - 57 | |
404 0,0,0,0,0,0,0,0, # 58 - 5f | |
405 0,0,0,0,0,0,0,0, # 60 - 67 | |
406 0,0,0,0,0,0,0,0, # 68 - 6f | |
407 0,0,0,0,0,0,0,0, # 70 - 77 | |
408 0,0,0,0,0,0,0,0, # 78 - 7f | |
409 0,0,0,0,0,0,0,0, # 80 - 87 | |
410 0,0,0,0,0,0,0,0, # 88 - 8f | |
411 0,0,0,0,0,0,0,0, # 90 - 97 | |
412 0,0,0,0,0,0,0,0, # 98 - 9f | |
413 0,0,0,0,0,0,0,0, # a0 - a7 | |
414 0,0,0,0,0,0,0,0, # a8 - af | |
415 0,0,0,0,0,0,0,0, # b0 - b7 | |
416 0,0,0,0,0,0,0,0, # b8 - bf | |
417 0,0,0,0,0,0,0,0, # c0 - c7 | |
418 0,0,0,0,0,0,0,0, # c8 - cf | |
419 0,0,0,0,0,0,0,0, # d0 - d7 | |
420 0,0,0,0,0,0,0,0, # d8 - df | |
421 0,0,0,0,0,0,0,0, # e0 - e7 | |
422 0,0,0,0,0,0,0,0, # e8 - ef | |
423 0,0,0,0,0,0,0,0, # f0 - f7 | |
424 0,0,0,0,0,0,4,5 # f8 - ff | |
425 ) | |
426 | |
427 UCS2BE_ST = ( | |
428 5, 7, 7,MachineState.ERROR, 4, 3,MachineState.ERROR,MachineState.ERROR,#00-07 | |
429 MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,#08-0f | |
430 MachineState.ITS_ME,MachineState.ITS_ME, 6, 6, 6, 6,MachineState.ERROR,MachineState.ERROR,#10-17 | |
431 6, 6, 6, 6, 6,MachineState.ITS_ME, 6, 6,#18-1f | |
432 6, 6, 6, 6, 5, 7, 7,MachineState.ERROR,#20-27 | |
433 5, 8, 6, 6,MachineState.ERROR, 6, 6, 6,#28-2f | |
434 6, 6, 6, 6,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START #30-37 | |
435 ) | |
436 | |
437 UCS2BE_CHAR_LEN_TABLE = (2, 2, 2, 0, 2, 2) | |
438 | |
439 UCS2BE_SM_MODEL = {'class_table': UCS2BE_CLS, | |
440 'class_factor': 6, | |
441 'state_table': UCS2BE_ST, | |
442 'char_len_table': UCS2BE_CHAR_LEN_TABLE, | |
443 'name': 'UTF-16BE'} | |
444 | |
445 # UCS2-LE | |
446 | |
447 UCS2LE_CLS = ( | |
448 0,0,0,0,0,0,0,0, # 00 - 07 | |
449 0,0,1,0,0,2,0,0, # 08 - 0f | |
450 0,0,0,0,0,0,0,0, # 10 - 17 | |
451 0,0,0,3,0,0,0,0, # 18 - 1f | |
452 0,0,0,0,0,0,0,0, # 20 - 27 | |
453 0,3,3,3,3,3,0,0, # 28 - 2f | |
454 0,0,0,0,0,0,0,0, # 30 - 37 | |
455 0,0,0,0,0,0,0,0, # 38 - 3f | |
456 0,0,0,0,0,0,0,0, # 40 - 47 | |
457 0,0,0,0,0,0,0,0, # 48 - 4f | |
458 0,0,0,0,0,0,0,0, # 50 - 57 | |
459 0,0,0,0,0,0,0,0, # 58 - 5f | |
460 0,0,0,0,0,0,0,0, # 60 - 67 | |
461 0,0,0,0,0,0,0,0, # 68 - 6f | |
462 0,0,0,0,0,0,0,0, # 70 - 77 | |
463 0,0,0,0,0,0,0,0, # 78 - 7f | |
464 0,0,0,0,0,0,0,0, # 80 - 87 | |
465 0,0,0,0,0,0,0,0, # 88 - 8f | |
466 0,0,0,0,0,0,0,0, # 90 - 97 | |
467 0,0,0,0,0,0,0,0, # 98 - 9f | |
468 0,0,0,0,0,0,0,0, # a0 - a7 | |
469 0,0,0,0,0,0,0,0, # a8 - af | |
470 0,0,0,0,0,0,0,0, # b0 - b7 | |
471 0,0,0,0,0,0,0,0, # b8 - bf | |
472 0,0,0,0,0,0,0,0, # c0 - c7 | |
473 0,0,0,0,0,0,0,0, # c8 - cf | |
474 0,0,0,0,0,0,0,0, # d0 - d7 | |
475 0,0,0,0,0,0,0,0, # d8 - df | |
476 0,0,0,0,0,0,0,0, # e0 - e7 | |
477 0,0,0,0,0,0,0,0, # e8 - ef | |
478 0,0,0,0,0,0,0,0, # f0 - f7 | |
479 0,0,0,0,0,0,4,5 # f8 - ff | |
480 ) | |
481 | |
482 UCS2LE_ST = ( | |
483 6, 6, 7, 6, 4, 3,MachineState.ERROR,MachineState.ERROR,#00-07 | |
484 MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,#08-0f | |
485 MachineState.ITS_ME,MachineState.ITS_ME, 5, 5, 5,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,#10-17 | |
486 5, 5, 5,MachineState.ERROR, 5,MachineState.ERROR, 6, 6,#18-1f | |
487 7, 6, 8, 8, 5, 5, 5,MachineState.ERROR,#20-27 | |
488 5, 5, 5,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 5, 5,#28-2f | |
489 5, 5, 5,MachineState.ERROR, 5,MachineState.ERROR,MachineState.START,MachineState.START #30-37 | |
490 ) | |
491 | |
492 UCS2LE_CHAR_LEN_TABLE = (2, 2, 2, 2, 2, 2) | |
493 | |
494 UCS2LE_SM_MODEL = {'class_table': UCS2LE_CLS, | |
495 'class_factor': 6, | |
496 'state_table': UCS2LE_ST, | |
497 'char_len_table': UCS2LE_CHAR_LEN_TABLE, | |
498 'name': 'UTF-16LE'} | |
499 | |
500 # UTF-8 | |
501 | |
502 UTF8_CLS = ( | |
503 1,1,1,1,1,1,1,1, # 00 - 07 #allow 0x00 as a legal value | |
504 1,1,1,1,1,1,0,0, # 08 - 0f | |
505 1,1,1,1,1,1,1,1, # 10 - 17 | |
506 1,1,1,0,1,1,1,1, # 18 - 1f | |
507 1,1,1,1,1,1,1,1, # 20 - 27 | |
508 1,1,1,1,1,1,1,1, # 28 - 2f | |
509 1,1,1,1,1,1,1,1, # 30 - 37 | |
510 1,1,1,1,1,1,1,1, # 38 - 3f | |
511 1,1,1,1,1,1,1,1, # 40 - 47 | |
512 1,1,1,1,1,1,1,1, # 48 - 4f | |
513 1,1,1,1,1,1,1,1, # 50 - 57 | |
514 1,1,1,1,1,1,1,1, # 58 - 5f | |
515 1,1,1,1,1,1,1,1, # 60 - 67 | |
516 1,1,1,1,1,1,1,1, # 68 - 6f | |
517 1,1,1,1,1,1,1,1, # 70 - 77 | |
518 1,1,1,1,1,1,1,1, # 78 - 7f | |
519 2,2,2,2,3,3,3,3, # 80 - 87 | |
520 4,4,4,4,4,4,4,4, # 88 - 8f | |
521 4,4,4,4,4,4,4,4, # 90 - 97 | |
522 4,4,4,4,4,4,4,4, # 98 - 9f | |
523 5,5,5,5,5,5,5,5, # a0 - a7 | |
524 5,5,5,5,5,5,5,5, # a8 - af | |
525 5,5,5,5,5,5,5,5, # b0 - b7 | |
526 5,5,5,5,5,5,5,5, # b8 - bf | |
527 0,0,6,6,6,6,6,6, # c0 - c7 | |
528 6,6,6,6,6,6,6,6, # c8 - cf | |
529 6,6,6,6,6,6,6,6, # d0 - d7 | |
530 6,6,6,6,6,6,6,6, # d8 - df | |
531 7,8,8,8,8,8,8,8, # e0 - e7 | |
532 8,8,8,8,8,9,8,8, # e8 - ef | |
533 10,11,11,11,11,11,11,11, # f0 - f7 | |
534 12,13,13,13,14,15,0,0 # f8 - ff | |
535 ) | |
536 | |
537 UTF8_ST = ( | |
538 MachineState.ERROR,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 12, 10,#00-07 | |
539 9, 11, 8, 7, 6, 5, 4, 3,#08-0f | |
540 MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#10-17 | |
541 MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#18-1f | |
542 MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,#20-27 | |
543 MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,#28-2f | |
544 MachineState.ERROR,MachineState.ERROR, 5, 5, 5, 5,MachineState.ERROR,MachineState.ERROR,#30-37 | |
545 MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#38-3f | |
546 MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 5, 5, 5,MachineState.ERROR,MachineState.ERROR,#40-47 | |
547 MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#48-4f | |
548 MachineState.ERROR,MachineState.ERROR, 7, 7, 7, 7,MachineState.ERROR,MachineState.ERROR,#50-57 | |
549 MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#58-5f | |
550 MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 7, 7,MachineState.ERROR,MachineState.ERROR,#60-67 | |
551 MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#68-6f | |
552 MachineState.ERROR,MachineState.ERROR, 9, 9, 9, 9,MachineState.ERROR,MachineState.ERROR,#70-77 | |
553 MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#78-7f | |
554 MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 9,MachineState.ERROR,MachineState.ERROR,#80-87 | |
555 MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#88-8f | |
556 MachineState.ERROR,MachineState.ERROR, 12, 12, 12, 12,MachineState.ERROR,MachineState.ERROR,#90-97 | |
557 MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#98-9f | |
558 MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 12,MachineState.ERROR,MachineState.ERROR,#a0-a7 | |
559 MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#a8-af | |
560 MachineState.ERROR,MachineState.ERROR, 12, 12, 12,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#b0-b7 | |
561 MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#b8-bf | |
562 MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,#c0-c7 | |
563 MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR #c8-cf | |
564 ) | |
565 | |
566 UTF8_CHAR_LEN_TABLE = (0, 1, 0, 0, 0, 0, 2, 3, 3, 3, 4, 4, 5, 5, 6, 6) | |
567 | |
568 UTF8_SM_MODEL = {'class_table': UTF8_CLS, | |
569 'class_factor': 16, | |
570 'state_table': UTF8_ST, | |
571 'char_len_table': UTF8_CHAR_LEN_TABLE, | |
572 'name': 'UTF-8'} |