Mercurial > repos > guerler > hhblits
comparison lib/python3.8/site-packages/pip/_vendor/chardet/codingstatemachine.py @ 0:9e54283cc701 draft
"planemo upload commit d12c32a45bcd441307e632fca6d9af7d60289d44"
author | guerler |
---|---|
date | Mon, 27 Jul 2020 03:47:31 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:9e54283cc701 |
---|---|
1 ######################## BEGIN LICENSE BLOCK ######################## | |
2 # The Original Code is mozilla.org code. | |
3 # | |
4 # The Initial Developer of the Original Code is | |
5 # Netscape Communications Corporation. | |
6 # Portions created by the Initial Developer are Copyright (C) 1998 | |
7 # the Initial Developer. All Rights Reserved. | |
8 # | |
9 # Contributor(s): | |
10 # Mark Pilgrim - port to Python | |
11 # | |
12 # This library is free software; you can redistribute it and/or | |
13 # modify it under the terms of the GNU Lesser General Public | |
14 # License as published by the Free Software Foundation; either | |
15 # version 2.1 of the License, or (at your option) any later version. | |
16 # | |
17 # This library is distributed in the hope that it will be useful, | |
18 # but WITHOUT ANY WARRANTY; without even the implied warranty of | |
19 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
20 # Lesser General Public License for more details. | |
21 # | |
22 # You should have received a copy of the GNU Lesser General Public | |
23 # License along with this library; if not, write to the Free Software | |
24 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA | |
25 # 02110-1301 USA | |
26 ######################### END LICENSE BLOCK ######################### | |
27 | |
28 import logging | |
29 | |
30 from .enums import MachineState | |
31 | |
32 | |
class CodingStateMachine(object):
    """
    Byte-level state machine that checks a byte sequence against one encoding.

    The detector hands every incoming byte to each state machine that is
    still active, one byte at a time. A machine derives its next state from
    its current state and the byte it was just given. Three states matter to
    an auto-detector:

    START state: The initial state; also the state reached again once a
                 legal byte sequence (i.e. a valid code point) for a
                 character has been identified.

    ME state:    The machine has seen a byte sequence that is specific to
                 the charset it was built for, and that no other possible
                 encoding can contain. This will lead to an immediate
                 positive answer for the detector.

    ERROR state: The machine has seen a byte sequence that is illegal for
                 its encoding. This will lead to an immediate negative
                 answer for this encoding; the detector will exclude the
                 encoding from consideration from here on.
    """

    def __init__(self, sm):
        """Wrap the model mapping ``sm`` and put the machine in START."""
        self._model = sm
        # Position within, and total length of, the character currently
        # being consumed; both are refreshed by next_state() whenever a
        # byte arrives while the machine is in the START state.
        self._curr_byte_pos = 0
        self._curr_char_len = 0
        self._curr_state = None
        self.logger = logging.getLogger(__name__)
        self.reset()

    def reset(self):
        """Return the machine to the START state."""
        self._curr_state = MachineState.START

    def next_state(self, c):
        """Feed the byte value ``c`` to the machine and return the new state.

        The byte is first mapped to its class through the model's
        ``class_table``. If the machine is in START, this byte opens a new
        character, so the byte position is zeroed and the character length
        is read from ``char_len_table``.
        """
        model = self._model
        byte_class = model['class_table'][c]

        if self._curr_state == MachineState.START:
            self._curr_byte_pos = 0
            self._curr_char_len = model['char_len_table'][byte_class]

        # state_table is a flat sequence indexed by
        # (current state * class_factor + byte class).
        table_index = self._curr_state * model['class_factor'] + byte_class
        following = model['state_table'][table_index]

        self._curr_state = following
        self._curr_byte_pos += 1
        return following

    def get_current_charlen(self):
        """Return the character length recorded by the last START byte."""
        return self._curr_char_len

    def get_coding_state_machine(self):
        """Return the ``'name'`` entry of the underlying model."""
        return self._model['name']

    @property
    def language(self):
        """The ``'language'`` entry of the underlying model."""
        return self._model['language']