comparison env/lib/python3.7/site-packages/chardet/sbcsgroupprober.py @ 5:9b1c78e6ba9c draft default tip

"planemo upload commit 6c0a8142489327ece472c84e558c47da711a9142"
author shellac
date Mon, 01 Jun 2020 08:59:25 -0400
parents 79f47841a781
children
comparison
equal deleted inserted replaced
4:79f47841a781 5:9b1c78e6ba9c
1 ######################## BEGIN LICENSE BLOCK ########################
2 # The Original Code is Mozilla Universal charset detector code.
3 #
4 # The Initial Developer of the Original Code is
5 # Netscape Communications Corporation.
6 # Portions created by the Initial Developer are Copyright (C) 2001
7 # the Initial Developer. All Rights Reserved.
8 #
9 # Contributor(s):
10 # Mark Pilgrim - port to Python
11 # Shy Shalom - original C code
12 #
13 # This library is free software; you can redistribute it and/or
14 # modify it under the terms of the GNU Lesser General Public
15 # License as published by the Free Software Foundation; either
16 # version 2.1 of the License, or (at your option) any later version.
17 #
18 # This library is distributed in the hope that it will be useful,
19 # but WITHOUT ANY WARRANTY; without even the implied warranty of
20 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 # Lesser General Public License for more details.
22 #
23 # You should have received a copy of the GNU Lesser General Public
24 # License along with this library; if not, write to the Free Software
25 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
26 # 02110-1301 USA
27 ######################### END LICENSE BLOCK #########################
28
29 from .charsetgroupprober import CharSetGroupProber
30 from .sbcharsetprober import SingleByteCharSetProber
31 from .langcyrillicmodel import (Win1251CyrillicModel, Koi8rModel,
32 Latin5CyrillicModel, MacCyrillicModel,
33 Ibm866Model, Ibm855Model)
34 from .langgreekmodel import Latin7GreekModel, Win1253GreekModel
35 from .langbulgarianmodel import Latin5BulgarianModel, Win1251BulgarianModel
36 # from .langhungarianmodel import Latin2HungarianModel, Win1250HungarianModel
37 from .langthaimodel import TIS620ThaiModel
38 from .langhebrewmodel import Win1255HebrewModel
39 from .hebrewprober import HebrewProber
40 from .langturkishmodel import Latin5TurkishModel
41
42
class SBCSGroupProber(CharSetGroupProber):
    """Group prober that bundles the single-byte charset probers.

    Construction creates one ``SingleByteCharSetProber`` per imported
    language model (Cyrillic, Greek, Bulgarian, Thai, Turkish), then adds the
    Hebrew prober together with its two Windows-1255 model probers, stores
    everything in ``self.probers`` and finally calls ``self.reset()``.
    """

    def __init__(self):
        super(SBCSGroupProber, self).__init__()

        # One plain single-byte prober is built per model, in this order.
        language_models = (
            Win1251CyrillicModel,
            Koi8rModel,
            Latin5CyrillicModel,
            MacCyrillicModel,
            Ibm866Model,
            Ibm855Model,
            Latin7GreekModel,
            Win1253GreekModel,
            Latin5BulgarianModel,
            Win1251BulgarianModel,
            # TODO: Restore Hungarian encodings (iso-8859-2 and windows-1250)
            #       after we retrain model.
            # Latin2HungarianModel,
            # Win1250HungarianModel,
            TIS620ThaiModel,
            Latin5TurkishModel,
        )
        self.probers = [
            SingleByteCharSetProber(model) for model in language_models
        ]

        # Hebrew uses three cooperating probers: the HebrewProber itself plus
        # two Windows-1255 model probers that both receive it as their third
        # argument. The boolean second argument is what separates the
        # "logical" (False) prober from the "visual" (True) one.
        # NOTE(review): presumably that flag means "reversed" -- confirm
        # against SingleByteCharSetProber's signature.
        name_prober = HebrewProber()
        logical_prober = SingleByteCharSetProber(
            Win1255HebrewModel, False, name_prober
        )
        visual_prober = SingleByteCharSetProber(
            Win1255HebrewModel, True, name_prober
        )
        name_prober.set_model_probers(logical_prober, visual_prober)

        # Appended after the language probers, in the same order as before:
        # the Hebrew prober first, then logical, then visual.
        for prober in (name_prober, logical_prober, visual_prober):
            self.probers.append(prober)

        self.reset()