Mercurial > repos > shellac > guppy_basecaller
annotate env/lib/python3.7/site-packages/chardet/sbcharsetprober.py @ 0:26e78fe6e8c4 draft
"planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
| author | shellac | 
|---|---|
| date | Sat, 02 May 2020 07:14:21 -0400 | 
| parents | |
| children | 
| rev | line source | 
|---|---|
| 
0
 
26e78fe6e8c4
"planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
 
shellac 
parents:  
diff
changeset
 | 
1 ######################## BEGIN LICENSE BLOCK ######################## | 
| 
 
26e78fe6e8c4
"planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
 
shellac 
parents:  
diff
changeset
 | 
2 # The Original Code is Mozilla Universal charset detector code. | 
| 
 
26e78fe6e8c4
"planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
 
shellac 
parents:  
diff
changeset
 | 
3 # | 
| 
 
26e78fe6e8c4
"planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
 
shellac 
parents:  
diff
changeset
 | 
4 # The Initial Developer of the Original Code is | 
| 
 
26e78fe6e8c4
"planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
 
shellac 
parents:  
diff
changeset
 | 
5 # Netscape Communications Corporation. | 
| 
 
26e78fe6e8c4
"planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
 
shellac 
parents:  
diff
changeset
 | 
6 # Portions created by the Initial Developer are Copyright (C) 2001 | 
| 
 
26e78fe6e8c4
"planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
 
shellac 
parents:  
diff
changeset
 | 
7 # the Initial Developer. All Rights Reserved. | 
| 
 
26e78fe6e8c4
"planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
 
shellac 
parents:  
diff
changeset
 | 
8 # | 
| 
 
26e78fe6e8c4
"planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
 
shellac 
parents:  
diff
changeset
 | 
9 # Contributor(s): | 
| 
 
26e78fe6e8c4
"planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
 
shellac 
parents:  
diff
changeset
 | 
10 # Mark Pilgrim - port to Python | 
| 
 
26e78fe6e8c4
"planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
 
shellac 
parents:  
diff
changeset
 | 
11 # Shy Shalom - original C code | 
| 
 
26e78fe6e8c4
"planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
 
shellac 
parents:  
diff
changeset
 | 
12 # | 
| 
 
26e78fe6e8c4
"planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
 
shellac 
parents:  
diff
changeset
 | 
13 # This library is free software; you can redistribute it and/or | 
| 
 
26e78fe6e8c4
"planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
 
shellac 
parents:  
diff
changeset
 | 
14 # modify it under the terms of the GNU Lesser General Public | 
| 
 
26e78fe6e8c4
"planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
 
shellac 
parents:  
diff
changeset
 | 
15 # License as published by the Free Software Foundation; either | 
| 
 
26e78fe6e8c4
"planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
 
shellac 
parents:  
diff
changeset
 | 
16 # version 2.1 of the License, or (at your option) any later version. | 
| 
 
26e78fe6e8c4
"planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
 
shellac 
parents:  
diff
changeset
 | 
17 # | 
| 
 
26e78fe6e8c4
"planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
 
shellac 
parents:  
diff
changeset
 | 
18 # This library is distributed in the hope that it will be useful, | 
| 
 
26e78fe6e8c4
"planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
 
shellac 
parents:  
diff
changeset
 | 
19 # but WITHOUT ANY WARRANTY; without even the implied warranty of | 
| 
 
26e78fe6e8c4
"planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
 
shellac 
parents:  
diff
changeset
 | 
20 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | 
| 
 
26e78fe6e8c4
"planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
 
shellac 
parents:  
diff
changeset
 | 
21 # Lesser General Public License for more details. | 
| 
 
26e78fe6e8c4
"planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
 
shellac 
parents:  
diff
changeset
 | 
22 # | 
| 
 
26e78fe6e8c4
"planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
 
shellac 
parents:  
diff
changeset
 | 
23 # You should have received a copy of the GNU Lesser General Public | 
| 
 
26e78fe6e8c4
"planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
 
shellac 
parents:  
diff
changeset
 | 
24 # License along with this library; if not, write to the Free Software | 
| 
 
26e78fe6e8c4
"planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
 
shellac 
parents:  
diff
changeset
 | 
25 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA | 
| 
 
26e78fe6e8c4
"planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
 
shellac 
parents:  
diff
changeset
 | 
26 # 02110-1301 USA | 
| 
 
26e78fe6e8c4
"planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
 
shellac 
parents:  
diff
changeset
 | 
27 ######################### END LICENSE BLOCK ######################### | 
| 
 
26e78fe6e8c4
"planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
 
shellac 
parents:  
diff
changeset
 | 
28 | 
| 
 
26e78fe6e8c4
"planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
 
shellac 
parents:  
diff
changeset
 | 
29 from .charsetprober import CharSetProber | 
| 
 
26e78fe6e8c4
"planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
 
shellac 
parents:  
diff
changeset
 | 
30 from .enums import CharacterCategory, ProbingState, SequenceLikelihood | 
| 
 
26e78fe6e8c4
"planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
 
shellac 
parents:  
diff
changeset
 | 
31 | 
| 
 
26e78fe6e8c4
"planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
 
shellac 
parents:  
diff
changeset
 | 
32 | 
| 
 
26e78fe6e8c4
"planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
 
shellac 
parents:  
diff
changeset
 | 
class SingleByteCharSetProber(CharSetProber):
    """Probe a byte stream against one single-byte charset language model.

    Every byte is mapped to a frequency "order" through the model's
    ``char_to_order_map``.  Each pair of consecutive bytes whose orders are
    both below ``SAMPLE_SIZE`` is looked up in the model's flat
    ``precedence_matrix`` and tallied per likelihood category.  The share of
    POSITIVE pairs, normalised by the model's ``typical_positive_ratio``,
    drives the confidence reported by :meth:`get_confidence`.

    The model is accessed like a mapping with these keys: ``charset_name``,
    ``language`` (optional, read with ``.get``), ``keep_english_letter``,
    ``char_to_order_map``, ``precedence_matrix`` and
    ``typical_positive_ratio``.
    """

    # Orders below this value take part in the pair (sequence) statistics.
    SAMPLE_SIZE = 64
    # Number of counted pairs required before a shortcut decision is tried.
    SB_ENOUGH_REL_THRESHOLD = 1024  # 0.25 * SAMPLE_SIZE^2
    # Confidence above which feed() switches the state to FOUND_IT.
    POSITIVE_SHORTCUT_THRESHOLD = 0.95
    # Confidence below which feed() switches the state to NOT_ME.
    NEGATIVE_SHORTCUT_THRESHOLD = 0.05

    def __init__(self, model, reversed=False, name_prober=None):
        """Create a prober for ``model``.

        :param model: mapping describing the charset (see the class
            docstring for the keys that are read).
        :param reversed: when true, every pair is looked up in the
            precedence matrix with its two orders swapped.  The name shadows
            the builtin but is kept because it is part of the public
            signature.
        :param name_prober: optional auxiliary prober; when given, its
            ``charset_name`` and ``language`` are reported instead of the
            model's.
        """
        super(SingleByteCharSetProber, self).__init__()
        self._model = model
        # TRUE if we need to reverse every pair in the model lookup
        self._reversed = reversed
        # Optional auxiliary prober for name decision
        self._name_prober = name_prober
        # The counters are declared here and given real values by reset().
        self._last_order = None
        self._seq_counters = None
        self._total_seqs = None
        self._total_char = None
        self._freq_char = None
        self.reset()

    def reset(self):
        """Return the prober to its initial state and zero all statistics."""
        super(SingleByteCharSetProber, self).reset()
        # char order of last character; 255 is >= SAMPLE_SIZE, so the first
        # byte fed after a reset never completes a pair.
        self._last_order = 255
        self._seq_counters = [0] * SequenceLikelihood.get_num_categories()
        self._total_seqs = 0
        self._total_char = 0
        # characters that fall in our sampling range
        self._freq_char = 0

    @property
    def charset_name(self):
        """Charset name from the auxiliary name prober, else from the model."""
        if self._name_prober:
            return self._name_prober.charset_name
        else:
            return self._model['charset_name']

    @property
    def language(self):
        """Language from the auxiliary name prober, else from the model.

        The model's value is read with ``.get``, so it is ``None`` when the
        model has no ``language`` entry.
        """
        if self._name_prober:
            return self._name_prober.language
        else:
            return self._model.get('language')

    def feed(self, byte_str):
        """Update the statistics with ``byte_str`` and return the state.

        :param byte_str: the bytes to analyse.  Each element is used directly
            as an index into ``char_to_order_map``, so iteration is expected
            to yield integers (e.g. a ``bytearray``) -- TODO confirm against
            the callers.
        :returns: ``self.state`` after processing the input.
        """
        if not self._model['keep_english_letter']:
            # presumably strips words made up only of English letters; the
            # helper is not defined in this class -- verify in CharSetProber.
            byte_str = self.filter_international_words(byte_str)
        if not byte_str:
            return self.state

        # Loop-invariant lookups are hoisted out of the per-byte loop.
        char_to_order_map = self._model['char_to_order_map']
        precedence_matrix = self._model['precedence_matrix']
        sample_size = self.SAMPLE_SIZE

        for c in byte_str:
            # XXX: Order is in range 1-64, so one would think we want 0-63 here,
            #      but that leads to 27 more test failures than before.
            order = char_to_order_map[c]
            # XXX: This was SYMBOL_CAT_ORDER before, with a value of 250, but
            #      CharacterCategory.SYMBOL is actually 253, so we use CONTROL
            #      to make it closer to the original intent. The only difference
            #      is whether or not we count digits and control characters for
            #      _total_char purposes.
            if order < CharacterCategory.CONTROL:
                self._total_char += 1
            if order < sample_size:
                self._freq_char += 1
                if self._last_order < sample_size:
                    self._total_seqs += 1
                    # The matrix is stored flat, SAMPLE_SIZE entries per row.
                    if not self._reversed:
                        index = (self._last_order * sample_size) + order
                    else:  # reverse the order of the letters in the lookup
                        index = (order * sample_size) + self._last_order
                    self._seq_counters[precedence_matrix[index]] += 1
            self._last_order = order

        charset_name = self._model['charset_name']
        if self.state == ProbingState.DETECTING:
            # Only attempt a shortcut once enough pairs have been counted.
            if self._total_seqs > self.SB_ENOUGH_REL_THRESHOLD:
                confidence = self.get_confidence()
                if confidence > self.POSITIVE_SHORTCUT_THRESHOLD:
                    self.logger.debug('%s confidence = %s, we have a winner',
                                      charset_name, confidence)
                    self._state = ProbingState.FOUND_IT
                elif confidence < self.NEGATIVE_SHORTCUT_THRESHOLD:
                    self.logger.debug('%s confidence = %s, below negative '
                                      'shortcut threshhold %s', charset_name,
                                      confidence,
                                      self.NEGATIVE_SHORTCUT_THRESHOLD)
                    self._state = ProbingState.NOT_ME

        return self.state

    def get_confidence(self):
        """Return the current confidence that the input matches the model.

        The value is 0.01 until at least one pair has been counted.  After
        that it is the fraction of POSITIVE pairs divided by the model's
        ``typical_positive_ratio``, scaled by the share of counted
        characters that fell inside the sampling range.  Results of 1.0 or
        more are reported as 0.99.
        """
        r = 0.01
        if self._total_seqs > 0:
            # "1.0 *" forces true division on interpreters where "/" on two
            # ints truncates.
            r = ((1.0 * self._seq_counters[SequenceLikelihood.POSITIVE]) /
                 self._total_seqs / self._model['typical_positive_ratio'])
            # _total_char cannot be zero here: every counted pair involves
            # an order below SAMPLE_SIZE, and feed() counts such an order in
            # _total_char as long as CharacterCategory.CONTROL is above
            # SAMPLE_SIZE (the source comment puts SYMBOL at 253).
            r = r * self._freq_char / self._total_char
            if r >= 1.0:
                r = 0.99
        return r
