annotate galaxy/sre_yield.py @ 42:439b70949f8d draft

Uploaded
author gianmarco_piccinno
date Mon, 20 May 2019 16:44:00 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
42
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
1 #!/usr/bin/env python2
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
2 #
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
3 # Copyright 2011-2016 Google Inc.
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
4 #
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
5 # Licensed under the Apache License, Version 2.0 (the "License");
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
6 # you may not use this file except in compliance with the License.
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
7 # You may obtain a copy of the License at
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
8 #
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
9 # http://www.apache.org/licenses/LICENSE-2.0
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
10 #
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
11 # Unless required by applicable law or agreed to in writing, software
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
12 # distributed under the License is distributed on an "AS IS" BASIS,
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
14 # See the License for the specific language governing permissions and
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
15 # limitations under the License.
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
16 #
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
17 # vim: sw=2 sts=2 et
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
18
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
19 """This module can generate all strings that match a regular expression.
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
20
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
21 The regex is parsed using the SRE module that is standard in python,
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
22 then the data structure is executed to form a bunch of iterators.
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
23 """
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
24
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
25 __author__ = 'alexperry@google.com (Alex Perry)'
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
26 __all__ = ['Values', 'AllStrings', 'AllMatches', 'ParseError']
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
27
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
28
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
29 import bisect
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
30 import math
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
31 import re
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
32 import sre_constants
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
33 import sre_parse
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
34 import string
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
35 import sys
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
36 import types
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
37
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
38 import cachingseq
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
39 import fastdivmod
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
40
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
# Python 3 has no xrange builtin; alias it to range there so the rest of the
# module can use one name on either major version.
try:
  xrange = xrange
except NameError:
  xrange = range

# Characters that can follow a backslash in an escaped-metacharacter
# sequence, and a compiled pattern matching one such two-character escape.
_RE_METACHARS = r'$^{}*+\\'
_ESCAPED_METACHAR = r'\\[' + _RE_METACHARS + r']'
ESCAPED_METACHAR_RE = re.compile(_ESCAPED_METACHAR)
# ASCII by default, see https://github.com/google/sre_yield/issues/3
CHARSET = list(map(chr, range(256)))

# Letters, digits and underscore.
WORD = ''.join((string.ascii_letters, string.digits, '_'))

# Use re.ASCII where the re module provides it, otherwise no flags.
DEFAULT_RE_FLAGS = getattr(re, 'ASCII', 0)

STATE_START, STATE_MIDDLE, STATE_END = 0, 1, 2
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
60
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
def Not(chars):
  """Returns the members of CHARSET that do not occur in chars.

  Args:
    chars: iterable of characters to exclude.

  Returns:
    A string of the remaining CHARSET members, sorted ascending, each
    appearing once.
  """
  remaining = set(CHARSET).difference(chars)
  return ''.join(sorted(remaining))
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
63
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
64
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
# Maps each sre_constants character-category code to the string of characters
# used for it; every NOT_* entry is the complement within CHARSET (via Not).
CATEGORIES = dict((
    (sre_constants.CATEGORY_WORD, WORD),
    (sre_constants.CATEGORY_NOT_WORD, Not(WORD)),
    (sre_constants.CATEGORY_DIGIT, string.digits),
    (sre_constants.CATEGORY_NOT_DIGIT, Not(string.digits)),
    (sre_constants.CATEGORY_SPACE, string.whitespace),
    (sre_constants.CATEGORY_NOT_SPACE, Not(string.whitespace)),
))

# This constant varies between builds of Python; this is the lower value.
MAX_REPEAT_COUNT = 0xFFFF
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
76
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
77
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
class ParseError(Exception):
  """Module-specific exception type, exported through __all__."""
  # NOTE(review): the raise sites are not in this part of the file;
  # presumably raised for patterns that cannot be handled -- confirm.
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
80
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
81
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
def slice_indices(slice_obj, size):
  """Resolves a slice into concrete (start, stop, step) values.

  Plays the role of slice_obj.indices() but works with arbitrarily large
  integers.

  Args:
    slice_obj: slice whose start, stop and step may each be None.
    size: length of the sequence being sliced.

  Returns:
    A (start, stop, step) tuple in which no element is None.

  Raises:
    IndexError: propagated from _adjust_index when an explicit start or stop
      is still negative after size has been added to it.
  """
  step = 1 if slice_obj.step is None else slice_obj.step

  # An explicit bound is normalised through _adjust_index.  An omitted bound
  # takes the default for the direction of travel and is used as-is, which
  # is why the -1 stop of a backwards slice is never adjusted.
  if slice_obj.start is not None:
    start = _adjust_index(slice_obj.start, size)
  elif step > 0:
    start = 0
  else:
    start = size - 1

  if slice_obj.stop is not None:
    stop = _adjust_index(slice_obj.stop, size)
  elif step > 0:
    stop = size
  else:
    stop = -1

  return (start, stop, step)
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
110
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
111
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
def _adjust_index(n, size):
  """Normalises an index against a sequence of the given size.

  Args:
    n: index; a negative value counts back from the end.
    size: length of the sequence.

  Returns:
    The index with size added if it was negative, capped at size when it
    lies beyond the end.

  Raises:
    IndexError: if the index is still negative after adding size.
  """
  idx = n + size if n < 0 else n
  if idx < 0:
    raise IndexError("Out of range")
  # Values past the end are clamped rather than rejected.
  return size if idx > size else idx
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
121
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
122
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
def _xrange(*args):
  """Returns xrange(*args), or a _bigrange generator if xrange overflows.

  Exists because xrange doesn't support longs.

  Args:
    *args: the same positional arguments xrange accepts.
  """
  try:
    # The real xrange is preferred whenever it accepts the arguments.
    result = xrange(*args)
  except OverflowError:
    result = _bigrange(*args)
  return result
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
130
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
131
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
def _bigrange(*args):
  """Generator stand-in for xrange that works with arbitrarily large ints.

  Accepts the (stop), (start, stop) and (start, stop, step) call forms.

  Args:
    *args: one to three integers, interpreted as above.

  Yields:
    start, start + step, ... for as long as the value has not reached stop.
    Nothing is yielded when start is already at or beyond stop.

  Raises:
    ValueError: when iteration begins, if given zero or more than three
      arguments, or a step of zero.
  """
  if len(args) == 1:
    start, stop, step = 0, args[0], 1
  elif len(args) == 2:
    start, stop = args
    step = 1
  elif len(args) == 3:
    start, stop, step = args
  else:
    raise ValueError("_bigrange expects 1 to 3 args, got %d" % len(args))

  # A zero step would never reach stop and loop forever.
  if step == 0:
    raise ValueError("_bigrange step must not be zero")

  i = start
  # Check the bound before yielding so that an empty range yields nothing.
  while (step > 0 and i < stop) or (step < 0 and i > stop):
    yield i
    i += step
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
151
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
152
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
class WrappedSequence(object):
  """Wraps a sequence; exists purely as a base class for the other types."""

  def __init__(self, raw):
    """Stores the wrapped sequence and its length.

    Derived classes will likely override this constructor.

    Args:
      raw: the sequence to wrap; must provide __len__.
    """
    self.raw = raw
    # __len__() is called directly instead of len(), because len() insists
    # on converting the returned number from a long-int to an ordinary int.
    self.length = raw.__len__()

  def get_item(self, i, d=None):
    """Returns element i of the wrapped sequence.

    Args:
      i: index, normalised with _adjust_index against self.length.
      d: optional object passed on to raw.get_item() when raw has such a
        method; otherwise unused.
    """
    idx = _adjust_index(i, self.length)
    if not hasattr(self.raw, 'get_item'):
      return self.raw[idx]
    return self.raw.get_item(idx, d)

  def __len__(self):
    return self.length

  def __getitem__(self, i):
    """Supports both integer indexing and slicing.

    A slice produces a SlicedSequence over this object; when that view has
    fewer than 16 items it is unpacked into an ordinary list instead.
    """
    if not isinstance(i, slice):
      # Plain index: defer to get_item, which subclasses provide.
      return self.get_item(_adjust_index(i, self.length))

    view = SlicedSequence(self, slicer=i)
    if view.__len__() < 16:
      # Short lists are unpacked
      return [item for item in view]
    return view

  def __iter__(self):
    for position in _xrange(int(self.length)):
      yield self.get_item(position)
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
187
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
188
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
def _sign(x):
  """Returns 1 when x is positive and -1 otherwise (including for zero)."""
  return 1 if x > 0 else -1
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
194
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
195
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
class SlicedSequence(WrappedSequence):
  """This is part of an immutable and potentially arbitrarily long list."""

  def __init__(self, raw, slicer=None):
    """Creates a lazy view of raw restricted to a slice.

    Derived classes will likely override this constructor.

    Args:
      raw: the sequence being sliced; must provide __len__ and indexing.
      slicer: optional slice object; None selects the whole of raw.
    """
    self.raw = raw
    if slicer is None:
      self.start, self.stop, self.step = 0, raw.__len__(), 1
    else:
      self.start, self.stop, self.step = slice_indices(slicer, raw.__len__())

    # Integer round up, depending on step direction.  Floor division keeps
    # the result an exact integer on Python 3 (where / yields a float), and
    # the clamp makes a slice whose bounds select nothing have length 0
    # rather than a negative length.
    span = self.stop - self.start + self.step - _sign(self.step)
    self.length = max(0, span // self.step)

  def get_item(self, i, d=None):
    """Returns the i-th element of the slice.

    Args:
      i: zero-based position within the slice; it is not range-checked here.
      d: accepted to match the get_item signature of the other sequence
        classes; unused.
    """
    j = i * self.step + self.start
    return self.raw[j]
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
214
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
215
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
class ConcatenatedSequence(WrappedSequence):
  """Chains several sequences end to end, like extend() without unpacking."""

  def __init__(self, *alternatives):
    """Records each alternative together with its length.

    Args:
      *alternatives: sequences to concatenate, in order; each must provide
        __len__.
    """
    pairs = []
    total = 0
    for alt in alternatives:
      size = alt.__len__()
      pairs.append((alt, size))
      total += size
    self.list_lengths = pairs
    self.length = total

  def get_item(self, i, d=None):
    """Returns element i of the concatenation.

    Args:
      i: index into the combined sequence.
      d: accepted for signature compatibility; unused.

    Raises:
      IndexError: if i lies beyond the last alternative.
    """
    remaining = i
    for seq, size in self.list_lengths:
      if remaining >= size:
        # Not in this alternative; skip past it.
        remaining -= size
        continue
      return seq[remaining]
    raise IndexError('Too Big')

  def __contains__(self, item):
    return any(item in seq for seq, _ in self.list_lengths)

  def __repr__(self):
    return ''.join(('{concat ', repr(self.list_lengths), '}'))
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
238
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
239
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
class CombinatoricsSequence(WrappedSequence):
  """Produces every combination of one item taken from each component."""

  def __init__(self, *components):
    """Records each component with its length; total length is the product.

    Args:
      *components: sequences to combine, in order; each must provide __len__.
    """
    pairs = []
    product = 1
    for component in components:
      size = component.__len__()
      pairs.append((component, size))
      product *= size
    self.list_lengths = pairs
    self.length = product

  def get_item(self, i, d=None):
    """Returns the i-th combination, joined into a single string.

    The index is decomposed with divmod, the first component varying
    fastest.

    Args:
      i: index; a negative value counts back from the end.
      d: optional object passed to each component's get_item(), for
        components that have one.

    Raises:
      IndexError: if i is out of range.
    """
    if i < 0:
      i += self.length
    if not 0 <= i < self.length:
      raise IndexError("Index %d out of bounds" % (i,))

    if len(self.list_lengths) == 1:
      # skip unnecessary ''.join -- big speedup
      only, _ = self.list_lengths[0]
      return only[i]

    pieces = []
    remaining = i
    for component, size in self.list_lengths:
      remaining, digit = divmod(remaining, size)
      if hasattr(component, 'get_item'):
        piece = component.get_item(digit, d)
      else:
        piece = component[digit]
      pieces.append(piece)
    return ''.join(pieces)

  def __repr__(self):
    return ''.join(('{combin ', repr(self.list_lengths), '}'))
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
270
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
271
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
class RepetitiveSequence(WrappedSequence):
  """This chooses an entry from a list, many times, and concatenates."""

  def __init__(self, content, lowest=1, highest=1):
    """Sets up the table mapping indexes to repeat counts.

    Args:
      content: sequence of strings to draw from; must provide __len__.
      lowest: smallest repeat count.
      highest: largest repeat count.
    """
    self.content = content
    self.content_length = content.__len__()
    # NOTE(review): fastdivmod.powersum is defined outside this file;
    # presumably sum(base ** k for k in lowest..highest) -- confirm.
    self.length = fastdivmod.powersum(self.content_length, lowest, highest)
    self.lowest = lowest
    self.highest = highest

    # Entry i of self.offsets is a pair (first index, repeat count): get_item
    # subtracts the first element from the index and uses the second as the
    # number of repetitions.
    def arbitrary_entry(i):
      return (fastdivmod.powersum(self.content_length, lowest, i+lowest-1), i+lowest)

    def entry_from_prev(i, prev):
      return (prev[0] + (self.content_length ** prev[1]), prev[1] + 1)

    self.offsets = cachingseq.CachingFuncSequence(
        arbitrary_entry, highest - lowest+1, entry_from_prev)
    # This needs to be a constant in order to reuse calculations in future
    # calls to bisect (a moving target will produce more misses).
    if self.offsets[-1][0] > sys.maxsize:
      # Find the first entry whose successor starts above sys.maxsize.  The
      # attributes are assigned unconditionally after the scan: if only the
      # final offset is above sys.maxsize the scan stops on the entry before
      # it, instead of leaving index_of_offset/offset_break undefined.
      i = 0
      while i + 2 < len(self.offsets) and self.offsets[i+1][0] <= sys.maxsize:
        i += 1
      self.index_of_offset = i
      self.offset_break = self.offsets[i][0]
    else:
      self.index_of_offset = len(self.offsets)
      self.offset_break = sys.maxsize

  def get_item(self, i, d=None):
    """Finds out how many repeats this index implies, then picks strings.

    Args:
      i: index into this sequence.
      d: accepted for signature compatibility; unused.

    Returns:
      The concatenation of `count` entries of content, or '' when the repeat
      count for this index is 0.
    """
    # Search only the part of the offsets table on the relevant side of the
    # precomputed break point.
    if i < self.offset_break:
      by_bisect = bisect.bisect_left(self.offsets, (i, -1), hi=self.index_of_offset)
    else:
      by_bisect = bisect.bisect_left(self.offsets, (i, -1), lo=self.index_of_offset)

    # bisect_left returns the insertion point; step back to the entry whose
    # starting index is <= i.
    if by_bisect == len(self.offsets) or self.offsets[by_bisect][0] > i:
      by_bisect -= 1

    num = i - self.offsets[by_bisect][0]
    count = self.offsets[by_bisect][1]

    # For many repeats of a small content, copy it to a list once rather
    # than indexing the wrapped sequence for every position.
    if count > 100 and self.content_length < 1000:
      content = list(self.content)
    else:
      content = self.content

    result = []

    if count == 0:
      return ''

    # NOTE(review): fastdivmod.divmod_iter is defined outside this file;
    # presumably yields the base-content_length digits of num, least
    # significant first -- confirm.
    for modulus in fastdivmod.divmod_iter(num, self.content_length):
      result.append(content[modulus])

    # Pad the remaining (most significant) positions with content[0].
    leftover = count - len(result)
    if leftover:
      assert leftover > 0
      result.extend([content[0]] * leftover)

    # smallest place value ends up on the right
    return ''.join(result[::-1])

  def __repr__(self):
    return '{repeat base=%d low=%d high=%d}' % (self.content_length, self.lowest, self.highest)
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
340
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
341
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
class SaveCaptureGroup(WrappedSequence):
  """Wraps a sequence and records each fetched item under a fixed key."""

  def __init__(self, parsed, key):
    """Stores the key and wraps the given sequence.

    Args:
      parsed: the sequence to wrap.
      key: key under which get_item() stores the fetched value.
    """
    self.key = key
    super(SaveCaptureGroup, self).__init__(parsed)

  def get_item(self, n, d=None):
    """Fetches item n and, when d is given, also stores it as d[self.key].

    Args:
      n: index of the item to fetch.
      d: optional mapping that receives the fetched value.

    Returns:
      The fetched item.
    """
    value = super(SaveCaptureGroup, self).get_item(n, d)
    if d is None:
      return value
    d[self.key] = value
    return value
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
352
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
353
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
class ReadCaptureGroup(WrappedSequence):
  """One-item sequence whose value is read from the capture-group dict."""

  def __init__(self, n):
    # Exactly one item: whatever is stored for group n at lookup time.
    self.length = 1
    self.num = n

  def get_item(self, i, d=None):
    """Return the value stored for this group in d.

    Raises IndexError for any index other than 0 and ValueError when no
    dict is supplied.  If the group is absent from d, the placeholder
    string "fail" is returned.
    """
    if i == 0:
      if d is not None:
        return d.get(self.num, "fail")
      raise ValueError('ReadCaptureGroup with no dict')
    raise IndexError(i)
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
365
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
366
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
class RegexMembershipSequence(WrappedSequence):
  """Creates a sequence from the regex, knows how to test membership.

  The pattern is parsed with sre_parse and every parsed node is turned into
  a sequence (or plain list) by the handler registered for its opcode in
  self.backends.  Membership tests are delegated to a compiled regex.
  """

  def empty_list(self, *_):
    """Backend for ASSERT / ASSERT_NOT: contributes no values at all."""
    return []

  def nothing_added(self, *_):
    """Backend for AT (anchors): contributes only the empty string."""
    return ['']

  def branch_values(self, _, items):
    """Converts SRE parser data into literals and merges those lists."""
    return ConcatenatedSequence(
        *[self.sub_values(parsed) for parsed in items])

  def max_repeat_values(self, min_count, max_count, items):
    """Sequential expansion of the count to be combinatorics.

    The upper bound is capped at self.max_count.
    """
    max_count = min(max_count, self.max_count)
    return RepetitiveSequence(
        self.sub_values(items), min_count, max_count)

  def in_values(self, items):
    """Backend for IN (character sets), including negated sets."""
    # Special case which distinguishes branch from charset operator
    if items and items[0][0] == sre_constants.NEGATE:
      # Negated set: everything in the charset not produced by the
      # remaining items.
      items = self.branch_values(None, items[1:])
      return [item for item in self.charset if item not in items]
    return self.branch_values(None, items)

  def not_literal(self, y):
    """Backend for NOT_LITERAL: expressed as a negated one-literal set."""
    return self.in_values(((sre_constants.NEGATE,),
                           (sre_constants.LITERAL, y),))

  def category(self, y):
    """Backend for CATEGORY: looks the category up in CATEGORIES."""
    return CATEGORIES[y]

  def groupref(self, n):
    """Backend for GROUPREF: remembers that the pattern has a backreference."""
    self.has_groupref = True
    return ReadCaptureGroup(n)

  def get_item(self, i, d=None):
    """Typically only pass i.  d is an internal detail, for consistency with other classes.

    If you care about the capture groups, you should use
    RegexMembershipSequenceMatches instead, which returns a Match object
    instead of a string."""
    if self.has_groupref or d is not None:
      # Backreferences need somewhere to read saved groups from, so a dict
      # is always passed down in that case.
      if d is None:
        d = {}
      return super(RegexMembershipSequence, self).get_item(i, d)
    else:
      return super(RegexMembershipSequence, self).get_item(i)

  def sub_values(self, parsed):
    """This knows how to convert one piece of parsed pattern.

    Raises ParseError when the piece is neither a list nor a tuple whose
    opcode has a registered backend.
    """
    # If this is a subpattern object, we just want its data
    if isinstance(parsed, sre_parse.SubPattern):
      parsed = parsed.data
    # A list indicates sequential elements of a string
    if isinstance(parsed, list):
      elements = [self.sub_values(p) for p in parsed]
      return CombinatoricsSequence(*elements)
    # If not a list, a tuple represents a specific match type
    if isinstance(parsed, tuple) and parsed:
      matcher, arguments = parsed
      if not isinstance(arguments, tuple):
        arguments = (arguments,)
      if matcher in self.backends:
        # The anchor state machine is advanced before the backend runs.
        self.check_anchor_state(matcher, arguments)
        return self.backends[matcher](*arguments)
    # No idea what to do here
    raise ParseError(repr(parsed))

  def maybe_save(self, *args):
    """Backend for SUBPATTERN: wraps the values so the group gets recorded.

    Groups whose identifier is None are returned unwrapped.
    """
    # Python 3.6 has group, add_flags, del_flags, parsed
    # while earlier versions just have group, parsed
    group = args[0]
    parsed = args[-1]
    rv = self.sub_values(parsed)
    if group is not None:
      rv = SaveCaptureGroup(rv, group)
    return rv

  def check_anchor_state(self, matcher, arguments):
    """Advance self.state for one parsed node, rejecting misplaced anchors.

    Raises ParseError when an anchor (or, after an end anchor, anything
    other than a subpattern) appears where it is not accepted.
    """
    # A bit of a hack to support zero-width leading anchors.  The goal is
    # that /^(a|b)$/ will match properly, and that /a^b/ or /a\bb/ throws
    # an error.  (It's unfortunate that I couldn't easily handle /$^/ which
    # matches the empty string; I went for the common case.)
    #
    # There are three states, for example:
    # / STATE_START
    # | / STATE_START (^ causes no transition here, but is illegal at STATE_MIDDLE or STATE_END)
    # | |  / STATE_START (\b causes no transition here, but advances MIDDLE to END)
    # | |  | / (same as above for ^)
    # | |  | | / STATE_MIDDLE (anything besides ^ and \b advances START to MIDDLE)
    # | |  | | | / still STATE_MIDDLE
    # . .  . . . .  / advances MIDDLE to END
    # ^ \b ^ X Y \b $
    if self.state == STATE_START:
      if matcher == sre_constants.AT:
        if arguments[0] in (sre_constants.AT_END, sre_constants.AT_END_STRING):
          self.state = STATE_END
        elif arguments[0] == sre_constants.AT_NON_BOUNDARY:
          # This is nonsensical at beginning of string
          raise ParseError('Anchor %r found at START state' % (arguments[0],))
        # All others (AT_BEGINNING, AT_BEGINNING_STRING, and AT_BOUNDARY) remain in START.
      elif matcher != sre_constants.SUBPATTERN:
        self.state = STATE_MIDDLE
      # subpattern remains in START
    elif self.state == STATE_END:
      if matcher == sre_constants.AT:
        if arguments[0] not in (
            sre_constants.AT_END, sre_constants.AT_END_STRING,
            sre_constants.AT_BOUNDARY):
          raise ParseError('Anchor %r found at END state' % (arguments[0],))
        # those three remain in END
      elif matcher != sre_constants.SUBPATTERN:
        raise ParseError('Non-end-anchor %r found at END state' % (arguments[0],))
      # subpattern remains in END
    else:  # self.state == STATE_MIDDLE
      if matcher == sre_constants.AT:
        if arguments[0] not in (
            sre_constants.AT_END, sre_constants.AT_END_STRING,
            sre_constants.AT_BOUNDARY):
          raise ParseError('Anchor %r found at MIDDLE state' % (arguments[0],))
        # All others (AT_END, AT_END_STRING, AT_BOUNDARY) advance to END.
        self.state = STATE_END

  def __init__(self, pattern, flags=0, charset=CHARSET, max_count=None):
    """Parse pattern and build the sequence of strings it can produce.

    Args:
      pattern: regular expression source.
      flags: re flags; IGNORECASE, UNICODE and LOCALE raise ParseError.
      charset: characters used for "any"/negated sets; '\n' is removed
        unless re.DOTALL is given.
      max_count: cap for repeat upper bounds; None means MAX_REPEAT_COUNT.

    Raises:
      ParseError: for unsupported flags or unsupported pattern pieces.
      Whatever re.compile raises for a pattern it cannot compile.
    """
    # If the RE module cannot compile it, we give up quickly
    self.matcher = re.compile(r'(?:%s)\Z' % pattern, flags)
    if not flags & re.DOTALL:
      charset = ''.join(c for c in charset if c != '\n')
    self.charset = charset

    self.named_group_lookup = self.matcher.groupindex

    flags |= DEFAULT_RE_FLAGS  # https://github.com/google/sre_yield/issues/3
    if flags & re.IGNORECASE:
      raise ParseError('Flag "i" not supported. https://github.com/google/sre_yield/issues/4')
    elif flags & re.UNICODE:
      raise ParseError('Flag "u" not supported. https://github.com/google/sre_yield/issues/3')
    elif flags & re.LOCALE:
      raise ParseError('Flag "l" not supported. https://github.com/google/sre_yield/issues/5')

    if max_count is None:
      self.max_count = MAX_REPEAT_COUNT
    else:
      self.max_count = max_count

    self.has_groupref = False

    # Configure the parser backends
    self.backends = {
        sre_constants.LITERAL: lambda y: [chr(y)],
        sre_constants.RANGE: lambda l, h: [chr(c) for c in range(l, h+1)],
        sre_constants.SUBPATTERN: self.maybe_save,
        sre_constants.BRANCH: self.branch_values,
        sre_constants.MIN_REPEAT: self.max_repeat_values,
        sre_constants.MAX_REPEAT: self.max_repeat_values,
        sre_constants.AT: self.nothing_added,
        sre_constants.ASSERT: self.empty_list,
        sre_constants.ASSERT_NOT: self.empty_list,
        sre_constants.ANY:
            lambda _: self.in_values(((sre_constants.NEGATE,),)),
        sre_constants.IN: self.in_values,
        sre_constants.NOT_LITERAL: self.not_literal,
        sre_constants.CATEGORY: self.category,
        sre_constants.GROUPREF: self.groupref,
    }
    self.state = STATE_START
    # Now build a generator that knows all possible patterns
    self.raw = self.sub_values(sre_parse.parse(pattern, flags))
    # Configure this class instance to know about that result
    self.length = self.raw.__len__()

  def __contains__(self, item):
    """Membership test, answered by the compiled regex."""
    # Since we have a regex, we can search the list really cheaply
    return self.matcher.match(item) is not None
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
545
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
546
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
class RegexMembershipSequenceMatches(RegexMembershipSequence):
  """RegexMembershipSequence whose integer indexing returns Match objects."""

  def __getitem__(self, i):
    """Return a Match for a single index, or a sliced view for a slice.

    Slices shorter than 16 items are returned as a plain list.
    """
    if not isinstance(i, slice):
      captured = {}
      text = super(RegexMembershipSequenceMatches, self).get_item(i, captured)
      return Match(text, captured, self.named_group_lookup)

    view = SlicedSequence(self, slicer=i)
    # Short lists are unpacked
    if view.__len__() < 16:
      return list(view)
    return view
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
559
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
560
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
def AllStrings(regex, flags=0, charset=CHARSET, max_count=None):
  """Build a RegexMembershipSequence over every string matching regex."""
  sequence = RegexMembershipSequence(
      regex, flags, charset, max_count=max_count)
  return sequence


# Second name bound to the same factory.
Values = AllStrings
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
566
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
567
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
class Match(object):
  """Small stand-in for a regex match object, built from generated data.

  Args:
    string: the whole generated string (returned as group 0).
    groups: dict mapping group number to the text captured for it.  Groups
      that did not take part in producing the string are simply absent.
    named_groups: mapping from group name to group number.

  Groups that are known (by number or by name) but absent from groups are
  reported as None, as the re module does, instead of raising KeyError.
  """

  def __init__(self, string, groups, named_groups):
    # TODO keep group(0) only, and spans for the rest.
    self._string = string
    self._groups = groups
    self._named_groups = named_groups
    # NOTE: kept as len(groups) + 1 for backward compatibility; this is not
    # the same definition as re's Match.lastindex.
    self.lastindex = len(groups) + 1

  def _highest_index(self):
    """Return the largest group number known here, or 0 if there is none."""
    known = list(self._groups) + list(self._named_groups.values())
    return max(known) if known else 0

  def group(self, n=0):
    """Return group n (a number or a name); group 0 is the whole string.

    Returns None for a known group that captured nothing.  Raises KeyError
    for an unknown name or a number beyond the highest known group.
    """
    if n == 0:
      return self._string
    if not isinstance(n, int):
      n = self._named_groups[n]
    if n not in self._groups and 0 < n <= self._highest_index():
      # Known group that did not participate in this string.
      return None
    return self._groups[n]

  def groups(self):
    """Return groups 1..highest known index as a tuple, None where unset."""
    return tuple(self._groups.get(i)
                 for i in range(1, self._highest_index() + 1))

  def groupdict(self):
    """Return a dict of named groups, with None for groups that are unset."""
    return {name: self._groups.get(index)
            for name, index in self._named_groups.items()}

  def span(self, n=0):
    """Not supported; always raises NotImplementedError."""
    raise NotImplementedError()
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
594
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
595
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
def AllMatches(regex, flags=0, charset=CHARSET, max_count=None):
  """Build a RegexMembershipSequenceMatches for regex.

  Indexing the result with an integer returns a Match object rather than
  a plain string.
  """
  sequence = RegexMembershipSequenceMatches(
      regex, flags, charset, max_count=max_count)
  return sequence
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
599
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
600
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
def main(argv=None):
  """Print every string of each pattern given on the command line.

  argv defaults to sys.argv; its first element is skipped.
  """
  args = sys.argv if argv is None else argv
  for pattern in args[1:]:
    for text in AllStrings(pattern):
      print(text)


if __name__ == '__main__':
  main()