comparison env/lib/python3.9/site-packages/boltons/formatutils.py @ 0:4f3585e2f14b draft default tip

"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author shellac
date Mon, 22 Mar 2021 18:12:50 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:4f3585e2f14b
1 # -*- coding: utf-8 -*-
2 """`PEP 3101`_ introduced the :meth:`str.format` method, and what
3 would later be called "new-style" string formatting. For the sake of
4 explicit correctness, it is probably best to refer to Python's dual
5 string formatting capabilities as *bracket-style* and
6 *percent-style*. There is overlap, but one does not replace the
7 other.
8
9 * Bracket-style is more pluggable, slower, and uses a method.
10 * Percent-style is simpler, faster, and uses an operator.
11
12 Bracket-style formatting brought with it a much more powerful toolbox,
13 but it was far from a full one. :meth:`str.format` uses `more powerful
14 syntax`_, but `the tools and idioms`_ for working with
15 that syntax are not well-developed nor well-advertised.
16
17 ``formatutils`` adds several functions for working with bracket-style
18 format strings:
19
20 * :class:`DeferredValue`: Defer fetching or calculating a value
21 until format time.
22 * :func:`get_format_args`: Parse the positional and keyword
23 arguments out of a format string.
24 * :func:`tokenize_format_str`: Tokenize a format string into
25 literals and :class:`BaseFormatField` objects.
26 * :func:`construct_format_field_str`: Assists in programmatic
27 construction of format strings.
28 * :func:`infer_positional_format_args`: Converts anonymous
29 references in 2.7+ format strings to explicit positional arguments
30 suitable for usage with Python 2.6.
31
32 .. _more powerful syntax: https://docs.python.org/2/library/string.html#format-string-syntax
33 .. _the tools and idioms: https://docs.python.org/2/library/string.html#string-formatting
34 .. _PEP 3101: https://www.python.org/dev/peps/pep-3101/
35 """
36 # TODO: also include percent-formatting utils?
37 # TODO: include lithoxyl.formatters.Formatter (or some adaptation)?
38
39 from __future__ import print_function
40
41 import re
42 from string import Formatter
43
try:
    unicode  # Python 2: the builtin already exists
except NameError:
    unicode = str  # Python 3: ``str`` is the text type

# Public names exported by a star-import of this module.
__all__ = [
    'DeferredValue',
    'get_format_args',
    'tokenize_format_str',
    'construct_format_field_str',
    'infer_positional_format_args',
    'BaseFormatField',
]


# Matches, in priority order: an escaped open-brace, an escaped
# close-brace, or an open-brace immediately followed by one of
# ``: ! . [ }`` -- i.e. the start of a field that has no explicit name.
_pos_farg_re = re.compile('({{)|'  # escaped open-brace
                          '(}})|'  # escaped close-brace
                          r'({[:!.\[}])')  # anon positional format arg
57
58
def construct_format_field_str(fname, fspec, conv):
    """
    Build the text of one bracket-style replacement field.

    Assembles ``{fname!conv:fspec}`` from the field name, format spec,
    and conversion character. The ``!conv`` and ``:fspec`` parts are
    left out when the corresponding argument is empty or ``None``.

    Returns an empty string when *fname* is ``None``. An empty-string
    *fname* still produces a field (for example ``'{}'``).
    """
    if fname is None:
        return ''
    pieces = ['{', fname]
    if conv:
        pieces.extend(('!', conv))
    if fspec:
        pieces.extend((':', fspec))
    pieces.append('}')
    return ''.join(pieces)
74
75
def split_format_str(fstr):
    """Do a very basic split of a format string.

    Returns a list of ``(literal, field_str)`` pairs, one for each
    chunk produced by :meth:`string.Formatter.parse`. ``field_str`` is
    the replacement field rebuilt by
    :func:`construct_format_field_str`, or ``None`` when the chunk has
    no field. For full tokenization, see :func:`tokenize_format_str`.

    """
    pairs = []
    for literal, name, spec, conversion in Formatter().parse(fstr):
        if name is None:
            field_text = None
        else:
            field_text = construct_format_field_str(name, spec, conversion)
        pairs.append((literal, field_text))
    return pairs
90
91
def infer_positional_format_args(fstr):
    """Number the anonymous positional fields of a format string.

    Fields written without a name (such as ``"{}"`` and ``"{:d}"``) are
    rewritten with explicit indexes, counting up from 0 in order of
    appearance, for explicitness and compatibility with 2.6. For
    example ``"{} {:d}"`` becomes ``"{0} {1:d}"``. Escaped braces
    (``"{{"`` and ``"}}"``) are copied through unchanged.

    Returns the rewritten string.
    """
    # TODO: memoize
    chunks = []
    next_index = 0
    cursor = 0  # end offset of the previous match
    # look for {: or {! or {. or {[ or {}
    for found in _pos_farg_re.finditer(fstr):
        token = found.group()
        # text between the previous match and this one is copied as-is
        chunks.append(fstr[cursor:found.start()])
        cursor = found.end()
        if token in ('{{', '}}'):
            chunks.append(token)
        else:
            # keep whatever followed the brace, insert the index before it
            chunks.append('{%s%s' % (next_index, token[1:]))
            next_index += 1
    chunks.append(fstr[cursor:])
    return ''.join(chunks)
115
116
# This approach is hardly exhaustive but it works for most builtins
_INTCHARS = 'bcdoxXn'
_FLOATCHARS = 'eEfFgGn%'
# 'n' appears in both strings; the float entries come last, so the
# resulting map sends 'n' to float.
_TYPE_MAP = dict([(x, int) for x in _INTCHARS] +
                 [(x, float) for x in _FLOATCHARS])
_TYPE_MAP['s'] = str


def get_format_args(fstr):
    """
    Turn a format string into two lists of arguments referenced by the
    format string. One is positional arguments, and the other is named
    arguments. Each element of the list includes the name and the
    nominal type of the field.

    The nominal type is looked up from the last character of the
    field's format spec and defaults to ``str``. Fields referenced
    inside another field's format spec (``"{0:{width}}"``) are reported
    too, always with type ``str``. Each argument is listed once, in
    order of first appearance.

    Raises:
        ValueError: if a field is compound (uses ``.attr`` or
            ``[item]`` access) or is an anonymous positional field
            such as ``"{}"``.

    >>> args, kwargs = get_format_args("{noun} is {1:d} years old{punct}")
    >>> args == [(1, int)]
    True
    >>> kwargs == [('noun', str), ('punct', str)]
    True
    """
    # TODO: memoize
    formatter = Formatter()
    fargs, fkwargs, _dedup = [], [], set()

    def _add_arg(argname, type_char='s'):
        # Record argname once; names that parse as int are positional.
        if argname in _dedup:
            return
        _dedup.add(argname)
        argtype = _TYPE_MAP.get(type_char, str)  # TODO: unicode
        try:
            position = int(argname)
        except ValueError:
            fkwargs.append((argname, argtype))
        else:
            fargs.append((position, argtype))

    for lit, fname, fspec, conv in formatter.parse(fstr):
        if fname is None:
            continue
        if len(re.split('[.[]', fname)) > 1:
            raise ValueError('encountered compound format arg: %r' % fname)
        # Explicit check rather than ``assert``: asserts are stripped
        # under ``python -O``, which would let "{}" through as a keyword
        # argument named ''.
        if not fname:
            raise ValueError('encountered anonymous positional argument')
        _add_arg(fname, fspec[-1:])
        for sublit, subfname, _, _ in formatter.parse(fspec):
            # TODO: positional and anon args not allowed here.
            if subfname is not None:
                _add_arg(subfname)
    return fargs, fkwargs
170
171
def tokenize_format_str(fstr, resolve_pos=True):
    """Break a format string into a list of tokens.

    The result holds, in order of appearance, the string literals of
    *fstr* and a :class:`BaseFormatField` for each replacement field.
    Empty literals are omitted. By default, anonymous positional
    references are first converted into explicit, numbered positional
    references. To disable this behavior set *resolve_pos* to
    ``False``.
    """
    if resolve_pos:
        fstr = infer_positional_format_args(fstr)
    tokens = []
    for literal, name, spec, conversion in Formatter().parse(fstr):
        if literal:
            tokens.append(literal)
        if name is not None:
            tokens.append(BaseFormatField(name, spec, conversion))
    return tokens
190
191
class BaseFormatField(object):
    """A reference to an argument inside of a bracket-style format
    string. For instance, in ``"{greeting}, world!"``, there is a
    field named "greeting".

    A field is described by its name (*fname*), its format spec
    (*fspec*), and its conversion character (*conv*). See the Python
    docs on `Format String Syntax`_ for the full details.

    .. _Format String Syntax: https://docs.python.org/2/library/string.html#string-formatting
    """
    def __init__(self, fname, fspec='', conv=None):
        self.set_fname(fname)
        self.set_fspec(fspec)
        self.set_conv(conv)

    def set_fname(self, fname):
        "Set the field name and the attributes derived from it."
        # Split on '.' and '[' to separate the base name from any
        # attribute/item access that follows it.
        parts = re.split('[.[]', fname)  # TODO

        self.fname = fname
        self.base_name, self.subpath = parts[0], parts[1:]
        # an empty or all-digit base name refers to a positional argument
        self.is_positional = self.base_name.isdigit() or not self.base_name

    def set_fspec(self, fspec):
        "Set the field spec and the attributes derived from it."
        fspec = fspec or ''
        # names of any fields nested inside the spec, e.g. "{width}"
        self.subfields = [name
                          for _, name, _, _ in Formatter().parse(fspec)
                          if name is not None]
        self.fspec = fspec
        # the last character of the spec selects the nominal type
        self.type_char = fspec[-1:]
        self.type_func = _TYPE_MAP.get(self.type_char, str)

    def set_conv(self, conv):
        """Set the conversion character, as in ``"{ref!r}"``.

        The value is stored as given; no conversion function is
        resolved yet, so ``conv_func`` is always ``None``.
        """
        # TODO
        self.conv = conv
        self.conv_func = None  # TODO

    @property
    def fstr(self):
        "The current state of the field in string format."
        return construct_format_field_str(self.fname, self.fspec, self.conv)

    def __repr__(self):
        # Show only as many constructor arguments as are needed.
        if self.conv is not None:
            shown = [self.fname, self.fspec, self.conv]
        elif self.fspec != '':
            shown = [self.fname, self.fspec]
        else:
            shown = [self.fname]
        return '%s(%s)' % (self.__class__.__name__,
                           ', '.join(map(repr, shown)))

    def __str__(self):
        return self.fstr
253
254
# Sentinel meaning "no value computed yet"; ``None`` cannot be used
# because the wrapped callable may legitimately return it.
_UNSET = object()


class DeferredValue(object):
    """:class:`DeferredValue` is a wrapper type, used to defer computing
    values which would otherwise be expensive to stringify and
    format. This is most valuable in areas like logging, where one
    would not want to waste time formatting a value for a log message
    which will subsequently be filtered because the message's log
    level was DEBUG and the logger was set to only emit CRITICAL
    messages.

    The :class:`DeferredValue` is initialized with a callable that
    takes no arguments and returns the value, which can be of any
    type. By default DeferredValue only calls that callable once, and
    future references will get a cached value. This behavior can be
    disabled by setting *cache_value* to ``False``.

    Args:

        func (function): A callable that takes no arguments and
            computes the value being represented.
        cache_value (bool): Whether the first computed value is kept
            and reused. When ``False``, every usage calls *func*
            again. Defaults to ``True``.

    >>> import sys
    >>> dv = DeferredValue(lambda: len(sys._current_frames()))
    >>> output = "works great in all {0} threads!".format(dv)

    PROTIP: To keep lines shorter, use: ``from formatutils import
    DeferredValue as DV``
    """
    def __init__(self, func, cache_value=True):
        self.func = func
        # Honor the caller's choice; this used to be hard-coded to True,
        # which made ``cache_value=False`` a no-op.
        self.cache_value = cache_value
        self._value = _UNSET

    def get_value(self):
        """Computes, optionally caches, and returns the value of the
        *func*. If ``get_value()`` has been called before, a cached
        value may be returned depending on the *cache_value* option
        passed to the constructor.
        """
        if self.cache_value and self._value is not _UNSET:
            return self._value
        value = self.func()
        if self.cache_value:
            self._value = value
        return value

    def __int__(self):
        return int(self.get_value())

    def __float__(self):
        return float(self.get_value())

    def __str__(self):
        return str(self.get_value())

    def __unicode__(self):
        return unicode(self.get_value())

    def __repr__(self):
        return repr(self.get_value())

    def __format__(self, fmt):
        """Format the underlying value with the format spec *fmt*.

        The value's own ``__format__`` is tried first. If it raises
        ``ValueError`` or ``TypeError``, the value is converted to the
        type suggested by the spec's last character (``str`` when
        unrecognized) and formatted again.
        """
        value = self.get_value()

        try:
            return value.__format__(fmt)
        except (ValueError, TypeError):
            # TODO: this may be overkill
            pt = fmt[-1:]  # presentation type
            type_conv = _TYPE_MAP.get(pt, str)
            return type_conv(value).__format__(fmt)
332
333 # end formatutils.py