comparison env/lib/python3.7/site-packages/pyparsing.py @ 0:26e78fe6e8c4 draft

"planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
author shellac
date Sat, 02 May 2020 07:14:21 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:26e78fe6e8c4
1 # -*- coding: utf-8 -*-
2 # module pyparsing.py
3 #
4 # Copyright (c) 2003-2019 Paul T. McGuire
5 #
6 # Permission is hereby granted, free of charge, to any person obtaining
7 # a copy of this software and associated documentation files (the
8 # "Software"), to deal in the Software without restriction, including
9 # without limitation the rights to use, copy, modify, merge, publish,
10 # distribute, sublicense, and/or sell copies of the Software, and to
11 # permit persons to whom the Software is furnished to do so, subject to
12 # the following conditions:
13 #
14 # The above copyright notice and this permission notice shall be
15 # included in all copies or substantial portions of the Software.
16 #
17 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18 # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19 # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
20 # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
21 # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
22 # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
23 # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24 #
25
26 __doc__ = \
27 """
28 pyparsing module - Classes and methods to define and execute parsing grammars
29 =============================================================================
30
31 The pyparsing module is an alternative approach to creating and
32 executing simple grammars, vs. the traditional lex/yacc approach, or the
33 use of regular expressions. With pyparsing, you don't need to learn
34 a new syntax for defining grammars or matching expressions - the parsing
35 module provides a library of classes that you use to construct the
36 grammar directly in Python.
37
38 Here is a program to parse "Hello, World!" (or any greeting of the form
39 ``"<salutation>, <addressee>!"``), built up using :class:`Word`,
40 :class:`Literal`, and :class:`And` elements
41 (the :class:`'+'<ParserElement.__add__>` operators create :class:`And` expressions,
42 and the strings are auto-converted to :class:`Literal` expressions)::
43
44 from pyparsing import Word, alphas
45
46 # define grammar of a greeting
47 greet = Word(alphas) + "," + Word(alphas) + "!"
48
49 hello = "Hello, World!"
50 print (hello, "->", greet.parseString(hello))
51
52 The program outputs the following::
53
54 Hello, World! -> ['Hello', ',', 'World', '!']
55
56 The Python representation of the grammar is quite readable, owing to the
57 self-explanatory class names, and the use of '+', '|' and '^' operators.
58
59 The :class:`ParseResults` object returned from
60 :class:`ParserElement.parseString` can be
61 accessed as a nested list, a dictionary, or an object with named
62 attributes.
63
64 The pyparsing module handles some of the problems that are typically
65 vexing when writing text parsers:
66
67 - extra or missing whitespace (the above program will also handle
68 "Hello,World!", "Hello , World !", etc.)
69 - quoted strings
70 - embedded comments
71
72
73 Getting Started -
74 -----------------
75 Visit the classes :class:`ParserElement` and :class:`ParseResults` to
76 see the base classes that most other pyparsing
77 classes inherit from. Use the docstrings for examples of how to:
78
79 - construct literal match expressions from :class:`Literal` and
80 :class:`CaselessLiteral` classes
81 - construct character word-group expressions using the :class:`Word`
82 class
83 - see how to create repetitive expressions using :class:`ZeroOrMore`
84 and :class:`OneOrMore` classes
85 - use :class:`'+'<And>`, :class:`'|'<MatchFirst>`, :class:`'^'<Or>`,
86 and :class:`'&'<Each>` operators to combine simple expressions into
87 more complex ones
88 - associate names with your parsed results using
89 :class:`ParserElement.setResultsName`
90 - access the parsed data, which is returned as a :class:`ParseResults`
91 object
92 - find some helpful expression short-cuts like :class:`delimitedList`
93 and :class:`oneOf`
94 - find more useful common expressions in the :class:`pyparsing_common`
95 namespace class
96 """
97
98 __version__ = "2.4.7"
99 __versionTime__ = "30 Mar 2020 00:43 UTC"
100 __author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"
101
102 import string
103 from weakref import ref as wkref
104 import copy
105 import sys
106 import warnings
107 import re
108 import sre_constants
109 import collections
110 import pprint
111 import traceback
112 import types
113 from datetime import datetime
114 from operator import itemgetter
115 import itertools
116 from functools import wraps
117 from contextlib import contextmanager
118
119 try:
120 # Python 3
121 from itertools import filterfalse
122 except ImportError:
123 from itertools import ifilterfalse as filterfalse
124
125 try:
126 from _thread import RLock
127 except ImportError:
128 from threading import RLock
129
130 try:
131 # Python 3
132 from collections.abc import Iterable
133 from collections.abc import MutableMapping, Mapping
134 except ImportError:
135 # Python 2.7
136 from collections import Iterable
137 from collections import MutableMapping, Mapping
138
139 try:
140 from collections import OrderedDict as _OrderedDict
141 except ImportError:
142 try:
143 from ordereddict import OrderedDict as _OrderedDict
144 except ImportError:
145 _OrderedDict = None
146
147 try:
148 from types import SimpleNamespace
149 except ImportError:
150 class SimpleNamespace: pass
151
# Cross-version compatibility switches.  The description is stored on the
# namespace object itself, as its ``__doc__`` attribute.
__compat__ = SimpleNamespace()
__compat__.__doc__ = """
    A cross-version compatibility configuration for pyparsing features that will be
    released in a future version. By setting values in this configuration to True,
    those features can be enabled in prior versions for compatibility development
    and testing.

     - collect_all_And_tokens - flag to enable fix for Issue #63 that fixes erroneous grouping
       of results names when an And expression is nested within an Or or MatchFirst; set to
       True to enable bugfix released in pyparsing 2.3.0, or False to preserve
       pre-2.3.0 handling of named results
"""
__compat__.collect_all_And_tokens = True

# Diagnostic switches; every flag starts out disabled.
__diag__ = SimpleNamespace()
__diag__.__doc__ = """
Diagnostic configuration (all default to False)
     - warn_multiple_tokens_in_named_alternation - flag to enable warnings when a results
       name is defined on a MatchFirst or Or expression with one or more And subexpressions
       (only warns if __compat__.collect_all_And_tokens is False)
     - warn_ungrouped_named_tokens_in_collection - flag to enable warnings when a results
       name is defined on a containing expression with ungrouped subexpressions that also
       have results names
     - warn_name_set_on_empty_Forward - flag to enable warnings whan a Forward is defined
       with a results name, but has no contents defined
     - warn_on_multiple_string_args_to_oneof - flag to enable warnings whan oneOf is
       incorrectly called with multiple str arguments
     - enable_debug_on_named_expressions - flag to auto-enable debug on all subsequent
       calls to ParserElement.setName()
"""
__diag__.warn_multiple_tokens_in_named_alternation = False
__diag__.warn_ungrouped_named_tokens_in_collection = False
__diag__.warn_name_set_on_empty_Forward = False
__diag__.warn_on_multiple_string_args_to_oneof = False
__diag__.enable_debug_on_named_expressions = False
# Snapshot of the flag names.  This must be taken before ``enable_all_warnings``
# is attached below, because that attribute name also starts with "enable_".
__diag__._all_names = [nm for nm in vars(__diag__) if nm.startswith(("enable_", "warn_"))]


def _enable_all_warnings():
    """Set each of the four ``warn_*`` flags on ``__diag__`` to True."""
    for flag_name in ("warn_multiple_tokens_in_named_alternation",
                      "warn_ungrouped_named_tokens_in_collection",
                      "warn_name_set_on_empty_Forward",
                      "warn_on_multiple_string_args_to_oneof"):
        setattr(__diag__, flag_name, True)


__diag__.enable_all_warnings = _enable_all_warnings
196
197
198 __all__ = ['__version__', '__versionTime__', '__author__', '__compat__', '__diag__',
199 'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty',
200 'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal',
201 'PrecededBy', 'MatchFirst', 'NoMatch', 'NotAny', 'OneOrMore', 'OnlyOnce', 'Optional', 'Or',
202 'ParseBaseException', 'ParseElementEnhance', 'ParseException', 'ParseExpression', 'ParseFatalException',
203 'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'RecursiveGrammarException',
204 'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConverter',
205 'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore', 'Char',
206 'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col',
207 'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString',
208 'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'hexnums',
209 'htmlComment', 'javaStyleComment', 'line', 'lineEnd', 'lineStart', 'lineno',
210 'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral',
211 'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables',
212 'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity',
213 'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd',
214 'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute',
215 'indentedBlock', 'originalTextFor', 'ungroup', 'infixNotation', 'locatedExpr', 'withClass',
216 'CloseMatch', 'tokenMap', 'pyparsing_common', 'pyparsing_unicode', 'unicode_set',
217 'conditionAsParseAction', 're',
218 ]
219
# Interpreter detection and the Python 2 / Python 3 compatibility aliases.
system_version = tuple(sys.version_info)[:3]
PY_3 = system_version[0] == 3
if PY_3:
    _MAX_INT = sys.maxsize
    # the Python 2 text-type names all collapse onto ``str``
    basestring = unicode = _ustr = str
    unichr = chr

    # single-argument builtins that can be used directly as parse actions
    singleArgBuiltins = [
        sum, len, sorted, reversed, list, tuple, set, any, all, min, max,
    ]

else:
    _MAX_INT = sys.maxint
    range = xrange

    def _ustr(obj):
        """Unicode-tolerant replacement for ``str(obj)``.

        A ``unicode`` object is returned untouched.  Otherwise ``str(obj)`` is
        tried first; if that raises ``UnicodeEncodeError`` the object is encoded
        with the default encoding using XML character references, and each
        reference is then rewritten as a ``\\u`` escape.
        """
        if isinstance(obj, unicode):
            return obj

        try:
            # Same result as str(obj) whenever str(obj) works, so existing
            # callers are unaffected.
            return str(obj)
        except UnicodeEncodeError:
            encoded = unicode(obj).encode(sys.getdefaultencoding(), 'xmlcharrefreplace')
            charref = Regex(r'&#\d+;')
            charref.setParseAction(lambda t: '\\u' + hex(int(t[0][2:-1]))[2:])
            return charref.transformString(encoded)

    # single-argument builtins usable as parse actions; names missing from this
    # interpreter's __builtin__ module are skipped
    singleArgBuiltins = []
    import __builtin__

    for fname in "sum len sorted reversed list tuple set any all min max".split():
        try:
            singleArgBuiltins.append(getattr(__builtin__, fname))
        except AttributeError:
            continue

# type object of a generator expression, used for isinstance() checks
_generatorType = type(y for y in range(1))
269
def _xml_escape(data):
    """Return ``data`` with ``&``, ``>``, ``<``, ``"`` and ``'`` replaced by XML entities."""
    # '&' comes first so that the ampersands introduced by the later
    # replacements are not escaped a second time
    substitutions = (
        ('&', '&amp;'),
        ('>', '&gt;'),
        ('<', '&lt;'),
        ('"', '&quot;'),
        ("'", '&apos;'),
    )
    for raw_char, entity in substitutions:
        data = data.replace(raw_char, entity)
    return data
279
# Character-set constants used when building expressions.
alphas = string.ascii_uppercase + string.ascii_lowercase
nums = string.digits
hexnums = nums + "ABCDEFabcdef"
alphanums = alphas + nums
_bslash = "\\"
# every character of string.printable that is not whitespace
printables = "".join(filter(lambda c: c not in string.whitespace, string.printable))
286
287
def conditionAsParseAction(fn, message=None, fatal=False):
    """Turn a condition callable into a parse action that raises when it is falsy.

    Parameters:

     - fn - the condition; it is passed through ``_trim_arity`` before use
     - message - text for the raised exception; defaults to
       ``"failed user-defined condition"`` when None
     - fatal - raise ``ParseFatalException`` instead of ``ParseException``

    Returns the wrapping parse action, which returns None when the condition
    holds.
    """
    failure_message = "failed user-defined condition" if message is None else message
    if fatal:
        exc_type = ParseFatalException
    else:
        exc_type = ParseException
    fn = _trim_arity(fn)

    @wraps(fn)
    def pa(s, l, t):
        if fn(s, l, t):
            return
        raise exc_type(s, l, failure_message)

    return pa
299
class ParseBaseException(Exception):
    """Common base class of the exceptions raised while parsing."""

    # Performance tuning: a great many of these get constructed, so the
    # constructor is kept as small and fast as possible.
    def __init__(self, pstr, loc=0, msg=None, elem=None):
        self.loc = loc
        if msg is not None:
            self.msg = msg
            self.pstr = pstr
        else:
            # called with a single string: that string is the message and
            # there is no input text
            self.msg = pstr
            self.pstr = ""
        self.parserElement = elem
        self.args = (pstr, loc, msg)

    @classmethod
    def _from_exception(cls, pe):
        """
        Internal factory: build an instance of ``cls`` from the ``pstr``,
        ``loc``, ``msg`` and ``parserElement`` of another parse exception, so
        subclasses do not need compatible ``__init__`` signatures.
        """
        return cls(pe.pstr, pe.loc, pe.msg, pe.parserElement)

    def __getattr__(self, aname):
        """Compute the location attributes on demand:
         - lineno - the line number of the exception text
         - col / column - the column number of the exception text
         - line - the line containing the exception text

        Any other name raises ``AttributeError``.
        """
        if aname == "lineno":
            return lineno(self.loc, self.pstr)
        if aname in ("col", "column"):
            return col(self.loc, self.pstr)
        if aname == "line":
            return line(self.loc, self.pstr)
        raise AttributeError(aname)

    def __str__(self):
        if not self.pstr:
            foundstr = ''
        elif self.loc >= len(self.pstr):
            foundstr = ', found end of text'
        else:
            found_char = self.pstr[self.loc:self.loc + 1]
            foundstr = (', found %r' % found_char).replace(r'\\', '\\')
        return ("%s%s (at char %d), (line:%d, col:%d)" %
                (self.msg, foundstr, self.loc, self.lineno, self.column))

    def __repr__(self):
        return _ustr(self)

    def markInputline(self, markerString=">!<"):
        """Return the stripped input line of the exception, with ``markerString``
        (if it is non-empty) inserted at the exception column.
        """
        line_str = self.line
        line_column = self.column - 1
        if markerString:
            line_str = line_str[:line_column] + markerString + line_str[line_column:]
        return line_str.strip()

    def __dir__(self):
        return ["lineno", "col", "line"] + dir(type(self))
362
class ParseException(ParseBaseException):
    """
    Exception raised when a parse expression does not match the input;
    the following attributes are available by name:
    - lineno - the line number of the exception text
    - col - the column number of the exception text
    - line - the line containing the exception text

    Example::

        try:
            Word(nums).setName("integer").parseString("ABC")
        except ParseException as pe:
            print(pe)
            print("column: {}".format(pe.col))

    prints::

       Expected integer (at char 0), (line:1, col:1)
        column: 1

    """

    @staticmethod
    def explain(exc, depth=16):
        """
        Describe an exception as a multi-line string naming the pyparsing
        expressions and functions found in its traceback.

        Parameters:

         - exc - the exception raised during parsing; it need not be a
           ParseException, so exceptions raised inside parse actions work too
         - depth (default=16) - how many levels back in the stack trace to list;
           None lists the full stack trace, 0 shows only the failing input
           line, the marker and the exception string

        Returns the lines joined with newlines.  For a ParseBaseException the
        output starts with the failing input line and a ``^`` marker under the
        failing column.

        Note: the expressions are shown using their string form, so the output
        is easier to read when expressions were named with `setName`.

        explain() is only supported under Python 3.
        """
        import inspect

        if depth is None:
            depth = sys.getrecursionlimit()

        ret = []
        if isinstance(exc, ParseBaseException):
            ret.append(exc.line)
            ret.append(' ' * (exc.col - 1) + '^')
        ret.append("{0}: {1}".format(type(exc).__name__, exc))

        if depth <= 0:
            return '\n'.join(ret)

        callers = inspect.getinnerframes(exc.__traceback__, context=depth)
        seen = set()
        for frame_record in callers[-depth:]:
            frm = frame_record[0]
            f_self = frm.f_locals.get('self', None)

            if isinstance(f_self, ParserElement):
                # list each parser element once, and only from its parsing frames
                if frm.f_code.co_name not in ('parseImpl', '_parseNoCache'):
                    continue
                if f_self in seen:
                    continue
                seen.add(f_self)

                self_type = type(f_self)
                ret.append("{0}.{1} - {2}".format(self_type.__module__,
                                                  self_type.__name__,
                                                  f_self))
            elif f_self is not None:
                self_type = type(f_self)
                ret.append("{0}.{1}".format(self_type.__module__,
                                            self_type.__name__))
            else:
                code = frm.f_code
                if code.co_name in ('wrapper', '<module>'):
                    continue
                ret.append("{0}".format(code.co_name))

            # only frames that produced a line count against the depth budget
            depth -= 1
            if not depth:
                break

        return '\n'.join(ret)
454
455
class ParseFatalException(ParseBaseException):
    """Exception that user code can raise when inconsistent parse content
    is found; it stops all parsing immediately."""
460
class ParseSyntaxException(ParseFatalException):
    """Same as :class:`ParseFatalException`, but raised internally when an
    :class:`ErrorStop<And._ErrorStop>` ('-' operator) signals that parsing
    must stop at once because an unbacktrackable syntax error was found.
    """
468
469 #~ class ReparseException(ParseBaseException):
470 #~ """Experimental class - parse actions can raise this exception to cause
471 #~ pyparsing to reparse the input string:
472 #~ - with a modified input string, and/or
473 #~ - with a modified start location
474 #~ Set the values of the ReparseException in the constructor, and raise the
475 #~ exception in a parse action to cause pyparsing to use the new string/location.
476 #~ Setting the values as None causes no change to be made.
477 #~ """
478 #~ def __init_( self, newstring, restartLoc ):
479 #~ self.newParseText = newstring
480 #~ self.reparseLoc = restartLoc
481
class RecursiveGrammarException(Exception):
    """Exception raised by :class:`ParserElement.validate` if the
    grammar could be improperly recursive.

    The offending chain of expressions is kept in ``parseElementTrace``.
    """
    def __init__(self, parseElementList):
        self.parseElementTrace = parseElementList

    def __str__(self):
        # Format through a 1-tuple: a bare ``%`` operand that happens to be a
        # tuple would be unpacked as the argument list, raising TypeError for
        # several elements and silently unwrapping a single one.
        return "RecursiveGrammarException: %s" % (self.parseElementTrace,)
491
class _ParseResultsWithOffset(object):
    """A two-item ``(p1, p2)`` pair kept in ``tup``; indexing reads the pair
    and ``setOffset`` replaces its second item."""

    def __init__(self, p1, p2):
        self.tup = (p1, p2)

    def __getitem__(self, i):
        return self.tup[i]

    def __repr__(self):
        # only the first item is shown
        first = self.tup[0]
        return repr(first)

    def setOffset(self, i):
        self.tup = self.tup[:1] + (i,)
501
502 class ParseResults(object):
503 """Structured parse results, to provide multiple means of access to
504 the parsed data:
505
506 - as a list (``len(results)``)
507 - by list index (``results[0], results[1]``, etc.)
508 - by attribute (``results.<resultsName>`` - see :class:`ParserElement.setResultsName`)
509
510 Example::
511
512 integer = Word(nums)
513 date_str = (integer.setResultsName("year") + '/'
514 + integer.setResultsName("month") + '/'
515 + integer.setResultsName("day"))
516 # equivalent form:
517 # date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
518
519 # parseString returns a ParseResults object
520 result = date_str.parseString("1999/12/31")
521
522 def test(s, fn=repr):
523 print("%s -> %s" % (s, fn(eval(s))))
524 test("list(result)")
525 test("result[0]")
526 test("result['month']")
527 test("result.day")
528 test("'month' in result")
529 test("'minutes' in result")
530 test("result.dump()", str)
531
532 prints::
533
534 list(result) -> ['1999', '/', '12', '/', '31']
535 result[0] -> '1999'
536 result['month'] -> '12'
537 result.day -> '31'
538 'month' in result -> True
539 'minutes' in result -> False
540 result.dump() -> ['1999', '/', '12', '/', '31']
541 - day: 31
542 - month: 12
543 - year: 1999
544 """
def __new__(cls, toklist=None, name=None, asList=True, modal=True):
    """Return ``toklist`` itself when it is already an instance of ``cls``;
    otherwise create a new object flagged as needing initialization."""
    if not isinstance(toklist, cls):
        retobj = object.__new__(cls)
        retobj.__doinit = True
        return retobj
    return toklist
551
552 # Performance tuning: we construct a *lot* of these, so keep this
553 # constructor as small and fast as possible
def __init__(self, toklist=None, name=None, asList=True, modal=True, isinstance=isinstance):
    """Set up the token list on first initialization and, when a truthy
    ``name`` is given, register the tokens under that results name.

    ``isinstance`` is bound as a default argument so that it is a local name.
    """
    if self.__doinit:
        # first pass only: __new__ may have handed back an existing instance
        self.__doinit = False
        self.__name = None
        self.__parent = None
        self.__accumNames = {}
        self.__asList = asList
        self.__modal = modal
        if toklist is None:
            toklist = []
        if isinstance(toklist, list):
            self.__toklist = toklist[:]
        elif isinstance(toklist, _generatorType):
            self.__toklist = list(toklist)
        else:
            self.__toklist = [toklist]
        self.__tokdict = dict()

    if name is None or not name:
        return

    if not modal:
        self.__accumNames[name] = 0
    if isinstance(name, int):
        name = _ustr(name)  # will always return a str, but use _ustr for consistency
    self.__name = name

    nothing_to_name = (isinstance(toklist, (type(None), basestring, list))
                       and toklist in (None, '', []))
    if nothing_to_name:
        return

    if isinstance(toklist, basestring):
        toklist = [toklist]

    if not asList:
        try:
            self[name] = toklist[0]
        except (KeyError, TypeError, IndexError):
            self[name] = toklist
        return

    if isinstance(toklist, ParseResults):
        named_tokens = ParseResults(toklist.__toklist)
    else:
        named_tokens = ParseResults(toklist[0])
    self[name] = _ParseResultsWithOffset(named_tokens, 0)
    self[name].__name = name
592
def __getitem__(self, i):
    """Index by position (int or slice) into the token list, or by results name.

    For a name listed in the accumulating names, a ParseResults of every value
    stored under it is returned; otherwise only the most recent value is.
    """
    if isinstance(i, (int, slice)):
        return self.__toklist[i]
    if i in self.__accumNames:
        return ParseResults([v[0] for v in self.__tokdict[i]])
    return self.__tokdict[i][-1][0]
601
def __setitem__(self, k, v, isinstance=isinstance):
    """Store ``v`` at a list position (int or slice key) or append it to the
    values recorded under results name ``k``; a ParseResults value gets a weak
    reference back to this object as its parent."""
    if isinstance(v, _ParseResultsWithOffset):
        entry, sub = v, v[0]
    elif isinstance(k, (int, slice)):
        self.__toklist[k] = v
        entry, sub = None, v
    else:
        entry, sub = _ParseResultsWithOffset(v, 0), v

    if entry is not None:
        # build a new list instead of appending in place
        self.__tokdict[k] = self.__tokdict.get(k, list()) + [entry]
    if isinstance(sub, ParseResults):
        sub.__parent = wkref(self)
614
def __delitem__(self, i):
    """Delete tokens by position (int or slice), shifting the recorded offsets
    of named results accordingly, or delete a results name."""
    if not isinstance(i, (int, slice)):
        del self.__tokdict[i]
        return

    mylen = len(self.__toklist)
    del self.__toklist[i]

    # express a single index as a one-element slice
    if isinstance(i, int):
        if i < 0:
            i += mylen
        i = slice(i, i + 1)
    # indices that were removed, in reverse of range order
    removed = list(reversed(range(*i.indices(mylen))))
    # fixup indices in token dictionary
    for occurrences in self.__tokdict.values():
        for j in removed:
            for k, (value, position) in enumerate(occurrences):
                occurrences[k] = _ParseResultsWithOffset(value, position - (position > j))
635
def __contains__(self, k):
    """True when ``k`` is a defined results name (tokens are not searched)."""
    return k in self.__tokdict
638
def __len__(self):
    """Number of entries in the token list."""
    return len(self.__toklist)
641
def __bool__(self):
    """True when the token list is non-empty."""
    return bool(self.__toklist)
__nonzero__ = __bool__
645
def __iter__(self):
    """Iterate over the token list."""
    return iter(self.__toklist)
648
def __reversed__(self):
    """Iterate over a reversed copy of the token list."""
    backwards = self.__toklist[::-1]
    return iter(backwards)
651
def _iterkeys(self):
    """Iterator over the results names, using ``dict.iterkeys`` when the
    dict type provides it."""
    iterkeys = getattr(self.__tokdict, "iterkeys", None)
    if iterkeys is None:
        return iter(self.__tokdict)
    return iterkeys()
657
def _itervalues(self):
    """Generator over the value of each results name."""
    return (self[name] for name in self._iterkeys())
660
def _iteritems(self):
    """Generator over ``(name, value)`` pairs of the named results."""
    return ((name, self[name]) for name in self._iterkeys())
663
if PY_3:
    # Python 3: keys/values/items are the lazy iterators themselves.
    #  - keys: iterator of all named result keys
    #  - values: iterator of all named result values
    #  - items: iterator of all named result key-value tuples
    keys, values, items = _iterkeys, _itervalues, _iteritems

else:
    # Python 2.x only: iterator flavours under the iter* names ...
    #  - iterkeys: iterator of all named result keys
    #  - itervalues: iterator of all named result values
    #  - iteritems: iterator of all named result key-value tuples
    iterkeys, itervalues, iteritems = _iterkeys, _itervalues, _iteritems

    # ... and list-returning keys/values/items built on top of them.
    def keys(self):
        """Returns all named result keys (as a list in Python 2.x, as an iterator in Python 3.x)."""
        return list(self.iterkeys())

    def values(self):
        """Returns all named result values (as a list in Python 2.x, as an iterator in Python 3.x)."""
        return list(self.itervalues())

    def items(self):
        """Returns all named result key-values (as a list of tuples in Python 2.x, as an iterator in Python 3.x)."""
        return list(self.iteritems())
695
def haskeys(self):
    """Return True when at least one results name is defined.

    keys() returns an iterator, so it cannot be tested for emptiness
    directly; use this method instead."""
    return len(self.__tokdict) > 0
700
def pop(self, *args, **kwargs):
    """
    Remove and return an item, with either ``list`` or ``dict`` semantics.

    With no argument or an integer argument, the token at that index
    (default= ``last``) is popped from the list of parsed tokens.  With a
    non-integer argument (most likely a string), the value of that results
    name is popped.  A second argument, or the ``default`` keyword, supplies
    a fallback value just as in ``dict.pop()``.

    Example::

        def remove_first(tokens):
            tokens.pop(0)
        print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321']
        print(OneOrMore(Word(nums)).addParseAction(remove_first).parseString("0 123 321")) # -> ['123', '321']

        label = Word(alphas)
        patt = label("LABEL") + OneOrMore(Word(nums))
        print(patt.parseString("AAB 123 321").dump())

        # Popping a results name in a parse action removes the name only;
        # the value stays in the list form of the results
        def remove_LABEL(tokens):
            tokens.pop("LABEL")
            return tokens
        patt.addParseAction(remove_LABEL)
        print(patt.parseString("AAB 123 321").dump())

    prints::

        ['AAB', '123', '321']
        - LABEL: AAB

        ['AAB', '123', '321']
    """
    if not args:
        args = [-1]
    for kw, kw_value in kwargs.items():
        if kw != 'default':
            raise TypeError("pop() got an unexpected keyword argument '%s'" % kw)
        args = (args[0], kw_value)

    key = args[0]
    # the fallback applies only to a non-integer key that is not a defined
    # results name, and only when a fallback was actually supplied
    use_default = (not isinstance(key, int)
                   and len(args) != 1
                   and key not in self)
    if use_default:
        return args[1]

    ret = self[key]
    del self[key]
    return ret
755
def get(self, key, defaultValue=None):
    """
    Return the named result for ``key``; when no such results name exists,
    return ``defaultValue`` (``None`` unless specified).

    Similar to ``dict.get()``.

    Example::

        integer = Word(nums)
        date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

        result = date_str.parseString("1999/12/31")
        print(result.get("year")) # -> '1999'
        print(result.get("hour", "not specified")) # -> 'not specified'
        print(result.get("hour")) # -> None
    """
    return self[key] if key in self else defaultValue
778
def insert(self, index, insStr):
    """
    Insert a new element at location ``index`` in the list of parsed tokens,
    and shift the recorded offsets of named results that moved.

    Similar to ``list.insert()``, including its treatment of negative
    indexes.

    Example::

        print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321']

        # use a parse action to insert the parse location in the front of the parsed results
        def insert_locn(locn, tokens):
            tokens.insert(0, locn)
        print(OneOrMore(Word(nums)).addParseAction(insert_locn).parseString("0 123 321")) # -> [0, '0', '123', '321']
    """
    size_before = len(self.__toklist)
    self.__toklist.insert(index, insStr)
    # list.insert() counts a negative index from the end and clamps it at 0;
    # translate it to the slot that was actually used before comparing offsets
    if index < 0:
        index = max(index + size_before, 0)
    # fixup indices in token dictionary: every token that was at or after the
    # insertion point has moved one place to the right (the element that sat
    # exactly at ``index`` included, hence ``>=``)
    for occurrences in self.__tokdict.values():
        for k, (value, position) in enumerate(occurrences):
            occurrences[k] = _ParseResultsWithOffset(value, position + (position >= index))
799
def append(self, item):
    """
    Add a single element to the end of the list of parsed tokens.

    Example::

        print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321']

        # use a parse action to compute the sum of the parsed integers, and add it to the end
        def append_sum(tokens):
            tokens.append(sum(map(int, tokens)))
        print(OneOrMore(Word(nums)).addParseAction(append_sum).parseString("0 123 321")) # -> ['0', '123', '321', 444]
    """
    tokens = self.__toklist
    tokens.append(item)
814
def extend(self, itemseq):
    """
    Add a sequence of elements to the end of the list of parsed tokens.
    A ParseResults argument is merged with ``+=``, which also carries over
    its results names.

    Example::

        patt = OneOrMore(Word(alphas))

        # use a parse action to append the reverse of the matched strings, to make a palindrome
        def make_palindrome(tokens):
            tokens.extend(reversed([t[::-1] for t in tokens]))
            return ''.join(tokens)
        print(patt.addParseAction(make_palindrome).parseString("lskdj sdlkjf lksd")) # -> 'lskdjsdlkjflksddsklfjkldsjdksl'
    """
    if not isinstance(itemseq, ParseResults):
        self.__toklist.extend(itemseq)
        return
    self.__iadd__(itemseq)
833
def clear(self):
    """
    Remove every element and every results name.
    """
    self.__toklist[:] = []
    self.__tokdict.clear()
840
def __getattr__(self, name):
    """Attribute-style access to named results; an unknown name yields ``""``
    rather than raising."""
    try:
        value = self[name]
    except KeyError:
        value = ""
    return value
846
def __add__(self, other):
    """Return a copy of this object with ``other`` merged in via ``+=``."""
    combined = self.copy()
    combined += other
    return combined
851
def __iadd__(self, other):
    """Merge the tokens, named results and accumulating names of ``other``
    into this object, and return this object."""
    if other.__tokdict:
        offset = len(self.__toklist)
        # Build the whole list of shifted entries before storing any of them.
        shifted = []
        for key, occurrences in other.__tokdict.items():
            for occurrence in occurrences:
                position = occurrence[1]
                # a negative offset maps to the start of the appended tokens
                new_position = offset if position < 0 else position + offset
                shifted.append((key, _ParseResultsWithOffset(occurrence[0], new_position)))
        for key, entry in shifted:
            self[key] = entry
            if isinstance(entry[0], ParseResults):
                entry[0].__parent = wkref(self)

    self.__toklist += other.__toklist
    self.__accumNames.update(other.__accumNames)
    return self
867
def __radd__(self, other):
    """Support ``0 + results`` so that many ParseResults can be merged with
    the ``sum()`` builtin; any other left operand is unsupported.

    Returns a copy of this object for the integer 0, and ``NotImplemented``
    otherwise, which makes Python raise ``TypeError`` for the addition.
    """
    if isinstance(other, int) and other == 0:
        # useful for merging many ParseResults using sum() builtin
        return self.copy()
    # Python only reaches __radd__ once ``other`` could not handle the
    # addition itself, so evaluating ``other + self`` here would re-enter this
    # method until RecursionError.  Declining lets the interpreter raise the
    # intended TypeError instead.
    return NotImplemented
875
def __repr__(self):
    """Show the token list and the results-name dictionary as a pair."""
    return "(%r, %r)" % (self.__toklist, self.__tokdict)
878
def __str__(self):
    """List-like rendering: nested ParseResults are rendered via ``_ustr``,
    every other token via ``repr``."""
    rendered = []
    for tok in self.__toklist:
        if isinstance(tok, ParseResults):
            rendered.append(_ustr(tok))
        else:
            rendered.append(repr(tok))
    return '[' + ', '.join(rendered) + ']'
881
def _asStringList(self, sep=''):
    """Flatten the tokens into a list of strings, placing ``sep`` (when
    non-empty) between top-level items; nested results are flattened without
    a separator."""
    out = []
    for item in self.__toklist:
        if out and sep:
            out.append(sep)
        if not isinstance(item, ParseResults):
            out.append(_ustr(item))
        else:
            out.extend(item._asStringList())
    return out
892
def asList(self):
    """
    Return the parse results as a nested list of the matching tokens; nested
    ParseResults become nested lists, other tokens are returned as they are.

    Example::

        patt = OneOrMore(Word(alphas))
        result = patt.parseString("sldkj lsdkj sldkj")
        # even though the result prints in string-like form, it is actually a pyparsing ParseResults
        print(type(result), result) # -> <class 'pyparsing.ParseResults'> ['sldkj', 'lsdkj', 'sldkj']

        # Use asList() to create an actual list
        result_list = result.asList()
        print(type(result_list), result_list) # -> <class 'list'> ['sldkj', 'lsdkj', 'sldkj']
    """
    as_list = []
    for res in self.__toklist:
        if isinstance(res, ParseResults):
            as_list.append(res.asList())
        else:
            as_list.append(res)
    return as_list
909
def asDict(self):
    """
    Returns the named parse results as a nested dictionary.

    Example::

        integer = Word(nums)
        date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

        result = date_str.parseString('12/31/1999')
        print(type(result), repr(result)) # -> <class 'pyparsing.ParseResults'> (['12', '/', '31', '/', '1999'], {'day': [('1999', 4)], 'year': [('12', 0)], 'month': [('31', 2)]})

        result_dict = result.asDict()
        print(type(result_dict), repr(result_dict)) # -> <class 'dict'> {'day': '1999', 'year': '12', 'month': '31'}

        # even though a ParseResults supports dict-like access, sometime you just need to have a dict
        import json
        print(json.dumps(result)) # -> Exception: TypeError: ... is not JSON serializable
        print(json.dumps(result.asDict())) # -> {"month": "31", "day": "1999", "year": "12"}
    """
    def convert(value):
        # nested results become dicts when they carry names, lists otherwise
        if not isinstance(value, ParseResults):
            return value
        if value.haskeys():
            return value.asDict()
        return [convert(member) for member in value]

    # pick the item iterator appropriate for the running Python major version
    pairs = self.items if PY_3 else self.iteritems
    return dict((key, convert(val)) for key, val in pairs())
945
def copy(self):
    """
    Returns a new copy of a :class:`ParseResults` object.

    The token list is passed to a fresh instance, the names mapping is
    copied one level deep, and parent, accumulated names and name are
    carried over.
    """
    duplicate = ParseResults(self.__toklist)
    duplicate.__name = self.__name
    duplicate.__parent = self.__parent
    duplicate.__tokdict = dict(self.__tokdict.items())
    duplicate.__accumNames.update(self.__accumNames)
    return duplicate
956
def asXML(self, doctag=None, namedItemsOnly=False, indent="", formatted=True):
    """
    (Deprecated) Returns the parse results as XML. Tags are created for tokens and lists that have defined results names.

    ``doctag`` overrides the tag used for this level; ``namedItemsOnly``
    drops unnamed tokens; ``indent`` and ``formatted`` control layout.
    """
    # map token position -> results name (later entries win, as before)
    namedItems = {}
    for key, occurrences in self.__tokdict.items():
        for entry in occurrences:
            namedItems[entry[1]] = key

    if formatted:
        nl = "\n"
        nextLevelIndent = indent + " "
    else:
        # collapse out indents if formatting is not desired
        nl = ""
        indent = ""
        nextLevelIndent = ""

    # tag for this level: explicit doctag first, then our own results name
    selfTag = doctag if doctag is not None else self.__name
    if not selfTag:
        if namedItemsOnly:
            return ""
        selfTag = "ITEM"

    out = [nl, indent, "<", selfTag, ">"]

    for position, res in enumerate(self.__toklist):
        tag = namedItems.get(position)
        if isinstance(res, ParseResults):
            out.append(res.asXML(tag,
                                 namedItemsOnly and doctag is None,
                                 nextLevelIndent,
                                 formatted))
            continue

        # individual token, see if there is a name for it
        if not tag:
            if namedItemsOnly:
                continue
            tag = "ITEM"
        xmlBodyText = _xml_escape(_ustr(res))
        out.extend([nl, nextLevelIndent, "<", tag, ">",
                    xmlBodyText,
                    "</", tag, ">"])

    out.extend([nl, indent, "</", selfTag, ">"])
    return "".join(out)
1017
def __lookup(self, sub):
    """Return the results name under which the object ``sub`` is stored, or None.

    Matching is by identity, not equality.
    """
    for key, occurrences in self.__tokdict.items():
        if any(sub is value for value, _ in occurrences):
            return key
    return None
1024
def getName(self):
    r"""
    Returns the results name for this token expression. Useful when several
    different expressions might match at a particular location.

    Example::

        integer = Word(nums)
        ssn_expr = Regex(r"\d\d\d-\d\d-\d\d\d\d")
        house_number_expr = Suppress('#') + Word(nums, alphanums)
        user_data = (Group(house_number_expr)("house_number")
                    | Group(ssn_expr)("ssn")
                    | Group(integer)("age"))
        user_info = OneOrMore(user_data)

        result = user_info.parseString("22 111-22-3333 #221B")
        for item in result:
            print(item.getName(), ':', item[0])

    prints::

        age : 22
        ssn : 111-22-3333
        house_number : 221B
    """
    # 1. an explicitly stored name wins
    if self.__name:
        return self.__name

    # 2. otherwise ask the parent (held via a weak reference) which name we are stored under
    if self.__parent:
        owner = self.__parent()
        return owner.__lookup(self) if owner else None

    # 3. a single token with a single name recorded at position 0 or -1
    names = self.__tokdict
    if (len(self) == 1
            and len(names) == 1
            and next(iter(names.values()))[0][1] in (0, -1)):
        return next(iter(names.keys()))

    return None
1064
def dump(self, indent='', full=True, include_list=True, _depth=0):
    """
    Diagnostic method for listing out the contents of
    a :class:`ParseResults`. Accepts an optional ``indent`` argument so
    that this string can be embedded in a nested display of other data.

    Parameters:

    - indent - string prepended to every emitted line
    - full - when true, named results (or, if there are none, nested
      results by index) are listed after the token list
    - include_list - when true, the first line is the ``asList()`` form
    - _depth - nesting level used by the recursive calls to indent
      nested entries; not intended for callers

    Example::

        integer = Word(nums)
        date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

        result = date_str.parseString('12/31/1999')
        print(result.dump())

    prints::

        ['12', '/', '31', '/', '1999']
        - day: 1999
        - month: 31
        - year: 12
    """
    out = []
    NL = '\n'
    # first chunk: either the list form or an empty placeholder
    if include_list:
        out.append(indent + _ustr(self.asList()))
    else:
        out.append('')

    if full:
        if self.haskeys():
            # named results, sorted by the string form of their key
            items = sorted((str(k), v) for k, v in self.items())
            for k, v in items:
                if out:
                    out.append(NL)
                out.append("%s%s- %s: " % (indent, (' ' * _depth), k))
                if isinstance(v, ParseResults):
                    if v:
                        # non-empty nested results are dumped recursively, one level deeper
                        out.append(v.dump(indent=indent, full=full, include_list=include_list, _depth=_depth + 1))
                    else:
                        out.append(_ustr(v))
                else:
                    out.append(repr(v))
        elif any(isinstance(vv, ParseResults) for vv in self):
            # no names at this level, but nested results exist: list every item by index
            v = self
            for i, vv in enumerate(v):
                if isinstance(vv, ParseResults):
                    out.append("\n%s%s[%d]:\n%s%s%s" % (indent,
                                                        (' ' * (_depth)),
                                                        i,
                                                        indent,
                                                        (' ' * (_depth + 1)),
                                                        vv.dump(indent=indent,
                                                                full=full,
                                                                include_list=include_list,
                                                                _depth=_depth + 1)))
                else:
                    out.append("\n%s%s[%d]:\n%s%s%s" % (indent,
                                                        (' ' * (_depth)),
                                                        i,
                                                        indent,
                                                        (' ' * (_depth + 1)),
                                                        _ustr(vv)))

    return "".join(out)
1129
def pprint(self, *args, **kwargs):
    """
    Pretty-printer for parsed results as a list, using the
    `pprint <https://docs.python.org/3/library/pprint.html>`_ module.
    Accepts additional positional or keyword args as defined for
    `pprint.pprint <https://docs.python.org/3/library/pprint.html#pprint.pprint>`_ .

    Example::

        ident = Word(alphas, alphanums)
        num = Word(nums)
        func = Forward()
        term = ident | num | Group('(' + func + ')')
        func <<= ident + Group(Optional(delimitedList(term)))
        result = func.parseString("fna a,b,(fnb c,d,200),100")
        result.pprint(width=40)

    prints::

        ['fna',
         ['a',
          'b',
          ['(', 'fnb', ['c', 'd', '200'], ')'],
          '100']]
    """
    # convert to plain nested lists first, then hand off to the stdlib printer
    as_plain_list = self.asList()
    pprint.pprint(as_plain_list, *args, **kwargs)
1156
1157 # add support for pickle protocol
def __getstate__(self):
    """Return the pickle state: ``(toklist, (tokdict copy, parent, accumNames, name))``.

    The parent is stored as the referenced object itself (or None), not
    as the weak reference.
    """
    parent = None
    if self.__parent is not None:
        # a dead or falsy referent is stored as None
        parent = self.__parent() or None
    details = (self.__tokdict.copy(), parent, self.__accumNames, self.__name)
    return (self.__toklist, details)
1164
def __setstate__(self, state):
    """Restore the object from the tuple produced by ``__getstate__``."""
    self.__toklist = state[0]
    self.__tokdict, par, inAccumNames, self.__name = state[1]
    restored_names = {}
    restored_names.update(inAccumNames)
    self.__accumNames = restored_names
    # the parent is kept only as a weak reference
    self.__parent = wkref(par) if par is not None else None
1174
def __getnewargs__(self):
    """Return the 4-tuple ``(toklist, name, asList, modal)`` used by pickle when re-creating the object."""
    return (self.__toklist, self.__name, self.__asList, self.__modal)
1177
def __dir__(self):
    """List the class attributes followed by the results names held by this object."""
    names = dir(type(self))
    names.extend(self.keys())
    return names
1180
@classmethod
def from_dict(cls, other, name=None):
    """
    Helper classmethod to construct a ParseResults from a dict, preserving the
    name-value relations as results names. If an optional 'name' argument is
    given, a nested ParseResults will be returned
    """
    # strings are iterable but must be stored as scalars
    string_types = (str, bytes) if PY_3 else basestring

    def is_iterable(obj):
        try:
            iter(obj)
        except Exception:
            return False
        return not isinstance(obj, string_types)

    ret = cls([])
    for key, val in other.items():
        if isinstance(val, Mapping):
            # nested mappings become nested named results
            piece = cls.from_dict(val, name=key)
        else:
            piece = cls([val], name=key, asList=is_iterable(val))
        ret += piece

    if name is None:
        return ret
    return cls([ret], name=name)
1208
# Register ParseResults with MutableMapping via its register() hook
# (presumably the collections ABC, making ParseResults a virtual subclass - confirm against the import).
MutableMapping.register(ParseResults)
1210
def col(loc, strg):
    """Returns current column within a string, counting newlines as line separators.
    The first column is number 1.

    Note: the default parsing behavior is to expand tabs in the input string
    before starting the parsing process.  See
    :class:`ParserElement.parseString` for more
    information on parsing strings containing ``<TAB>`` s, and suggested
    methods to maintain a consistent view of the parsed string, the parse
    location, and line and column positions within the parsed string.
    """
    # a location right after a newline (and inside the string) is column 1
    if 0 < loc < len(strg) and strg[loc - 1] == '\n':
        return 1
    # otherwise measure the distance from the preceding newline (-1 if none)
    previous_newline = strg.rfind("\n", 0, loc)
    return loc - previous_newline
1224
def lineno(loc, strg):
    """Returns current line number within a string, counting newlines as line separators.
    The first line is number 1.

    Note - the default parsing behavior is to expand tabs in the input string
    before starting the parsing process.  See :class:`ParserElement.parseString`
    for more information on parsing strings containing ``<TAB>`` s, and
    suggested methods to maintain a consistent view of the parsed string, the
    parse location, and line and column positions within the parsed string.
    """
    newlines_before = strg.count("\n", 0, loc)
    return 1 + newlines_before
1236
def line(loc, strg):
    """Returns the line of text containing loc within a string, counting newlines as line separators.
    """
    # first character after the previous newline (0 when there is none)
    start = strg.rfind("\n", 0, loc) + 1
    end = strg.find("\n", loc)
    if end < 0:
        # last line: runs to the end of the string
        return strg[start:]
    return strg[start:end]
1246
def _defaultStartDebugAction(instring, loc, expr):
    """Default debug action run when a match attempt starts: print the expression and its location."""
    description = "Match " + _ustr(expr) + " at loc " + _ustr(loc)
    position = "(%d,%d)" % (lineno(loc, instring), col(loc, instring))
    print((description + position))
1249
def _defaultSuccessDebugAction(instring, startloc, endloc, expr, toks):
    """Default debug action run after a successful match: print the expression and its tokens."""
    matched_tokens = str(toks.asList())
    print("Matched " + _ustr(expr) + " -> " + matched_tokens)
1252
def _defaultExceptionDebugAction(instring, loc, expr, exc):
    """Default debug action run when a match attempt fails: print the exception."""
    message = "Exception raised:" + _ustr(exc)
    print(message)
1255
def nullDebugAction(*args):
    """'Do-nothing' debug action, to suppress debugging output during parsing."""
    return None
1259
1260 # Only works on Python 3.x - nonlocal is toxic to Python 2 installs
1261 #~ 'decorator to trim function calls to match the arity of the target'
1262 #~ def _trim_arity(func, maxargs=3):
1263 #~ if func in singleArgBuiltins:
1264 #~ return lambda s,l,t: func(t)
1265 #~ limit = 0
1266 #~ foundArity = False
1267 #~ def wrapper(*args):
1268 #~ nonlocal limit,foundArity
1269 #~ while 1:
1270 #~ try:
1271 #~ ret = func(*args[limit:])
1272 #~ foundArity = True
1273 #~ return ret
1274 #~ except TypeError:
1275 #~ if limit == maxargs or foundArity:
1276 #~ raise
1277 #~ limit += 1
1278 #~ continue
1279 #~ return wrapper
1280
1281 # this version is Python 2.x-3.x cross-compatible
1282 'decorator to trim function calls to match the arity of the target'
def _trim_arity(func, maxargs=2):
    """Wrap ``func`` so it can be called with ``(s, loc, toks)`` whatever its arity.

    The wrapper first calls ``func`` with all arguments; each time that
    call itself raises ``TypeError`` it drops one more leading argument
    and retries, until a call succeeds or more than ``maxargs`` arguments
    have been dropped.  Once a call has succeeded the number of dropped
    arguments is fixed and later ``TypeError`` s propagate unchanged.

    Members of ``singleArgBuiltins`` are short-circuited and always
    called with the tokens only.
    """
    if func in singleArgBuiltins:
        return lambda s, l, t: func(t)
    # one-element lists act as mutable cells shared with wrapper()
    # (no ``nonlocal`` on Python 2)
    limit = [0]
    foundArity = [False]

    # traceback return data structure changed in Py3.5 - normalize back to plain tuples
    if system_version[:2] >= (3, 5):
        def extract_stack(limit=0):
            # special handling for Python 3.5.0 - extra deep call stack by 1
            offset = -3 if system_version == (3, 5, 0) else -2
            frame_summary = traceback.extract_stack(limit=-offset + limit - 1)[offset]
            return [frame_summary[:2]]
        def extract_tb(tb, limit=0):
            frames = traceback.extract_tb(tb, limit=limit)
            frame_summary = frames[-1]
            return [frame_summary[:2]]
    else:
        extract_stack = traceback.extract_stack
        extract_tb = traceback.extract_tb

    # synthesize what would be returned by traceback.extract_stack at the call to
    # user's parse action 'func', so that we don't incur call penalty at parse time

    LINE_DIFF = 6
    # IF ANY CODE CHANGES, EVEN JUST COMMENTS OR BLANK LINES, BETWEEN THE NEXT LINE AND
    # THE CALL TO FUNC INSIDE WRAPPER, LINE_DIFF MUST BE MODIFIED!!!!
    this_line = extract_stack(limit=2)[-1]
    pa_call_line_synth = (this_line[0], this_line[1] + LINE_DIFF)

    def wrapper(*args):
        while 1:
            try:
                ret = func(*args[limit[0]:])
                foundArity[0] = True
                return ret
            except TypeError:
                # re-raise TypeErrors if they did not come from our arity testing
                if foundArity[0]:
                    raise
                else:
                    try:
                        tb = sys.exc_info()[-1]
                        # only a TypeError whose innermost extracted frame is the func()
                        # call line above counts as an arity mismatch
                        if not extract_tb(tb, limit=2)[-1][:2] == pa_call_line_synth:
                            raise
                    finally:
                        # drop the local reference to the traceback object
                        try:
                            del tb
                        except NameError:
                            pass

                # arity mismatch: drop one more leading argument and retry
                if limit[0] <= maxargs:
                    limit[0] += 1
                    continue
                raise

    # copy func name to wrapper for sensible debug output
    func_name = "<parse action>"
    try:
        func_name = getattr(func, '__name__',
                            getattr(func, '__class__').__name__)
    except Exception:
        func_name = str(func)
    wrapper.__name__ = func_name

    return wrapper
1349
1350
1351 class ParserElement(object):
1352 """Abstract base level parser element class."""
1353 DEFAULT_WHITE_CHARS = " \n\t\r"
1354 verbose_stacktrace = False
1355
@staticmethod
def setDefaultWhitespaceChars(chars):
    r"""
    Overrides the default whitespace chars by replacing
    ``ParserElement.DEFAULT_WHITE_CHARS`` with ``chars``.

    Example::

        # default whitespace chars are space, <TAB> and newline
        OneOrMore(Word(alphas)).parseString("abc def\nghi jkl")  # -> ['abc', 'def', 'ghi', 'jkl']

        # change to just treat newline as significant
        ParserElement.setDefaultWhitespaceChars(" \t")
        OneOrMore(Word(alphas)).parseString("abc def\nghi jkl")  # -> ['abc', 'def']
    """
    new_default = chars
    ParserElement.DEFAULT_WHITE_CHARS = new_default
1371
@staticmethod
def inlineLiteralsUsing(cls):
    """
    Set class to be used for inclusion of string literals into a parser
    (stored as ``ParserElement._literalStringClass``).

    Example::

        # default literal class used is Literal
        integer = Word(nums)
        date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

        date_str.parseString("1999/12/31")  # -> ['1999', '/', '12', '/', '31']


        # change to Suppress
        ParserElement.inlineLiteralsUsing(Suppress)
        date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

        date_str.parseString("1999/12/31")  # -> ['1999', '12', '31']
    """
    literal_class = cls
    ParserElement._literalStringClass = literal_class
1393
@classmethod
def _trim_traceback(cls, tb):
    """Follow ``tb_next`` links and return the last traceback object in the chain."""
    innermost = tb
    following = innermost.tb_next
    while following:
        innermost = following
        following = innermost.tb_next
    return innermost
1399
def __init__(self, savelist=False):
    """Initialise the per-element parsing state; ``savelist`` is stored as ``saveAsList``."""
    self.parseAction = []
    self.failAction = None
    # ~ self.name = "<unknown>"  # don't define self.name, let subclasses try/except upcall
    self.strRepr = None
    self.resultsName = None
    self.saveAsList = savelist
    self.skipWhitespace = True
    self.whiteChars = set(ParserElement.DEFAULT_WHITE_CHARS)
    self.copyDefaultWhiteChars = True
    # used when checking for left-recursion
    self.mayReturnEmpty = False
    self.keepTabs = False
    self.ignoreExprs = []
    self.debug = False
    self.streamlined = False
    # used to optimize exception handling for subclasses that don't advance parse index
    self.mayIndexError = True
    self.errmsg = ""
    # used to mark results names as modal (report only last) or cumulative (list all)
    self.modalResults = True
    # custom debug actions
    self.debugActions = (None, None, None)
    self.re = None
    # used to avoid redundant calls to preParse
    self.callPreparse = True
    self.callDuringTry = False
1422
def copy(self):
    """
    Make a copy of this :class:`ParserElement`.  Useful for defining
    different parse actions for the same parsing pattern, using copies of
    the original parse element.

    The copy is shallow, except that the parse-action and ignore-expression
    lists are duplicated so that changes to the copy do not affect the
    original.  If ``copyDefaultWhiteChars`` is set, the copy's whitespace
    characters are reset to the current class default.

    Example::

        integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
        integerK = integer.copy().addParseAction(lambda toks: toks[0] * 1024) + Suppress("K")
        integerM = integer.copy().addParseAction(lambda toks: toks[0] * 1024 * 1024) + Suppress("M")

        print(OneOrMore(integerK | integerM | integer).parseString("5K 100 640K 256M"))

    prints::

        [5120, 100, 655360, 268435456]

    Equivalent form of ``expr.copy()`` is just ``expr()``::

        integerM = integer().addParseAction(lambda toks: toks[0] * 1024 * 1024) + Suppress("M")
    """
    cpy = copy.copy(self)
    cpy.parseAction = self.parseAction[:]
    cpy.ignoreExprs = self.ignoreExprs[:]
    if self.copyDefaultWhiteChars:
        # store a set, matching what __init__ stores, instead of the raw
        # default string
        cpy.whiteChars = set(ParserElement.DEFAULT_WHITE_CHARS)
    return cpy
1451
def setName(self, name):
    """
    Define name for this expression, makes debugging and exception messages clearer.
    Returns ``self`` so that calls can be chained.

    Example::

        Word(nums).parseString("ABC")  # -> Exception: Expected W:(0123...) (at char 0), (line:1, col:1)
        Word(nums).setName("integer").parseString("ABC")  # -> Exception: Expected integer (at char 0), (line:1, col:1)
    """
    self.name = name
    self.errmsg = "Expected " + self.name
    # optionally switch on debugging for every named expression
    debug_named = __diag__.enable_debug_on_named_expressions
    if debug_named:
        self.setDebug()
    return self
1466
def setResultsName(self, name, listAllMatches=False):
    """
    Define name for referencing matching tokens as a nested attribute
    of the returned parse results.
    NOTE: this returns a *copy* of the original :class:`ParserElement` object;
    this is so that the client can define a basic element, such as an
    integer, and reference it in multiple places with different names.

    You can also set results names using the abbreviated syntax,
    ``expr("name")`` in place of ``expr.setResultsName("name")``
    - see :class:`__call__`.

    Example::

        date_str = (integer.setResultsName("year") + '/'
                    + integer.setResultsName("month") + '/'
                    + integer.setResultsName("day"))

        # equivalent form:
        date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
    """
    # the real work lives in the private helper
    named_copy = self._setResultsName(name, listAllMatches)
    return named_copy
1489
def _setResultsName(self, name, listAllMatches=False):
    """Return a copy of this element carrying the results name ``name``.

    A trailing ``*`` on ``name`` is stripped and forces ``listAllMatches``.
    """
    named_copy = self.copy()
    accumulate = listAllMatches
    if name.endswith("*"):
        name = name[:-1]
        accumulate = True
    named_copy.resultsName = name
    named_copy.modalResults = not accumulate
    return named_copy
1498
def setBreak(self, breakFlag=True):
    """Method to invoke the Python pdb debugger when this element is
    about to be parsed. Set ``breakFlag`` to True to enable, False to
    disable.
    """
    if not breakFlag:
        # restore the wrapped parse method, if a breaker is installed
        if hasattr(self._parse, "_originalParseMethod"):
            self._parse = self._parse._originalParseMethod
        return self

    wrapped = self._parse

    def breaker(instring, loc, doActions=True, callPreParse=True):
        import pdb
        # this call to pdb.set_trace() is intentional, not a checkin error
        pdb.set_trace()
        return wrapped(instring, loc, doActions, callPreParse)

    # remember what we wrapped so that setBreak(False) can undo it
    breaker._originalParseMethod = wrapped
    self._parse = breaker
    return self
1517
def setParseAction(self, *fns, **kwargs):
    """
    Define one or more actions to perform when successfully matching parse element definition.
    Parse action fn is a callable method with 0-3 arguments, called as ``fn(s, loc, toks)`` ,
    ``fn(loc, toks)`` , ``fn(toks)`` , or just ``fn()`` , where:

    - s   = the original string being parsed (see note below)
    - loc = the location of the matching substring
    - toks = a list of the matched tokens, packaged as a :class:`ParseResults` object

    If the functions in fns modify the tokens, they can return them as the return
    value from fn, and the modified list of tokens will replace the original.
    Otherwise, fn does not need to return any value.

    If None is passed as the parse action, all previously added parse actions for this
    expression are cleared.

    Optional keyword arguments:
    - callDuringTry = (default= ``False``) indicate if parse action should be run during lookaheads and alternate testing

    Raises ``TypeError`` if any of the given actions is not callable.

    Note: the default parsing behavior is to expand tabs in the input string
    before starting the parsing process.  See :class:`parseString` for more
    information on parsing strings containing ``<TAB>`` s, and suggested
    methods to maintain a consistent view of the parsed string, the parse
    location, and line and column positions within the parsed string.

    Example::

        integer = Word(nums)
        date_str = integer + '/' + integer + '/' + integer

        date_str.parseString("1999/12/31")  # -> ['1999', '/', '12', '/', '31']

        # use parse action to convert to ints at parse time
        integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
        date_str = integer + '/' + integer + '/' + integer

        # note that integer fields are now ints, not strings
        date_str.parseString("1999/12/31")  # -> [1999, '/', 12, '/', 31]
    """
    # a lone None means "clear all parse actions"
    if list(fns) == [None,]:
        self.parseAction = []
        return self

    for fn in fns:
        if not callable(fn):
            raise TypeError("parse actions must be callable")
    self.parseAction = list(map(_trim_arity, list(fns)))
    self.callDuringTry = kwargs.get("callDuringTry", False)
    return self
1566
def addParseAction(self, *fns, **kwargs):
    """
    Add one or more parse actions to expression's list of parse actions. See :class:`setParseAction`.

    ``callDuringTry`` is sticky: once enabled it stays enabled.

    See examples in :class:`copy`.
    """
    wrapped_actions = list(map(_trim_arity, list(fns)))
    self.parseAction += wrapped_actions
    run_in_try = kwargs.get("callDuringTry", False)
    self.callDuringTry = self.callDuringTry or run_in_try
    return self
1576
def addCondition(self, *fns, **kwargs):
    """Add a boolean predicate function to expression's list of parse actions. See
    :class:`setParseAction` for function call signatures. Unlike ``setParseAction``,
    functions passed to ``addCondition`` need to return boolean success/fail of the condition.

    Optional keyword arguments:
    - message = define a custom message to be used in the raised exception
    - fatal   = if True, will raise ParseFatalException to stop parsing immediately; otherwise will raise ParseException
    - callDuringTry = if True, enables running parse actions during lookaheads (never disables it)

    Example::

        integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
        year_int = integer.copy()
        year_int.addCondition(lambda toks: toks[0] >= 2000, message="Only support years 2000 and later")
        date_str = year_int + '/' + integer + '/' + integer

        result = date_str.parseString("1999/12/31")  # -> Exception: Only support years 2000 and later (at char 0), (line:1, col:1)
    """
    message = kwargs.get('message')
    fatal = kwargs.get('fatal', False)
    for predicate in fns:
        action = conditionAsParseAction(predicate, message=message, fatal=fatal)
        self.parseAction.append(action)

    self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False)
    return self
1601
def setFailAction(self, fn):
    """Define action to perform if parsing fails at this expression.
    Fail action fn is a callable function that takes the arguments
    ``fn(s, loc, expr, err)`` where:
    - s = string being parsed
    - loc = location where expression match was attempted and failed
    - expr = the parse expression that failed
    - err = the exception thrown
    The function returns no value.  It may throw :class:`ParseFatalException`
    if it is desired to stop parsing immediately.

    Returns ``self`` so that calls can be chained."""
    fail_handler = fn
    self.failAction = fail_handler
    return self
1614
def _skipIgnorables(self, instring, loc):
    """Advance ``loc`` past every match of the expressions in ``ignoreExprs``.

    The whole list is retried until a complete pass matches nothing,
    since skipping one ignorable may expose another.
    """
    matched_any = True
    while matched_any:
        matched_any = False
        for ignorable in self.ignoreExprs:
            try:
                # consume as many consecutive matches of this expression as possible
                while True:
                    loc, _ = ignorable._parse(instring, loc)
                    matched_any = True
            except ParseException:
                pass
    return loc
1627
def preParse(self, instring, loc):
    """Return the location at which matching should start: ``loc`` advanced
    past ignorable expressions and, if enabled, leading whitespace."""
    if self.ignoreExprs:
        loc = self._skipIgnorables(instring, loc)

    if not self.skipWhitespace:
        return loc

    white = self.whiteChars
    end = len(instring)
    while loc < end and instring[loc] in white:
        loc += 1
    return loc
1639
def parseImpl(self, instring, loc, doActions=True):
    """Base implementation: match nothing, returning ``loc`` unchanged and an empty token list."""
    no_tokens = []
    return loc, no_tokens
1642
def postParse(self, instring, loc, tokenlist):
    """Base implementation: hand back ``tokenlist`` unchanged."""
    unchanged = tokenlist
    return unchanged
1645
1646 # ~ @profile
def _parseNoCache(self, instring, loc, doActions=True, callPreParse=True):
    """Parse ``instring`` at ``loc`` with this element, without memoization.

    Steps: optional ``preParse``, ``parseImpl``, ``postParse``, wrapping
    the tokens in a :class:`ParseResults`, then the parse actions (only if
    ``doActions`` or ``callDuringTry``).  Debug and fail actions are
    invoked along the way when debugging is on or a fail action is set.

    Returns a ``(loc, ParseResults)`` tuple.  An ``IndexError`` raised by
    ``parseImpl`` or by a parse action is converted to ``ParseException``;
    other exceptions propagate.
    """
    # indexes into self.debugActions
    TRY, MATCH, FAIL = 0, 1, 2
    debugging = (self.debug)  # and doActions)

    # The parse step below is written twice: this branch adds the
    # try/except needed to call the debug/fail actions, the else branch
    # is the same code without it.
    if debugging or self.failAction:
        # ~ print ("Match", self, "at loc", loc, "(%d, %d)" % (lineno(loc, instring), col(loc, instring)))
        if self.debugActions[TRY]:
            self.debugActions[TRY](instring, loc, self)
        try:
            if callPreParse and self.callPreparse:
                preloc = self.preParse(instring, loc)
            else:
                preloc = loc
            tokensStart = preloc
            if self.mayIndexError or preloc >= len(instring):
                # running off the end of the input is reported as a parse failure
                try:
                    loc, tokens = self.parseImpl(instring, preloc, doActions)
                except IndexError:
                    raise ParseException(instring, len(instring), self.errmsg, self)
            else:
                loc, tokens = self.parseImpl(instring, preloc, doActions)
        except Exception as err:
            # ~ print ("Exception raised:", err)
            if self.debugActions[FAIL]:
                self.debugActions[FAIL](instring, tokensStart, self, err)
            if self.failAction:
                self.failAction(instring, tokensStart, self, err)
            raise
    else:
        if callPreParse and self.callPreparse:
            preloc = self.preParse(instring, loc)
        else:
            preloc = loc
        tokensStart = preloc
        if self.mayIndexError or preloc >= len(instring):
            try:
                loc, tokens = self.parseImpl(instring, preloc, doActions)
            except IndexError:
                raise ParseException(instring, len(instring), self.errmsg, self)
        else:
            loc, tokens = self.parseImpl(instring, preloc, doActions)

    tokens = self.postParse(instring, loc, tokens)

    retTokens = ParseResults(tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults)
    if self.parseAction and (doActions or self.callDuringTry):
        # same duplication as above: the debugging branch only adds the FAIL debug action
        if debugging:
            try:
                for fn in self.parseAction:
                    try:
                        tokens = fn(instring, tokensStart, retTokens)
                    except IndexError as parse_action_exc:
                        exc = ParseException("exception raised in parse action")
                        exc.__cause__ = parse_action_exc
                        raise exc

                    # a parse action that returns something new replaces the results
                    if tokens is not None and tokens is not retTokens:
                        retTokens = ParseResults(tokens,
                                                  self.resultsName,
                                                  asList=self.saveAsList and isinstance(tokens, (ParseResults, list)),
                                                  modal=self.modalResults)
            except Exception as err:
                # ~ print "Exception raised in user parse action:", err
                if self.debugActions[FAIL]:
                    self.debugActions[FAIL](instring, tokensStart, self, err)
                raise
        else:
            for fn in self.parseAction:
                try:
                    tokens = fn(instring, tokensStart, retTokens)
                except IndexError as parse_action_exc:
                    exc = ParseException("exception raised in parse action")
                    exc.__cause__ = parse_action_exc
                    raise exc

                if tokens is not None and tokens is not retTokens:
                    retTokens = ParseResults(tokens,
                                              self.resultsName,
                                              asList=self.saveAsList and isinstance(tokens, (ParseResults, list)),
                                              modal=self.modalResults)
    if debugging:
        # ~ print ("Matched", self, "->", retTokens.asList())
        if self.debugActions[MATCH]:
            self.debugActions[MATCH](instring, tokensStart, loc, self, retTokens)

    return loc, retTokens
1733
def tryParse(self, instring, loc):
    """Attempt a match at ``loc`` with parse actions disabled and return the end location.

    A ``ParseFatalException`` is downgraded to a plain ``ParseException``.
    """
    try:
        outcome = self._parse(instring, loc, doActions=False)
        return outcome[0]
    except ParseFatalException:
        raise ParseException(instring, loc, self.errmsg, self)
1739
def canParseNext(self, instring, loc):
    """Return True if this element matches at ``loc``, False if the attempt
    raises ``ParseException`` or ``IndexError``."""
    try:
        self.tryParse(instring, loc)
    except (ParseException, IndexError):
        return False
    return True
1747
class _UnboundedCache(object):
    """Cache with no size limit.

    The backing dict lives in a closure; ``get``, ``set``, ``clear`` and
    ``__len__`` are bound onto the instance as methods.  ``get`` returns
    the ``not_in_cache`` sentinel for unknown keys.
    """
    def __init__(self):
        store = {}
        missing = object()
        self.not_in_cache = missing

        def get(_, key):
            return store.get(key, missing)

        def set(_, key, value):
            store[key] = value

        def clear(_):
            store.clear()

        def cache_len(_):
            return len(store)

        self.get = types.MethodType(get, self)
        self.set = types.MethodType(set, self)
        self.clear = types.MethodType(clear, self)
        self.__len__ = types.MethodType(cache_len, self)
1769
if _OrderedDict is not None:
    class _FifoCache(object):
        """Size-bounded cache that evicts the oldest inserted entries first.

        Backed by an ordered dict held in a closure; ``get``, ``set``,
        ``clear`` and ``__len__`` are bound onto the instance.  ``get``
        returns the ``not_in_cache`` sentinel for unknown keys.
        """
        def __init__(self, size):
            self.not_in_cache = not_in_cache = object()

            cache = _OrderedDict()

            def get(self, key):
                return cache.get(key, not_in_cache)

            def set(self, key, value):
                cache[key] = value
                # Evict oldest entries until within the limit.  The
                # emptiness guard replaces a swallowed KeyError, which
                # made a negative size loop forever.
                while cache and len(cache) > size:
                    cache.popitem(False)

            def clear(self):
                cache.clear()

            def cache_len(self):
                return len(cache)

            self.get = types.MethodType(get, self)
            self.set = types.MethodType(set, self)
            self.clear = types.MethodType(clear, self)
            self.__len__ = types.MethodType(cache_len, self)

else:
    class _FifoCache(object):
        """Size-bounded cache that evicts the oldest inserted entries first.

        Fallback for when no ordered dict is available: a plain dict plus
        a deque recording key insertion order.
        """
        def __init__(self, size):
            self.not_in_cache = not_in_cache = object()

            cache = {}
            # Insertion order of the keys currently in ``cache``.  It is
            # deliberately NOT created with maxlen=size: a bounded deque
            # silently discards its oldest key on append, so that key was
            # never removed from ``cache`` and the cache grew without bound.
            key_fifo = collections.deque()

            def get(self, key):
                return cache.get(key, not_in_cache)

            def set(self, key, value):
                # record only new keys, so key_fifo and cache stay in step
                if key not in cache:
                    key_fifo.append(key)
                cache[key] = value
                while key_fifo and len(cache) > size:
                    cache.pop(key_fifo.popleft(), None)

            def clear(self):
                cache.clear()
                key_fifo.clear()

            def cache_len(self):
                return len(cache)

            self.get = types.MethodType(get, self)
            self.set = types.MethodType(set, self)
            self.clear = types.MethodType(clear, self)
            self.__len__ = types.MethodType(cache_len, self)
1827
# argument cache for optimizing repeated calls when backtracking through recursive expressions
packrat_cache = {}  # placeholder, replaced by enablePackrat(); it is here so that resetCache() doesn't fail before then
packrat_cache_lock = RLock()  # held by _parseCache while it reads/updates the cache
packrat_cache_stats = [0, 0]  # [hits, misses], indexed by HIT/MISS in _parseCache
1832
1833 # this method gets repeatedly called during backtracking with the same arguments -
1834 # we can cache these arguments and save ourselves the trouble of re-parsing the contained expression
def _parseCache(self, instring, loc, doActions=True, callPreParse=True):
    """Memoizing front end for ``_parseNoCache``.

    Results and ``ParseBaseException`` s are cached under the key
    ``(self, instring, loc, callPreParse, doActions)``.  A cached
    exception is re-raised; a cached result is returned with a copy of
    its tokens.
    """
    HIT, MISS = 0, 1
    key = (self, instring, loc, callPreParse, doActions)
    with ParserElement.packrat_cache_lock:
        cache = ParserElement.packrat_cache
        cached = cache.get(key)

        if cached is not cache.not_in_cache:
            ParserElement.packrat_cache_stats[HIT] += 1
            if isinstance(cached, Exception):
                raise cached
            return cached[0], cached[1].copy()

        ParserElement.packrat_cache_stats[MISS] += 1
        try:
            fresh = self._parseNoCache(instring, loc, doActions, callPreParse)
        except ParseBaseException as pe:
            # cache a copy of the exception, without the traceback
            cache.set(key, pe.__class__(*pe.args))
            raise
        # store a copy of the tokens; the caller receives the original
        cache.set(key, (fresh[0], fresh[1].copy()))
        return fresh
1857
# default parse entry point is the uncached implementation; enablePackrat()
# rebinds ParserElement._parse to _parseCache
_parse = _parseNoCache
1859
@staticmethod
def resetCache():
    """Empty the packrat cache and zero its hit/miss counters (in place)."""
    ParserElement.packrat_cache.clear()
    stats = ParserElement.packrat_cache_stats
    stats[:] = [0] * len(stats)
1864
# set to True by the first call to enablePackrat(); later calls are no-ops
_packratEnabled = False
@staticmethod
def enablePackrat(cache_size_limit=128):
    """Enables "packrat" parsing, which adds memoizing to the parsing logic.
    Repeated parse attempts at the same string location (which happens
    often in many complex grammars) can immediately return a cached value,
    instead of re-executing parsing/validating code.  Memoizing is done of
    both valid results and parsing exceptions.

    Parameters:

    - cache_size_limit - (default= ``128``) - if an integer value is provided
      will limit the size of the packrat cache; if None is passed, then
      the cache size will be unbounded; if 0 is passed, the cache will
      be effectively disabled.

    This speedup may break existing programs that use parse actions that
    have side-effects.  For this reason, packrat parsing is disabled when
    you first import pyparsing.  To activate the packrat feature, your
    program must call the static method :class:`ParserElement.enablePackrat`.
    For best results, call ``enablePackrat()`` immediately after
    importing pyparsing.

    Only the first call has any effect; later calls, even with a
    different ``cache_size_limit``, leave the existing cache in place.

    Example::

        import pyparsing
        pyparsing.ParserElement.enablePackrat()
    """
    if ParserElement._packratEnabled:
        return

    ParserElement._packratEnabled = True
    if cache_size_limit is None:
        new_cache = ParserElement._UnboundedCache()
    else:
        new_cache = ParserElement._FifoCache(cache_size_limit)
    ParserElement.packrat_cache = new_cache
    # route all parsing through the memoizing entry point
    ParserElement._parse = ParserElement._parseCache
1900
def parseString(self, instring, parseAll=False):
    """
    Run this parse expression against ``instring``, starting at location 0.
    This is the main entry point for client code once the complete
    expression has been built.

    Returns the parsed data as a :class:`ParseResults` object, which may be
    accessed as a list, or as a dict or object with attributes if the given
    parser includes results names.

    Set ``parseAll`` to True to require that the entire input string be
    consumed (equivalent to ending the grammar with ``StringEnd()``).

    Any :class:`ParseBaseException` is re-raised from here with pyparsing's
    internal frames trimmed from its traceback, unless
    ``ParserElement.verbose_stacktrace`` is set.

    Note: unless ``keepTabs`` is set, ``parseString`` implicitly calls
    ``expandtabs()`` on the input string, in order to report proper column
    numbers in parse actions. If the input string contains tabs and the
    grammar uses parse actions that use the ``loc`` argument to index into
    the string being parsed, you can ensure you have a consistent view of
    the input string by:

    - calling ``parseWithTabs`` on your grammar before calling ``parseString``
      (see :class:`parseWithTabs`)
    - define your parse action using the full ``(s, loc, toks)`` signature, and
      reference the input string using the parse action's ``s`` argument
    - explicitly expand the tabs in your input string before calling
      ``parseString``

    Example::

        Word('a').parseString('aaaaabaaa')  # -> ['aaaaa']
        Word('a').parseString('aaaaabaaa', parseAll=True)  # -> Exception: Expected end of text
    """
    ParserElement.resetCache()
    if not self.streamlined:
        self.streamline()
        # ~ self.saveAsList = True
    for ignorable in self.ignoreExprs:
        ignorable.streamline()
    if not self.keepTabs:
        instring = instring.expandtabs()

    try:
        end_loc, tokens = self._parse(instring, 0)
        if parseAll:
            # skip trailing whitespace/ignorables, then insist on end of input
            end_loc = self.preParse(instring, end_loc)
            end_check = Empty() + StringEnd()
            end_check._parse(instring, end_loc)
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        # catch and re-raise exception from here, clearing out pyparsing internal stack trace
        if getattr(exc, '__traceback__', None) is not None:
            exc.__traceback__ = self._trim_traceback(exc.__traceback__)
        raise exc
    return tokens
1958
def scanString(self, instring, maxMatches=_MAX_INT, overlap=False):
    """
    Generator that scans ``instring`` for matches of this expression and
    yields a ``(tokens, start, end)`` tuple for each one. Scanning stops
    after ``maxMatches`` matches if that argument is given. If ``overlap``
    is specified, then overlapping matches will be reported.

    Note that the start and end locations are reported relative to the string
    being parsed. See :class:`parseString` for more information on parsing
    strings with embedded tabs.

    Example::

        source = "sldjf123lsdjjkf345sldkjf879lkjsfd987"
        print(source)
        for tokens, start, end in Word(alphas).scanString(source):
            print(' '*start + '^'*(end-start))
            print(' '*start + tokens[0])

    prints::

        sldjf123lsdjjkf345sldkjf879lkjsfd987
        ^^^^^
        sldjf
                ^^^^^^^
                lsdjjkf
                          ^^^^^^
                          sldkjf
                                   ^^^^^^
                                   lkjsfd
    """
    if not self.streamlined:
        self.streamline()
    for ignorable in self.ignoreExprs:
        ignorable.streamline()

    if not self.keepTabs:
        instring = _ustr(instring).expandtabs()
    text_len = len(instring)
    skip_ignorables = self.preParse
    parse_at = self._parse
    ParserElement.resetCache()

    loc = 0
    found = 0
    try:
        while loc <= text_len and found < maxMatches:
            try:
                preloc = skip_ignorables(instring, loc)
                nextLoc, tokens = parse_at(instring, preloc, callPreParse=False)
            except ParseException:
                # no match here - retry one character past the attempted start
                loc = preloc + 1
                continue

            if nextLoc <= loc:
                # match did not advance past the scan position - step forward
                loc = preloc + 1
                continue

            found += 1
            yield tokens, preloc, nextLoc

            if not overlap:
                loc = nextLoc
            elif skip_ignorables(instring, loc) > loc:
                # NOTE(review): when ignorable text was skipped at loc, scanning
                # resumes at the END of the match even in overlap mode
                loc = nextLoc
            else:
                loc += 1
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        # catch and re-raise exception from here, clearing out pyparsing internal stack trace
        if getattr(exc, '__traceback__', None) is not None:
            exc.__traceback__ = self._trim_traceback(exc.__traceback__)
        raise exc
2032
def transformString(self, instring):
    """
    Extension to :class:`scanString` that rewrites the matched portions of
    ``instring``. Attach a parse action that modifies the returned token
    list; ``transformString()`` then scans for matches, substitutes each
    matched span with the (flattened, stringified) tokens the parse action
    produced, and returns the resulting string. Unmatched text is copied
    through unchanged.

    As a side effect this sets ``keepTabs`` on the expression.

    Example::

        wd = Word(alphas)
        wd.setParseAction(lambda toks: toks[0].title())

        print(wd.transformString("now is the winter of our discontent made glorious summer by this sun of york."))

    prints::

        Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York.
    """
    pieces = []
    prev_end = 0
    # force preservation of <TAB>s, to minimize unwanted transformation of string, and to
    # keep string locs straight between transformString and scanString
    self.keepTabs = True
    try:
        for tokens, start, end in self.scanString(instring):
            # text between the previous match and this one passes through as-is
            pieces.append(instring[prev_end:start])
            if tokens:
                if isinstance(tokens, ParseResults):
                    pieces.extend(tokens.asList())
                elif isinstance(tokens, list):
                    pieces.extend(tokens)
                else:
                    pieces.append(tokens)
            prev_end = end
        pieces.append(instring[prev_end:])
        non_empty = [piece for piece in pieces if piece]
        return "".join(map(_ustr, _flatten(non_empty)))
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        # catch and re-raise exception from here, clearing out pyparsing internal stack trace
        if getattr(exc, '__traceback__', None) is not None:
            exc.__traceback__ = self._trim_traceback(exc.__traceback__)
        raise exc
2080
def searchString(self, instring, maxMatches=_MAX_INT):
    """
    Another extension to :class:`scanString`: collects just the tokens of
    every match into a single :class:`ParseResults`, discarding the start and
    end locations. May be called with optional ``maxMatches`` argument, to
    clip searching after 'n' matches are found.

    Example::

        # a capitalized word starts with an uppercase letter, followed by zero or more lowercase letters
        cap_word = Word(alphas.upper(), alphas.lower())

        print(cap_word.searchString("More than Iron, more than Lead, more than Gold I need Electricity"))

        # the sum() builtin can be used to merge results into a single ParseResults object
        print(sum(cap_word.searchString("More than Iron, more than Lead, more than Gold I need Electricity")))

    prints::

        [['More'], ['Iron'], ['Lead'], ['Gold'], ['I'], ['Electricity']]
        ['More', 'Iron', 'Lead', 'Gold', 'I', 'Electricity']
    """
    try:
        matched_tokens = [tokens for tokens, _start, _end
                          in self.scanString(instring, maxMatches)]
        return ParseResults(matched_tokens)
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        # catch and re-raise exception from here, clearing out pyparsing internal stack trace
        if getattr(exc, '__traceback__', None) is not None:
            exc.__traceback__ = self._trim_traceback(exc.__traceback__)
        raise exc
2112
def split(self, instring, maxsplit=_MAX_INT, includeSeparators=False):
    """
    Generator method to split a string using the given expression as a separator.

    Parameters:
    - instring - the string to split
    - maxsplit - (default= unlimited) maximum number of separator matches to
      act on; passed to :class:`scanString` as ``maxMatches``
    - includeSeparators - (default= ``False``) if True, the first token of each
      separator match is yielded between the surrounding pieces

    Yields the text before each separator match, and finally the text after
    the last match (possibly the empty string).

    Example::

        punc = oneOf(list(".,;:/-!?"))
        print(list(punc.split("This, this?, this sentence, is badly punctuated!")))

    prints::

        ['This', ' this', '', ' this sentence', ' is badly punctuated', '']
    """
    last = 0
    for t, s, e in self.scanString(instring, maxMatches=maxsplit):
        yield instring[last:s]
        if includeSeparators:
            yield t[0]
        last = e
    # whatever follows the final separator (or the whole string if none matched)
    yield instring[last:]
2137
def __add__(self, other):
    """
    Implementation of + operator - returns :class:`And`. A string operand is
    first converted using this element's ``_literalStringClass``
    (:class:`Literal` by default).

    Example::

        greet = Word(alphas) + "," + Word(alphas) + "!"
        hello = "Hello, World!"
        print (hello, "->", greet.parseString(hello))

    prints::

        Hello, World! -> ['Hello', ',', 'World', '!']

    ``...`` may be used as a parse expression as a short form of :class:`SkipTo`.

        Literal('start') + ... + Literal('end')

    is equivalent to:

        Literal('start') + SkipTo('end')("_skipped*") + Literal('end')

    Note that the skipped text is returned with '_skipped' as a results name,
    and to support having multiple skips in the same parser, the value returned is
    a list of all skipped text.

    If ``other`` cannot be converted to a :class:`ParserElement`, a
    ``SyntaxWarning`` is issued and None is returned.
    """
    if other is Ellipsis:
        # placeholder; becomes a SkipTo once the following element is added
        return _PendingSkip(self)

    rhs = self._literalStringClass(other) if isinstance(other, basestring) else other
    if isinstance(rhs, ParserElement):
        return And([self, rhs])

    warnings.warn(
        "Cannot combine element of type %s with ParserElement" % type(rhs),
        SyntaxWarning, stacklevel=2)
    return None
2175
def __radd__(self, other):
    """
    Implementation of + operator when left operand is not a :class:`ParserElement`.

    ``... + expr`` yields ``SkipTo(expr)("_skipped*") + expr``. A string left
    operand is converted using ``_literalStringClass``; any other
    non-element operand produces a ``SyntaxWarning`` and a None result.
    """
    if other is Ellipsis:
        return SkipTo(self)("_skipped*") + self

    lhs = self._literalStringClass(other) if isinstance(other, basestring) else other
    if isinstance(lhs, ParserElement):
        return lhs + self

    warnings.warn(
        "Cannot combine element of type %s with ParserElement" % type(lhs),
        SyntaxWarning, stacklevel=2)
    return None
2190
def __sub__(self, other):
    """
    Implementation of - operator, returns :class:`And` with error stop.

    The result is ``self + And._ErrorStop() + other``. A string operand is
    converted using ``_literalStringClass``; any other non-element operand
    produces a ``SyntaxWarning`` and a None result.
    """
    rhs = self._literalStringClass(other) if isinstance(other, basestring) else other
    if isinstance(rhs, ParserElement):
        return self + And._ErrorStop() + rhs

    warnings.warn(
        "Cannot combine element of type %s with ParserElement" % type(rhs),
        SyntaxWarning, stacklevel=2)
    return None
2202
def __rsub__(self, other):
    """
    Implementation of - operator when left operand is not a :class:`ParserElement`.

    A string left operand is converted using ``_literalStringClass`` and the
    subtraction is retried as ``other - self``; any other non-element operand
    produces a ``SyntaxWarning`` and a None result.
    """
    lhs = self._literalStringClass(other) if isinstance(other, basestring) else other
    if isinstance(lhs, ParserElement):
        return lhs - self

    warnings.warn(
        "Cannot combine element of type %s with ParserElement" % type(lhs),
        SyntaxWarning, stacklevel=2)
    return None
2214
def __mul__(self, other):
    """
    Implementation of * operator, allows use of ``expr * 3`` in place of
    ``expr + expr + expr``. Expressions may also be multiplied by a 2-integer
    tuple, similar to ``{min, max}`` multipliers in regular expressions. Tuples
    may also include ``None`` (or ``...``) as in:
     - ``expr*(n, None)`` or ``expr*(n, )`` is equivalent
          to ``expr*n + ZeroOrMore(expr)``
          (read as "at least n instances of ``expr``")
     - ``expr*(None, n)`` is equivalent to ``expr*(0, n)``
          (read as "0 to n instances of ``expr``")
     - ``expr*(None, None)`` is equivalent to ``ZeroOrMore(expr)``
     - ``expr*(1, None)`` is equivalent to ``OneOrMore(expr)``

    Note that ``expr*(None, n)`` does not raise an exception if
    more than n exprs exist in the input stream; that is,
    ``expr*(None, n)`` does not enforce a maximum number of expr
    occurrences. If this behavior is desired, then write
    ``expr*(None, n) + ~expr``

    Raises ``TypeError`` if ``other`` is neither an int nor a tuple of
    ints/None/``...``, and ``ValueError`` for a negative count, a maximum
    smaller than the minimum, or a multiplier of 0 or (0, 0).
    """
    # normalize the Ellipsis short forms: ``expr * ...`` and ``expr * (..., n)``
    if other is Ellipsis:
        other = (0, None)
    elif isinstance(other, tuple) and other[:1] == (Ellipsis,):
        other = ((0, ) + other[1:] + (None,))[:2]

    if isinstance(other, int):
        minElements, optElements = other, 0
    elif isinstance(other, tuple):
        other = tuple(o if o is not Ellipsis else None for o in other)
        # pad to exactly (min, max)
        other = (other + (None, None))[:2]
        if other[0] is None:
            other = (0, other[1])
        if isinstance(other[0], int) and other[1] is None:
            # open-ended repetition
            if other[0] == 0:
                return ZeroOrMore(self)
            if other[0] == 1:
                return OneOrMore(self)
            else:
                return self * other[0] + ZeroOrMore(self)
        elif isinstance(other[0], int) and isinstance(other[1], int):
            minElements, optElements = other
            # optElements becomes the count of optional repetitions beyond the minimum
            optElements -= minElements
        else:
            raise TypeError("cannot multiply 'ParserElement' and ('%s', '%s') objects"
                            % (type(other[0]).__name__, type(other[1]).__name__))
    else:
        raise TypeError("cannot multiply 'ParserElement' and '%s' objects"
                        % (type(other).__name__,))

    if minElements < 0:
        raise ValueError("cannot multiply ParserElement by negative value")
    if optElements < 0:
        raise ValueError("second tuple value must be greater or equal to first tuple value")
    if minElements == optElements == 0:
        raise ValueError("cannot multiply ParserElement by 0 or (0, 0)")

    if optElements:
        def makeOptionalList(n):
            # nested Optionals: each further repetition is only tried if the previous one matched
            if n > 1:
                return Optional(self + makeOptionalList(n - 1))
            else:
                return Optional(self)
        if minElements:
            if minElements == 1:
                ret = self + makeOptionalList(optElements)
            else:
                ret = And([self] * minElements) + makeOptionalList(optElements)
        else:
            ret = makeOptionalList(optElements)
    else:
        if minElements == 1:
            ret = self
        else:
            ret = And([self] * minElements)
    return ret
2288
def __rmul__(self, other):
    """
    Implementation of * operator when the left operand is not a
    :class:`ParserElement`; delegates to ``__mul__`` with the same operand.
    """
    repeated = self.__mul__(other)
    return repeated
2291
def __or__(self, other):
    """
    Implementation of | operator - returns :class:`MatchFirst`.

    ``expr | ...`` returns a ``_PendingSkip`` placeholder with ``must_skip``
    set. A string operand is converted using ``_literalStringClass``; any
    other non-element operand produces a ``SyntaxWarning`` and a None result.
    """
    if other is Ellipsis:
        return _PendingSkip(self, must_skip=True)

    rhs = self._literalStringClass(other) if isinstance(other, basestring) else other
    if isinstance(rhs, ParserElement):
        return MatchFirst([self, rhs])

    warnings.warn(
        "Cannot combine element of type %s with ParserElement" % type(rhs),
        SyntaxWarning, stacklevel=2)
    return None
2306
def __ror__(self, other):
    """
    Implementation of | operator when left operand is not a :class:`ParserElement`.

    A string left operand is converted using ``_literalStringClass`` and the
    operation is retried as ``other | self``; any other non-element operand
    produces a ``SyntaxWarning`` and a None result.
    """
    lhs = self._literalStringClass(other) if isinstance(other, basestring) else other
    if isinstance(lhs, ParserElement):
        return lhs | self

    warnings.warn(
        "Cannot combine element of type %s with ParserElement" % type(lhs),
        SyntaxWarning, stacklevel=2)
    return None
2318
def __xor__(self, other):
    """
    Implementation of ^ operator - returns :class:`Or`.

    A string operand is converted using ``_literalStringClass``; any other
    non-element operand produces a ``SyntaxWarning`` and a None result.
    """
    rhs = self._literalStringClass(other) if isinstance(other, basestring) else other
    if isinstance(rhs, ParserElement):
        return Or([self, rhs])

    warnings.warn(
        "Cannot combine element of type %s with ParserElement" % type(rhs),
        SyntaxWarning, stacklevel=2)
    return None
2330
def __rxor__(self, other):
    """
    Implementation of ^ operator when left operand is not a :class:`ParserElement`.

    A string left operand is converted using ``_literalStringClass`` and the
    operation is retried as ``other ^ self``; any other non-element operand
    produces a ``SyntaxWarning`` and a None result.
    """
    lhs = self._literalStringClass(other) if isinstance(other, basestring) else other
    if isinstance(lhs, ParserElement):
        return lhs ^ self

    warnings.warn(
        "Cannot combine element of type %s with ParserElement" % type(lhs),
        SyntaxWarning, stacklevel=2)
    return None
2342
def __and__(self, other):
    """
    Implementation of & operator - returns :class:`Each`.

    A string operand is converted using ``_literalStringClass``; any other
    non-element operand produces a ``SyntaxWarning`` and a None result.
    """
    rhs = self._literalStringClass(other) if isinstance(other, basestring) else other
    if isinstance(rhs, ParserElement):
        return Each([self, rhs])

    warnings.warn(
        "Cannot combine element of type %s with ParserElement" % type(rhs),
        SyntaxWarning, stacklevel=2)
    return None
2354
def __rand__(self, other):
    """
    Implementation of & operator when left operand is not a :class:`ParserElement`.

    A string left operand is converted using ``_literalStringClass`` and the
    operation is retried as ``other & self``; any other non-element operand
    produces a ``SyntaxWarning`` and a None result.
    """
    lhs = self._literalStringClass(other) if isinstance(other, basestring) else other
    if isinstance(lhs, ParserElement):
        return lhs & self

    warnings.warn(
        "Cannot combine element of type %s with ParserElement" % type(lhs),
        SyntaxWarning, stacklevel=2)
    return None
2366
def __invert__(self):
    """
    Implementation of ~ operator - wraps this expression in :class:`NotAny`.
    """
    negated = NotAny(self)
    return negated
2372
def __iter__(self):
    """Always raise ``TypeError``: parser elements are not iterable."""
    # must implement __iter__ to override legacy use of sequential access to __getitem__ to
    # iterate over a sequence
    class_name = self.__class__.__name__
    raise TypeError('%r object is not iterable' % class_name)
2377
def __getitem__(self, key):
    """
    use ``[]`` indexing notation as a short form for expression repetition:
     - ``expr[n]`` is equivalent to ``expr*n``
     - ``expr[m, n]`` is equivalent to ``expr*(m, n)``
     - ``expr[n, ...]`` or ``expr[n,]`` is equivalent
          to ``expr*n + ZeroOrMore(expr)``
          (read as "at least n instances of ``expr``")
     - ``expr[..., n]`` is equivalent to ``expr*(0, n)``
          (read as "0 to n instances of ``expr``")
     - ``expr[...]`` and ``expr[0, ...]`` are equivalent to ``ZeroOrMore(expr)``
     - ``expr[1, ...]`` is equivalent to ``OneOrMore(expr)``
     ``None`` may be used in place of ``...``.

    Note that ``expr[..., n]`` and ``expr[m, n]`` do not raise an exception
    if more than ``n`` ``expr``s exist in the input stream.  If this behavior is
    desired, then write ``expr[..., n] + ~expr``.

    Index arguments beyond the second are ignored, with a warning.
    """
    # convert single arg keys to tuples
    if isinstance(key, str):
        key = (key,)
    else:
        try:
            iter(key)
        except TypeError:
            # a lone non-iterable n means "exactly n": (n, n)
            key = (key, key)

    arg_count = len(key)
    if arg_count > 2:
        overflow_note = '... [{0}]'.format(arg_count) if arg_count > 5 else ''
        warnings.warn("only 1 or 2 index arguments supported ({0}{1})".format(key[:5], overflow_note))

    # clip to 2 elements
    return self * tuple(key[:2])
2413
def __call__(self, name=None):
    """
    Shortcut for :class:`setResultsName`, with ``listAllMatches=False``.

    If ``name`` is given with a trailing ``'*'`` character, then ``listAllMatches`` will be
    passed as ``True``.

    If ``name`` is omitted, same as calling :class:`copy`.

    Example::

        # these are equivalent
        userdata = Word(alphas).setResultsName("name") + Word(nums + "-").setResultsName("socsecno")
        userdata = Word(alphas)("name") + Word(nums + "-")("socsecno")
    """
    if name is None:
        return self.copy()
    return self._setResultsName(name)
2433
def suppress(self):
    """
    Return this :class:`ParserElement` wrapped in :class:`Suppress`, so that its
    matched tokens are left out of the results; useful to keep punctuation from
    cluttering up returned output.
    """
    suppressed = Suppress(self)
    return suppressed
2440
def leaveWhitespace(self):
    """
    Turn off whitespace skipping before this :class:`ParserElement`'s pattern
    is matched (clears ``skipWhitespace``). This is normally only used internally by
    the pyparsing module, but may be needed in some whitespace-sensitive grammars.

    Returns self, so calls can be chained.
    """
    self.skipWhitespace = False
    return self
2449
def setWhitespaceChars(self, chars):
    """
    Overrides the default whitespace chars for this element: stores ``chars``
    as ``whiteChars``, re-enables whitespace skipping, and clears
    ``copyDefaultWhiteChars``.

    Returns self, so calls can be chained.
    """
    self.whiteChars = chars
    self.copyDefaultWhiteChars = False
    self.skipWhitespace = True
    return self
2458
def parseWithTabs(self):
    """
    Overrides the default behavior of expanding ``<TAB>``s to spaces before
    parsing the input string (sets ``keepTabs``).
    Must be called before ``parseString`` when the input grammar contains elements that
    match ``<TAB>`` characters.

    Returns self, so calls can be chained.
    """
    self.keepTabs = True
    return self
2467
def ignore(self, other):
    """
    Define expression to be ignored (e.g., comments) while doing pattern
    matching; may be called repeatedly, to define multiple comment or other
    ignorable patterns.

    A string is wrapped in :class:`Suppress`. A :class:`Suppress` instance is
    added only if it is not already in ``ignoreExprs``; any other expression
    is copied, wrapped in :class:`Suppress` and added. Returns self.

    Example::

        patt = OneOrMore(Word(alphas))
        patt.parseString('ablaj /* comment */ lskjd')  # -> ['ablaj']

        patt.ignore(cStyleComment)
        patt.parseString('ablaj /* comment */ lskjd')  # -> ['ablaj', 'lskjd']
    """
    if isinstance(other, basestring):
        other = Suppress(other)

    if not isinstance(other, Suppress):
        self.ignoreExprs.append(Suppress(other.copy()))
    elif other not in self.ignoreExprs:
        self.ignoreExprs.append(other)
    return self
2491
def setDebugActions(self, startAction, successAction, exceptionAction):
    """
    Enable display of debugging messages while doing pattern matching,
    using the given callbacks. Any callback passed as a false value (such as
    None) is replaced by the corresponding default debug action.

    Returns self, so calls can be chained.
    """
    on_start = startAction or _defaultStartDebugAction
    on_success = successAction or _defaultSuccessDebugAction
    on_exception = exceptionAction or _defaultExceptionDebugAction
    self.debugActions = (on_start, on_success, on_exception)
    self.debug = True
    return self
2501
def setDebug(self, flag=True):
    """
    Enable display of debugging messages while doing pattern matching.
    Set ``flag`` to True to enable, False to disable.

    Example::

        wd = Word(alphas).setName("alphaword")
        integer = Word(nums).setName("numword")
        term = wd | integer

        # turn on debugging for wd
        wd.setDebug()

        OneOrMore(term).parseString("abc 123 xyz 890")

    prints::

        Match alphaword at loc 0(1,1)
        Matched alphaword -> ['abc']
        Match alphaword at loc 3(1,4)
        Exception raised:Expected alphaword (at char 4), (line:1, col:5)
        Match alphaword at loc 7(1,8)
        Matched alphaword -> ['xyz']
        Match alphaword at loc 11(1,12)
        Exception raised:Expected alphaword (at char 12), (line:1, col:13)
        Match alphaword at loc 15(1,16)
        Exception raised:Expected alphaword (at char 15), (line:1, col:16)

    The output shown is that produced by the default debug actions - custom debug actions can be
    specified using :class:`setDebugActions`. Prior to attempting
    to match the ``wd`` expression, the debugging message ``"Match <exprname> at loc <n>(<line>,<col>)"``
    is shown. Then if the parse succeeds, a ``"Matched"`` message is shown, or an ``"Exception raised"``
    message is shown. Also note the use of :class:`setName` to assign a human-readable name to the expression,
    which makes debugging and exception messages easier to understand - for instance, the default
    name created for the :class:`Word` expression without calling ``setName`` is ``"W:(ABCD...)"``.
    """
    if not flag:
        self.debug = False
        return self

    # enabling installs the three default actions (and sets self.debug)
    self.setDebugActions(_defaultStartDebugAction,
                         _defaultSuccessDebugAction,
                         _defaultExceptionDebugAction)
    return self
2544
def __str__(self):
    """Return this element's ``name`` attribute."""
    label = self.name
    return label
2547
def __repr__(self):
    """Return the same text as the string conversion of this element (via ``_ustr``)."""
    text = _ustr(self)
    return text
2550
def streamline(self):
    """
    Mark this element as streamlined and discard any cached ``strRepr``.

    Returns self, so calls can be chained.
    """
    self.strRepr = None
    self.streamlined = True
    return self
2555
def checkRecursion(self, parseElementList):
    """Recursion check hook; this base implementation does nothing."""
    return None
2558
def validate(self, validateTrace=None):
    """
    Check defined expressions for valid structure, check for infinite recursive definitions.

    Runs ``checkRecursion`` starting from an empty element list;
    ``validateTrace`` is not used by this implementation.
    """
    seen = []
    self.checkRecursion(seen)
2564
def parseFile(self, file_or_filename, parseAll=False):
    """
    Execute the parse expression on the given file or filename.
    If a filename is specified (instead of a file object),
    the entire file is opened, read, and closed before parsing.

    The contents are handed to :class:`parseString` together with ``parseAll``,
    and its result is returned.
    """
    try:
        text = file_or_filename.read()
    except AttributeError:
        # no usable read(): treat the argument as a path
        with open(file_or_filename, "r") as handle:
            text = handle.read()

    try:
        return self.parseString(text, parseAll)
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        # catch and re-raise exception from here, clearing out pyparsing internal stack trace
        if getattr(exc, '__traceback__', None) is not None:
            exc.__traceback__ = self._trim_traceback(exc.__traceback__)
        raise exc
2586
def __eq__(self, other):
    """
    Compare this element with ``other``:

    - the same object compares equal
    - a string compares equal if this element ``matches`` it
    - another :class:`ParserElement` compares equal if both have equal ``vars()``
    - anything else compares unequal
    """
    if self is other:
        return True
    if isinstance(other, basestring):
        return self.matches(other)
    if isinstance(other, ParserElement):
        return vars(self) == vars(other)
    return False
2595
def __ne__(self, other):
    """Return the negation of ``self == other``."""
    is_equal = (self == other)
    return not is_equal
2598
def __hash__(self):
    """Hash by object identity (``id(self)``)."""
    identity = id(self)
    return identity
2601
def __req__(self, other):
    """Reflected-equality helper: returns the result of ``self == other``."""
    outcome = (self == other)
    return outcome
2604
def __rne__(self, other):
    """Reflected-inequality helper: returns the negation of ``self == other``."""
    is_equal = (self == other)
    return not is_equal
2607
def matches(self, testString, parseAll=True):
    """
    Method for quick testing of a parser against a test string. Good for simple
    inline microtests of sub expressions while building up larger parser.

    Parameters:
     - testString - to test against this expression for a match
     - parseAll - (default= ``True``) - flag to pass to :class:`parseString` when running tests

    Returns True if :class:`parseString` succeeds, False if it raises a
    :class:`ParseBaseException`.

    Example::

        expr = Word(nums)
        assert expr.matches("100")
    """
    try:
        self.parseString(_ustr(testString), parseAll=parseAll)
    except ParseBaseException:
        return False
    else:
        return True
2627
def runTests(self, tests, parseAll=True, comment='#',
             fullDump=True, printResults=True, failureTests=False, postParse=None,
             file=None):
    """
    Execute the parse expression on a series of test strings, showing each
    test, the parsed results or where the parse failed. Quick and easy way to
    run a parse expression against a list of sample strings.

    Parameters:
     - tests - a list of separate test strings, or a multiline string of test strings
     - parseAll - (default= ``True``) - flag to pass to :class:`parseString` when running tests
     - comment - (default= ``'#'``) - expression for indicating embedded comments in the test
          string; pass None to disable comment filtering
     - fullDump - (default= ``True``) - dump results as list followed by results names in nested outline;
          if False, only dump nested list
     - printResults - (default= ``True``) prints test output to stdout
     - failureTests - (default= ``False``) indicates if these tests are expected to fail parsing
     - postParse - (default= ``None``) optional callback for successful parse results; called as
          `fn(test_string, parse_results)` and returns a string to be added to the test output
     - file - (default=``None``) optional file-like object to which test output will be written;
          if None, will default to ``sys.stdout``

    Returns: a (success, results) tuple, where success indicates that all tests succeeded
    (or failed if ``failureTests`` is True), and the results contain a list of
    ``(test string, parse results or exception)`` pairs, one per test

    Example::

        number_expr = pyparsing_common.number.copy()

        result = number_expr.runTests('''
            # unsigned integer
            100
            # negative integer
            -100
            # float with scientific notation
            6.02e23
            # integer with scientific notation
            1e-12
            ''')
        print("Success" if result[0] else "Failed!")

        result = number_expr.runTests('''
            # stray character
            100Z
            # missing leading digit before '.'
            -.100
            # too many '.'
            3.14.159
            ''', failureTests=True)
        print("Success" if result[0] else "Failed!")

    prints::

        # unsigned integer
        100
        [100]

        # negative integer
        -100
        [-100]

        # float with scientific notation
        6.02e23
        [6.02e+23]

        # integer with scientific notation
        1e-12
        [1e-12]

        Success

        # stray character
        100Z
           ^
        FAIL: Expected end of text (at char 3), (line:1, col:4)

        # missing leading digit before '.'
        -.100
        ^
        FAIL: Expected {real number with scientific notation | real number | signed integer} (at char 0), (line:1, col:1)

        # too many '.'
        3.14.159
            ^
        FAIL: Expected end of text (at char 4), (line:1, col:5)

        Success

    Each test string must be on a single line. If you want to test a string that spans multiple
    lines, create a test like this::

        expr.runTests(r"this is a test\\n of strings that spans \\n 3 lines")

    (Note that this is a raw string literal, you must include the leading 'r'.)
    """
    if isinstance(tests, basestring):
        tests = list(map(str.strip, tests.rstrip().splitlines()))
    if isinstance(comment, basestring):
        comment = Literal(comment)
    if file is None:
        file = sys.stdout
    print_ = file.write

    # rewrites a literal backslash-n in a test line into a real newline
    NL = Literal(r'\n').addParseAction(replaceWith('\n')).ignore(quotedString)
    BOM = u'\ufeff'

    allResults = []
    comments = []
    success = True
    for test in tests:
        # comment lines, and blank lines inside a run of comments, are held
        # back and printed as a header for the next real test
        is_comment = comment is not None and comment.matches(test, False)
        if is_comment or (comments and not test):
            comments.append(test)
            continue
        if not test:
            continue

        header = '\n' + '\n'.join(comments) if comments else ''
        out = [header, test]
        comments = []
        try:
            # convert newline marks to actual newlines, and strip leading BOM if present
            test = NL.transformString(test.lstrip(BOM))
            result = self.parseString(test, parseAll=parseAll)
        except ParseBaseException as pe:
            fatal = "(FATAL)" if isinstance(pe, ParseFatalException) else ""
            if '\n' in test:
                # multi-line test: echo the offending line, then point at the column
                out.append(line(pe.loc, test))
                marker_indent = col(pe.loc, test) - 1
            else:
                marker_indent = pe.loc
            out.append(' ' * marker_indent + '^' + fatal)
            out.append("FAIL: " + str(pe))
            success = success and failureTests
            result = pe
        except Exception as exc:
            out.append("FAIL-EXCEPTION: " + str(exc))
            success = success and failureTests
            result = exc
        else:
            success = success and not failureTests
            if postParse is None:
                out.append(result.dump(full=fullDump))
            else:
                try:
                    pp_value = postParse(test, result)
                    if pp_value is None:
                        out.append(result.dump())
                    elif isinstance(pp_value, ParseResults):
                        out.append(pp_value.dump())
                    else:
                        out.append(str(pp_value))
                except Exception as e:
                    out.append(result.dump(full=fullDump))
                    out.append("{0} failed: {1}: {2}".format(postParse.__name__, type(e).__name__, e))

        if printResults:
            if fullDump:
                out.append('')
            print_('\n'.join(out))

        allResults.append((test, result))

    return success, allResults
2789
2790
class _PendingSkip(ParserElement):
    # Internal placeholder class that holds the place where '...' was added to a
    # parser element; once another ParserElement is added to it, this placeholder
    # is replaced with a SkipTo targeting that element.
    def __init__(self, expr, must_skip=False):
        super(_PendingSkip, self).__init__()
        # render as the anchor expression followed by '...' (the Empty() is only
        # there to produce a combined expression whose text can be rewritten)
        label = str(expr + Empty()).replace('Empty', '...')
        self.strRepr = label
        self.name = label
        self.anchor = expr
        self.must_skip = must_skip

    def __add__(self, other):
        skipper = SkipTo(other).setName("...")("_skipped*")

        # simple case: anchor, then skip up to the following expression
        if not self.must_skip:
            return self.anchor + skipper + other

        def must_skip(t):
            # nothing (or only an empty string) was skipped: drop the skipped
            # token and its results name
            if not t._skipped or t._skipped.asList() == ['']:
                del t[0]
                t.pop("_skipped", None)

        def show_skip(t):
            # last skipped entry is empty: replace the results name with a
            # marker naming the anchor expression
            if t._skipped.asList()[-1:] == ['']:
                t.pop('_skipped')
                t['_skipped'] = 'missing <' + repr(self.anchor) + '>'

        anchored = self.anchor + skipper().addParseAction(must_skip)
        unanchored = skipper().addParseAction(show_skip)
        return (anchored | unanchored) + other

    def __repr__(self):
        return self.strRepr

    def parseImpl(self, *args):
        # a placeholder that was never followed by a target cannot parse anything
        raise Exception("use of `...` expression without following SkipTo target expression")
2822
2823
class Token(ParserElement):
    """Abstract :class:`ParserElement` subclass that serves as the base
    for atomic matching patterns.
    """
    def __init__(self):
        # tokens are initialized with savelist turned off
        super(Token, self).__init__(savelist=False)
2830
2831
class Empty(Token):
    """A token that matches nothing and therefore always succeeds.
    """
    def __init__(self):
        super(Empty, self).__init__()
        self.mayIndexError = False
        self.mayReturnEmpty = True
        self.name = "Empty"
2840
2841
class NoMatch(Token):
    """A token that fails at every position; it will never match.
    """
    def __init__(self):
        super(NoMatch, self).__init__()
        self.errmsg = "Unmatchable token"
        self.mayIndexError = False
        self.mayReturnEmpty = True
        self.name = "NoMatch"

    def parseImpl(self, instring, loc, doActions=True):
        # unconditionally report a parse failure at the current location
        raise ParseException(instring, loc, self.errmsg, self)
2854
2855
class Literal(Token):
    """Token to exactly match a specified string.

    Example::

        Literal('blah').parseString('blah')  # -> ['blah']
        Literal('blah').parseString('blahfooblah')  # -> ['blah']
        Literal('blah').parseString('bla')  # -> Exception: Expected "blah"

    For case-insensitive matching, use :class:`CaselessLiteral`.

    For keyword matching (force word break before and after the matched string),
    use :class:`Keyword` or :class:`CaselessKeyword`.
    """
    def __init__(self, matchString):
        """Create a literal matching ``matchString``.

        If ``matchString`` is empty, a ``SyntaxWarning`` is issued and the
        instance is converted into an :class:`Empty`.
        """
        super(Literal, self).__init__()
        self.match = matchString
        self.matchLen = len(matchString)
        self.name = '"%s"' % _ustr(self.match)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False

        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            warnings.warn("null string passed to Literal; use Empty() instead",
                          SyntaxWarning, stacklevel=2)
            self.__class__ = Empty
            # the instance now behaves as Empty, which always matches without
            # consuming input, so it must advertise that it can return empty
            self.mayReturnEmpty = True
            return

        self.mayReturnEmpty = False

        # Performance tuning: modify __class__ to select
        # a parseImpl optimized for single-character check
        # (exact type test on purpose - subclasses keep their own parseImpl)
        if self.matchLen == 1 and type(self) is Literal:
            self.__class__ = _SingleCharLiteral

    def parseImpl(self, instring, loc, doActions=True):
        """Return ``(new_loc, matched_string)`` if the literal starts at
        ``loc``; raise :class:`ParseException` otherwise.
        """
        # cheap first-character test before the full startswith comparison
        if instring[loc] == self.firstMatchChar and instring.startswith(self.match, loc):
            return loc + self.matchLen, self.match
        raise ParseException(instring, loc, self.errmsg, self)
2894
class _SingleCharLiteral(Literal):
    # Literal variant whose parseImpl only compares a single character.
    def parseImpl(self, instring, loc, doActions=True):
        if instring[loc] != self.firstMatchChar:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc + 1, self.match
2900
# short module-level alias for the Literal class
_L = Literal
# register Literal as ParserElement's literal-string class
# NOTE(review): presumably the class used to wrap plain strings that are
# combined with parser elements - confirm against ParserElement's usage
ParserElement._literalStringClass = Literal
2903
class Keyword(Token):
    """Token to exactly match a specified string as a keyword, that is,
    it must not be immediately preceded or followed by a keyword
    (identifier) character. Compare with :class:`Literal`:

     - ``Literal("if")`` will match the leading ``'if'`` in
       ``'ifAndOnlyIf'``.
     - ``Keyword("if")`` will not; it will only match the leading
       ``'if'`` in ``'if x=1'``, or ``'if(y==2)'``

    Accepts two optional constructor arguments in addition to the
    keyword string:

     - ``identChars`` is a string of characters that would be valid
       identifier characters, defaulting to all alphanumerics + "_" and
       "$"
     - ``caseless`` allows case-insensitive matching, default is ``False``.

    Example::

        Keyword("start").parseString("start")  # -> ['start']
        Keyword("start").parseString("starting")  # -> Exception

    For case-insensitive matching, use :class:`CaselessKeyword`.
    """
    DEFAULT_KEYWORD_CHARS = alphanums + "_$"

    def __init__(self, matchString, identChars=None, caseless=False):
        super(Keyword, self).__init__()
        if identChars is None:
            identChars = Keyword.DEFAULT_KEYWORD_CHARS
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            warnings.warn("null string passed to Keyword; use Empty() instead",
                          SyntaxWarning, stacklevel=2)
        self.name = '"%s"' % self.match
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = False
        self.mayIndexError = False
        self.caseless = caseless
        if caseless:
            # compare everything in upper case when matching caselessly
            self.caselessmatch = matchString.upper()
            identChars = identChars.upper()
        self.identChars = set(identChars)

    def parseImpl(self, instring, loc, doActions=True):
        """Return ``(new_loc, keyword)`` if the keyword starts at ``loc`` and
        is not adjacent to an identifier character on either side; raise
        :class:`ParseException` otherwise.
        """
        if self.caseless:
            if ((instring[loc:loc + self.matchLen].upper() == self.caselessmatch)
                    # at end of input, or the following char is not an identifier char
                    and (loc >= len(instring) - self.matchLen
                         or instring[loc + self.matchLen].upper() not in self.identChars)
                    # at start of input, or the preceding char is not an identifier char
                    and (loc == 0
                         or instring[loc - 1].upper() not in self.identChars)):
                return loc + self.matchLen, self.match

        else:
            if instring[loc] == self.firstMatchChar:
                if ((self.matchLen == 1 or instring.startswith(self.match, loc))
                        and (loc >= len(instring) - self.matchLen
                             or instring[loc + self.matchLen] not in self.identChars)
                        and (loc == 0 or instring[loc - 1] not in self.identChars)):
                    return loc + self.matchLen, self.match

        raise ParseException(instring, loc, self.errmsg, self)

    def copy(self):
        """Return a copy of this keyword that keeps this instance's own
        identifier characters.
        """
        c = super(Keyword, self).copy()
        # Preserve the identifier characters this instance was built with
        # (including caseless upper-casing) instead of resetting them to the
        # class default; use a fresh set so the copies do not share state.
        c.identChars = set(self.identChars)
        return c

    @staticmethod
    def setDefaultKeywordChars(chars):
        """Overrides the default Keyword chars
        """
        Keyword.DEFAULT_KEYWORD_CHARS = chars
2981
class CaselessLiteral(Literal):
    """Token to match a specified string while ignoring the case of
    letters. Note: the matched results are always returned in the case
    of the given match string, NOT the case of the input text.

    Example::

        OneOrMore(CaselessLiteral("CMD")).parseString("cmd CMD Cmd10")  # -> ['CMD', 'CMD', 'CMD']

    (Contrast with example for :class:`CaselessKeyword`.)
    """
    def __init__(self, matchString):
        # the base Literal stores the upper-cased text used for comparisons
        super(CaselessLiteral, self).__init__(matchString.upper())
        # keep the literal exactly as it was defined, to return on a match
        self.returnString = matchString
        self.name = "'%s'" % self.returnString
        self.errmsg = "Expected " + self.name

    def parseImpl(self, instring, loc, doActions=True):
        end = loc + self.matchLen
        if instring[loc:end].upper() != self.match:
            raise ParseException(instring, loc, self.errmsg, self)
        return end, self.returnString
3004
class CaselessKeyword(Keyword):
    """
    Case-insensitive variant of :class:`Keyword`.

    Example::

        OneOrMore(CaselessKeyword("CMD")).parseString("cmd CMD Cmd10")  # -> ['CMD', 'CMD']

    (Contrast with example for :class:`CaselessLiteral`.)
    """
    def __init__(self, matchString, identChars=None):
        # delegate to Keyword with caseless matching switched on
        super(CaselessKeyword, self).__init__(matchString, identChars, caseless=True)
3017
class CloseMatch(Token):
    """A variation on :class:`Literal` which matches "close" matches,
    that is, strings with at most 'n' mismatching characters.
    :class:`CloseMatch` takes parameters:

     - ``match_string`` - string to be matched
     - ``maxMismatches`` - (``default=1``) maximum number of
       mismatches allowed to count as a match

    The results from a successful parse will contain the matched text
    from the input string and the following named results:

     - ``mismatches`` - a list of the positions within the
       match_string where mismatches were found
     - ``original`` - the original match_string used to compare
       against the input string

    If ``mismatches`` is an empty list, then the match was an exact
    match.

    Example::

        patt = CloseMatch("ATCATCGAATGGA")
        patt.parseString("ATCATCGAAXGGA") # -> (['ATCATCGAAXGGA'], {'mismatches': [[9]], 'original': ['ATCATCGAATGGA']})
        patt.parseString("ATCAXCGAAXGGA") # -> Exception: Expected 'ATCATCGAATGGA' (with up to 1 mismatches) (at char 0), (line:1, col:1)

        # exact match
        patt.parseString("ATCATCGAATGGA") # -> (['ATCATCGAATGGA'], {'mismatches': [[]], 'original': ['ATCATCGAATGGA']})

        # close match allowing up to 2 mismatches
        patt = CloseMatch("ATCATCGAATGGA", maxMismatches=2)
        patt.parseString("ATCAXCGAAXGGA") # -> (['ATCAXCGAAXGGA'], {'mismatches': [[4, 9]], 'original': ['ATCATCGAATGGA']})
    """
    def __init__(self, match_string, maxMismatches=1):
        super(CloseMatch, self).__init__()
        self.name = match_string
        self.match_string = match_string
        self.maxMismatches = maxMismatches
        self.errmsg = "Expected %r (with up to %d mismatches)" % (self.match_string, self.maxMismatches)
        self.mayIndexError = False
        self.mayReturnEmpty = False

    def parseImpl(self, instring, loc, doActions=True):
        """Compare the input at ``loc`` with ``match_string`` character by
        character.

        Returns ``(end_loc, results)`` where ``results`` holds the matched
        input text plus the ``original`` and ``mismatches`` named results.
        Raises :class:`ParseException` if the remaining input is shorter than
        ``match_string`` or more than ``maxMismatches`` characters differ.
        """
        start = loc
        maxloc = start + len(self.match_string)

        # only attempt a comparison if enough input remains
        if maxloc <= len(instring):
            match_string = self.match_string
            maxMismatches = self.maxMismatches
            mismatches = []

            for offset, (src, mat) in enumerate(zip(instring[start:maxloc], match_string)):
                if src != mat:
                    mismatches.append(offset)
                    if len(mismatches) > maxMismatches:
                        break
            else:
                # The whole candidate was compared without exceeding the
                # mismatch budget. The end location must be relative to the
                # start of the match, not to the beginning of the input.
                loc = maxloc
                results = ParseResults([instring[start:loc]])
                results['original'] = match_string
                results['mismatches'] = mismatches
                return loc, results

        raise ParseException(instring, loc, self.errmsg, self)
3085
3086
class Word(Token):
    """Token for matching words composed of allowed character sets.

    A ``Word`` is defined with a string containing all allowed initial
    characters, an optional string containing the allowed body
    characters (if omitted, the initial character set is used for the
    body as well), and an optional minimum, maximum, and/or exact
    length. ``min`` defaults to 1 (a minimum value < 1 is not valid);
    ``max`` and ``exact`` default to 0, meaning no maximum or exact
    length restriction. The optional ``excludeChars`` parameter lists
    characters to be removed from the given character sets; this is
    useful to define a word of all printables except for one or two
    characters, for instance.

    :class:`srange` is useful for defining custom character set strings
    for defining ``Word`` expressions, using range notation from
    regular expression character sets.

    A common mistake is to use :class:`Word` to match a specific literal
    string, as in ``Word("Address")``. Remember that :class:`Word`
    uses the string argument to define *sets* of matchable characters.
    This expression would match "Add", "AAA", "dAred", or any other word
    made up of the characters 'A', 'd', 'r', 'e', and 's'. To match an
    exact literal string, use :class:`Literal` or :class:`Keyword`.

    pyparsing includes helper strings for building Words:

     - :class:`alphas`
     - :class:`nums`
     - :class:`alphanums`
     - :class:`hexnums`
     - :class:`alphas8bit` (alphabetic characters in ASCII range 128-255
       - accented, tilded, umlauted, etc.)
     - :class:`punc8bit` (non-alphabetic characters in ASCII range
       128-255 - currency, symbols, superscripts, diacriticals, etc.)
     - :class:`printables` (any non-whitespace character)

    Example::

        # a word composed of digits
        integer = Word(nums) # equivalent to Word("0123456789") or Word(srange("0-9"))

        # a word with a leading capital, and zero or more lowercase
        capital_word = Word(alphas.upper(), alphas.lower())

        # hostnames are alphanumeric, with leading alpha, and '-'
        hostname = Word(alphas, alphanums + '-')

        # roman numeral (not a strict parser, accepts invalid mix of characters)
        roman = Word("IVXLCDM")

        # any string of non-whitespace characters, except for ','
        csv_value = Word(printables, excludeChars=",")
    """
    def __init__(self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False, excludeChars=None):
        super(Word, self).__init__()

        # strip excluded characters from both character sets up front
        if excludeChars:
            excludeChars = set(excludeChars)
            initChars = ''.join(c for c in initChars if c not in excludeChars)
            if bodyChars:
                bodyChars = ''.join(c for c in bodyChars if c not in excludeChars)

        self.initCharsOrig = initChars
        self.initChars = set(initChars)

        # body characters fall back to the initial characters when not given
        effective_body = bodyChars if bodyChars else initChars
        self.bodyCharsOrig = effective_body
        self.bodyChars = set(effective_body)

        self.maxSpecified = max > 0

        if min < 1:
            raise ValueError("cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted")

        self.minLen = min
        self.maxLen = max if max > 0 else _MAX_INT

        # an exact length overrides both bounds
        if exact > 0:
            self.maxLen = exact
            self.minLen = exact

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.asKeyword = asKeyword

        # The regex fast path is only attempted for words without a space in
        # their character sets and without any length restriction.
        init_src = self.initCharsOrig
        body_src = self.bodyCharsOrig
        if ' ' in init_src + body_src or not (min == 1 and max == 0 and exact == 0):
            return

        if body_src == init_src:
            pattern = "[%s]+" % _escapeRegexRangeChars(init_src)
        elif len(init_src) == 1:
            pattern = "%s[%s]*" % (re.escape(init_src),
                                   _escapeRegexRangeChars(body_src),)
        else:
            pattern = "[%s][%s]*" % (_escapeRegexRangeChars(init_src),
                                     _escapeRegexRangeChars(body_src),)
        if self.asKeyword:
            pattern = r"\b" + pattern + r"\b"
        self.reString = pattern

        try:
            self.re = re.compile(self.reString)
        except Exception:
            # could not build the regex; keep the character-loop parseImpl
            self.re = None
        else:
            self.re_match = self.re.match
            self.__class__ = _WordRegex

    def parseImpl(self, instring, loc, doActions=True):
        if instring[loc] not in self.initChars:
            raise ParseException(instring, loc, self.errmsg, self)

        start = loc
        loc += 1
        instrlen = len(instring)
        bodychars = self.bodyChars
        # never scan beyond the maximum length or the end of the input
        maxloc = min(start + self.maxLen, instrlen)
        while loc < maxloc and instring[loc] in bodychars:
            loc += 1

        failed = (
            # fewer characters than required
            loc - start < self.minLen
            # an explicit maximum was given, but the word continues past it
            or (self.maxSpecified and loc < instrlen and instring[loc] in bodychars)
            # keyword mode: a body character directly before or after the word
            or (self.asKeyword
                and (start > 0 and instring[start - 1] in bodychars
                     or loc < instrlen and instring[loc] in bodychars))
        )
        if failed:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]

    def __str__(self):
        # prefer the name reported by the base class, when that works
        try:
            return super(Word, self).__str__()
        except Exception:
            pass

        if self.strRepr is None:

            def charsAsStr(s):
                # abbreviate character sets longer than four characters
                return s[:4] + "..." if len(s) > 4 else s

            if self.initCharsOrig == self.bodyCharsOrig:
                self.strRepr = "W:(%s)" % charsAsStr(self.initCharsOrig)
            else:
                self.strRepr = "W:(%s, %s)" % (charsAsStr(self.initCharsOrig), charsAsStr(self.bodyCharsOrig))

        return self.strRepr
3244 return self.strRepr
3245
class _WordRegex(Word):
    # Word variant that matches using the precompiled regular expression
    # stored in self.re_match.
    def parseImpl(self, instring, loc, doActions=True):
        found = self.re_match(instring, loc)
        if not found:
            raise ParseException(instring, loc, self.errmsg, self)

        return found.end(), found.group()
3254
3255
class Char(_WordRegex):
    """A short-cut class for defining ``Word(characters, exact=1)``,
    for matching any single character out of a string of characters.
    """
    def __init__(self, charset, asKeyword=False, excludeChars=None):
        super(Char, self).__init__(charset, exact=1, asKeyword=asKeyword, excludeChars=excludeChars)
        # build the single-character class regex used by _WordRegex.parseImpl
        pattern = "[%s]" % _escapeRegexRangeChars(''.join(self.initChars))
        if asKeyword:
            pattern = r"\b%s\b" % pattern
        self.reString = pattern
        self.re = re.compile(self.reString)
        self.re_match = self.re.match
3268
3269
class Regex(Token):
    r"""Token for matching strings that match a given regular
    expression. Defined with a string specifying the regular expression in
    a form recognized by the stdlib Python `re module <https://docs.python.org/3/library/re.html>`_.
    If the given regex contains named groups (defined using ``(?P<name>...)``),
    these will be preserved as named parse results.

    If instead of the Python stdlib re module you wish to use a different RE module
    (such as the `regex` module), you can do so by building your
    Regex object with a compiled RE that was compiled using regex.

    Example::

        realnum = Regex(r"[+-]?\d+\.\d*")
        date = Regex(r'(?P<year>\d{4})-(?P<month>\d\d?)-(?P<day>\d\d?)')
        # ref: https://stackoverflow.com/questions/267399/how-do-you-match-only-valid-roman-numerals-with-a-regular-expression
        roman = Regex(r"M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})")

        # use regex module instead of stdlib re module to construct a Regex using
        # a compiled regular expression
        import regex
        parser = pp.Regex(regex.compile(r'[0-9]'))

    """
    def __init__(self, pattern, flags=0, asGroupList=False, asMatch=False):
        """A string ``pattern`` is passed together with ``flags``
        to the ``re.compile()`` function as-is. See the Python
        `re module <https://docs.python.org/3/library/re.html>`_ module for an
        explanation of the acceptable patterns and flags. Alternatively,
        ``pattern`` may be an already compiled RE object (anything with
        ``pattern`` and ``match`` attributes), which is used unchanged.
        """
        super(Regex, self).__init__()

        if isinstance(pattern, basestring):
            if not pattern:
                warnings.warn("null string passed to Regex; use Empty() instead",
                              SyntaxWarning, stacklevel=2)

            self.pattern = pattern
            self.flags = flags

            try:
                self.re = re.compile(self.pattern, self.flags)
            except sre_constants.error:
                warnings.warn("invalid pattern (%s) passed to Regex" % pattern,
                              SyntaxWarning, stacklevel=2)
                raise
            else:
                self.reString = self.pattern

        elif hasattr(pattern, 'pattern') and hasattr(pattern, 'match'):
            # already-compiled RE object: use it directly
            self.re = pattern
            self.pattern = self.reString = pattern.pattern
            self.flags = flags

        else:
            raise TypeError("Regex may only be constructed with a string or a compiled RE object")

        self.re_match = self.re.match

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        # the expression can return empty if the regex matches the empty string
        self.mayReturnEmpty = self.re_match("") is not None
        self.asGroupList = asGroupList
        self.asMatch = asMatch
        # select the result style; asMatch takes precedence when both are set
        if self.asGroupList:
            self.parseImpl = self.parseImplAsGroupList
        if self.asMatch:
            self.parseImpl = self.parseImplAsMatch

    def parseImpl(self, instring, loc, doActions=True):
        # default style: matched text as the token, named groups as named results
        found = self.re_match(instring, loc)
        if not found:
            raise ParseException(instring, loc, self.errmsg, self)

        ret = ParseResults(found.group())
        for group_name, group_value in found.groupdict().items():
            ret[group_name] = group_value
        return found.end(), ret

    def parseImplAsGroupList(self, instring, loc, doActions=True):
        # asGroupList style: return the tuple of the regex's groups
        found = self.re_match(instring, loc)
        if not found:
            raise ParseException(instring, loc, self.errmsg, self)

        return found.end(), found.groups()

    def parseImplAsMatch(self, instring, loc, doActions=True):
        # asMatch style: return the match object itself
        found = self.re_match(instring, loc)
        if not found:
            raise ParseException(instring, loc, self.errmsg, self)

        return found.end(), found

    def __str__(self):
        # prefer the name reported by the base class, when that works
        try:
            return super(Regex, self).__str__()
        except Exception:
            pass

        if self.strRepr is None:
            self.strRepr = "Re:(%s)" % repr(self.pattern)

        return self.strRepr

    def sub(self, repl):
        r"""
        Return Regex with an attached parse action to transform the parsed
        result as if called using `re.sub(expr, repl, string) <https://docs.python.org/3/library/re.html#re.sub>`_.

        Not available with ``asGroupList=True``, nor with a callable ``repl``
        when ``asMatch=True``; both cases warn and raise ``SyntaxError``.

        Example::

            make_html = Regex(r"(\w+):(.*?):").sub(r"<\1>\2</\1>")
            print(make_html.transformString("h1:main title:"))
            # prints "<h1>main title</h1>"
        """
        if self.asGroupList:
            warnings.warn("cannot use sub() with Regex(asGroupList=True)",
                          SyntaxWarning, stacklevel=2)
            raise SyntaxError()

        if self.asMatch and callable(repl):
            warnings.warn("cannot use sub() with a callable with Regex(asMatch=True)",
                          SyntaxWarning, stacklevel=2)
            raise SyntaxError()

        if self.asMatch:
            # the token is a match object: expand the replacement template on it
            def pa(tokens):
                return tokens[0].expand(repl)
        else:
            # the token is the matched text: run the substitution over it
            def pa(tokens):
                return self.re.sub(repl, tokens[0])
        return self.addParseAction(pa)
3409
class QuotedString(Token):
    r"""
    Token for matching strings that are delimited by quoting characters.

    Defined with the following parameters:

        - quoteChar - string of one or more characters defining the
          quote delimiting string
        - escChar - character to escape quotes, typically backslash
          (default= ``None``)
        - escQuote - special quote sequence to escape an embedded quote
          string (such as SQL's ``""`` to escape an embedded ``"``)
          (default= ``None``)
        - multiline - boolean indicating whether quotes can span
          multiple lines (default= ``False``)
        - unquoteResults - boolean indicating whether the matched text
          should be unquoted (default= ``True``)
        - endQuoteChar - string of one or more characters defining the
          end of the quote delimited string (default= ``None``  => same as
          quoteChar)
        - convertWhitespaceEscapes - convert escaped whitespace
          (``'\t'``, ``'\n'``, etc.) to actual whitespace
          (default= ``True``)

    Example::

        qs = QuotedString('"')
        print(qs.searchString('lsjdf "This is the quote" sldjf'))
        complex_qs = QuotedString('{{', endQuoteChar='}}')
        print(complex_qs.searchString('lsjdf {{This is the "quote"}} sldjf'))
        sql_qs = QuotedString('"', escQuote='""')
        print(sql_qs.searchString('lsjdf "This is the quote with ""embedded"" quotes" sldjf'))

    prints::

        [['This is the quote']]
        [['This is the "quote"']]
        [['This is the quote with "embedded" quotes']]
    """
    def __init__(self, quoteChar, escChar=None, escQuote=None, multiline=False,
                 unquoteResults=True, endQuoteChar=None, convertWhitespaceEscapes=True):
        super(QuotedString, self).__init__()

        # surrounding whitespace in the quote delimiters is discarded
        quoteChar = quoteChar.strip()
        if not quoteChar:
            warnings.warn("quoteChar cannot be the empty string", SyntaxWarning, stacklevel=2)
            raise SyntaxError()

        if endQuoteChar is None:
            endQuoteChar = quoteChar
        else:
            endQuoteChar = endQuoteChar.strip()
            if not endQuoteChar:
                warnings.warn("endQuoteChar cannot be the empty string", SyntaxWarning, stacklevel=2)
                raise SyntaxError()

        self.quoteChar = quoteChar
        self.quoteCharLen = len(quoteChar)
        self.firstQuoteChar = quoteChar[0]
        self.endQuoteChar = endQuoteChar
        self.endQuoteCharLen = len(endQuoteChar)
        self.escChar = escChar
        self.escQuote = escQuote
        self.unquoteResults = unquoteResults
        self.convertWhitespaceEscapes = convertWhitespaceEscapes

        # Opening quote, then a group of alternatives for the quoted content.
        # The first alternative is any character other than the first end-quote
        # character, the escape character and (unless multiline) CR/LF.
        esc_excluded = ''
        if escChar is not None:
            esc_excluded = _escapeRegexRangeChars(escChar) or ''
        if multiline:
            self.flags = re.MULTILINE | re.DOTALL
            newline_excluded = ''
        else:
            self.flags = 0
            newline_excluded = r'\n\r'
        pattern = r'%s(?:[^%s%s%s]' % (re.escape(self.quoteChar),
                                       _escapeRegexRangeChars(self.endQuoteChar[0]),
                                       newline_excluded,
                                       esc_excluded)

        # multi-character end quote: also accept proper prefixes of the end
        # quote that are followed by a character breaking the end quote
        end_quote = self.endQuoteChar
        if len(end_quote) > 1:
            partial_ends = ["%s[^%s]" % (re.escape(end_quote[:i]),
                                         _escapeRegexRangeChars(end_quote[i]))
                            for i in range(len(end_quote) - 1, 0, -1)]
            pattern += '|(?:' + ')|(?:'.join(partial_ends) + ')'

        if escQuote:
            pattern += (r'|(?:%s)' % re.escape(escQuote))
        if escChar:
            pattern += (r'|(?:%s.)' % re.escape(escChar))
            self.escCharReplacePattern = re.escape(self.escChar) + "(.)"
        pattern += (r')*%s' % re.escape(self.endQuoteChar))
        self.pattern = pattern

        try:
            self.re = re.compile(self.pattern, self.flags)
            self.reString = self.pattern
            self.re_match = self.re.match
        except sre_constants.error:
            warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern,
                          SyntaxWarning, stacklevel=2)
            raise

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        # cheap first-character test before running the regex
        if instring[loc] != self.firstQuoteChar:
            raise ParseException(instring, loc, self.errmsg, self)
        found = self.re_match(instring, loc)
        if not found:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = found.end()
        ret = found.group()

        if self.unquoteResults:

            # strip off quotes
            ret = ret[self.quoteCharLen: -self.endQuoteCharLen]

            if isinstance(ret, basestring):
                # replace escaped whitespace
                if '\\' in ret and self.convertWhitespaceEscapes:
                    ws_escapes = (
                        (r'\t', '\t'),
                        (r'\n', '\n'),
                        (r'\f', '\f'),
                        (r'\r', '\r'),
                    )
                    for escaped, actual in ws_escapes:
                        ret = ret.replace(escaped, actual)

                # replace escaped characters
                if self.escChar:
                    ret = re.sub(self.escCharReplacePattern, r"\g<1>", ret)

                # replace escaped quotes
                if self.escQuote:
                    ret = ret.replace(self.escQuote, self.endQuoteChar)

        return loc, ret

    def __str__(self):
        # prefer the name reported by the base class, when that works
        try:
            return super(QuotedString, self).__str__()
        except Exception:
            pass

        if self.strRepr is None:
            self.strRepr = "quoted string, starting with %s ending with %s" % (self.quoteChar, self.endQuoteChar)

        return self.strRepr
3559
3560
class CharsNotIn(Token):
    """Token for matching words composed of characters *not* in a given
    set (whitespace is included in the matched characters unless it is
    listed in the provided exclusion set - see example). Defined with a
    string containing all disallowed characters, and an optional minimum,
    maximum, and/or exact length. ``min`` defaults to 1 (a minimum
    value < 1 is not valid); ``max`` and ``exact`` default to 0, meaning
    no maximum or exact length restriction.

    Example::

        # define a comma-separated-value as anything that is not a ','
        csv_value = CharsNotIn(',')
        print(delimitedList(csv_value).parseString("dkls,lsdkjf,s12 34,@!#,213"))

    prints::

        ['dkls', 'lsdkjf', 's12 34', '@!#', '213']
    """
    def __init__(self, notChars, min=1, max=0, exact=0):
        super(CharsNotIn, self).__init__()
        # leading whitespace is not skipped, so it can be part of the match
        self.skipWhitespace = False
        self.notChars = notChars

        if min < 1:
            raise ValueError("cannot specify a minimum length < 1; use "
                             "Optional(CharsNotIn()) if zero-length char group is permitted")

        self.minLen = min
        self.maxLen = max if max > 0 else _MAX_INT

        # an exact length overrides both bounds
        if exact > 0:
            self.maxLen = exact
            self.minLen = exact

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = (self.minLen == 0)
        self.mayIndexError = False

    def parseImpl(self, instring, loc, doActions=True):
        excluded = self.notChars
        if instring[loc] in excluded:
            raise ParseException(instring, loc, self.errmsg, self)

        start = loc
        loc += 1
        # never scan beyond the maximum length or the end of the input
        stop = min(start + self.maxLen, len(instring))
        while loc < stop and instring[loc] not in excluded:
            loc += 1

        if loc - start < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]

    def __str__(self):
        # prefer the name reported by the base class, when that works
        try:
            return super(CharsNotIn, self).__str__()
        except Exception:
            pass

        if self.strRepr is None:
            # abbreviate exclusion sets longer than four characters
            if len(self.notChars) > 4:
                self.strRepr = "!W:(%s...)" % self.notChars[:4]
            else:
                self.strRepr = "!W:(%s)" % self.notChars

        return self.strRepr
3635
class White(Token):
    """Special matching class for matching whitespace.  Normally,
    whitespace is ignored by pyparsing grammars.  This class is provided
    for the cases where some whitespace structures are significant.
    Define with a string containing the whitespace characters to be
    matched; default is ``" \\t\\r\\n"``.  Also takes optional ``min``,
    ``max``, and ``exact`` arguments, as defined for the
    :class:`Word` class.
    """
    # display names used to build the expression name from the matched characters
    whiteStrs = {
        ' ' : '<SP>',
        '\t': '<TAB>',
        '\n': '<LF>',
        '\r': '<CR>',
        '\f': '<FF>',
        u'\u00A0': '<NBSP>',
        u'\u1680': '<OGHAM_SPACE_MARK>',
        u'\u180E': '<MONGOLIAN_VOWEL_SEPARATOR>',
        u'\u2000': '<EN_QUAD>',
        u'\u2001': '<EM_QUAD>',
        u'\u2002': '<EN_SPACE>',
        u'\u2003': '<EM_SPACE>',
        u'\u2004': '<THREE-PER-EM_SPACE>',
        u'\u2005': '<FOUR-PER-EM_SPACE>',
        u'\u2006': '<SIX-PER-EM_SPACE>',
        u'\u2007': '<FIGURE_SPACE>',
        u'\u2008': '<PUNCTUATION_SPACE>',
        u'\u2009': '<THIN_SPACE>',
        u'\u200A': '<HAIR_SPACE>',
        u'\u200B': '<ZERO_WIDTH_SPACE>',
        u'\u202F': '<NNBSP>',
        u'\u205F': '<MMSP>',
        u'\u3000': '<IDEOGRAPHIC_SPACE>',
        }
    def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0):
        super(White, self).__init__()
        self.matchWhite = ws
        # the characters being matched must not be skipped as insignificant
        # whitespace before this expression
        skippable = "".join(c for c in self.whiteChars if c not in self.matchWhite)
        self.setWhitespaceChars(skippable)
        # ~ self.leaveWhitespace()
        self.name = ("".join(White.whiteStrs[c] for c in self.matchWhite))
        self.mayReturnEmpty = True
        self.errmsg = "Expected " + self.name

        self.minLen = min
        self.maxLen = max if max > 0 else _MAX_INT

        # an exact length overrides both bounds
        if exact > 0:
            self.maxLen = exact
            self.minLen = exact

    def parseImpl(self, instring, loc, doActions=True):
        wanted = self.matchWhite
        if instring[loc] not in wanted:
            raise ParseException(instring, loc, self.errmsg, self)

        start = loc
        loc += 1
        # never scan beyond the maximum length or the end of the input
        maxloc = min(start + self.maxLen, len(instring))
        while loc < maxloc and instring[loc] in wanted:
            loc += 1

        if loc - start < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]
3704
3705
class _PositionToken(Token):
    """Base class for tokens named after their own class; instances are
    flagged as possibly returning empty and as not raising IndexError.
    """
    def __init__(self):
        super(_PositionToken, self).__init__()
        self.mayIndexError = False
        self.mayReturnEmpty = True
        self.name = self.__class__.__name__
3712
class GoToColumn(_PositionToken):
    """Token to advance to a specific column of input text; useful for
    tabular report scraping.
    """
    def __init__(self, colno):
        super(GoToColumn, self).__init__()
        self.col = colno

    def preParse(self, instring, loc):
        """Advance ``loc`` over ignorables and whitespace until the target
        column is reached, a non-space character is found, or the input ends.
        """
        if col(loc, instring) == self.col:
            return loc

        end = len(instring)
        if self.ignoreExprs:
            loc = self._skipIgnorables(instring, loc)
        while loc < end and instring[loc].isspace() and col(loc, instring) != self.col:
            loc += 1
        return loc

    def parseImpl(self, instring, loc, doActions=True):
        """Return the text between ``loc`` and the target column.

        Raises ParseException when ``loc`` is already past the target column.
        """
        current = col(loc, instring)
        if current > self.col:
            raise ParseException(instring, loc, "Text not in expected column", self)
        target = loc + self.col - current
        return target, instring[loc: target]
3737
3738
class LineStart(_PositionToken):
    r"""Matches if current position is at the beginning of a line within
    the parse string

    Example::

        test = '''\
        AAA this line
        AAA and this line
          AAA but not this one
        B AAA and definitely not this one
        '''

        for t in (LineStart() + 'AAA' + restOfLine).searchString(test):
            print(t)

    prints::

        ['AAA', ' this line']
        ['AAA', ' and this line']

    """
    def __init__(self):
        super(LineStart, self).__init__()
        self.errmsg = "Expected start of line"

    def parseImpl(self, instring, loc, doActions=True):
        """Succeed with no tokens when ``loc`` is in column 1, else raise."""
        if col(loc, instring) != 1:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
3769
class LineEnd(_PositionToken):
    """Matches if current position is at the end of a line within the
    parse string
    """
    def __init__(self):
        super(LineEnd, self).__init__()
        # newline must not be skipped as whitespace, since it is what we match
        self.setWhitespaceChars(ParserElement.DEFAULT_WHITE_CHARS.replace("\n", ""))
        self.errmsg = "Expected end of line"

    def parseImpl(self, instring, loc, doActions=True):
        """Match a newline at ``loc`` (returned as the token) or the end of
        the input (no tokens); anything else raises ParseException.
        """
        end = len(instring)
        if loc == end:
            return loc + 1, []
        if loc < end and instring[loc] == "\n":
            return loc + 1, "\n"
        raise ParseException(instring, loc, self.errmsg, self)
3789
class StringStart(_PositionToken):
    """Matches if current position is at the beginning of the parse
    string
    """
    def __init__(self):
        super(StringStart, self).__init__()
        self.errmsg = "Expected start of text"

    def parseImpl(self, instring, loc, doActions=True):
        """Succeed at location 0, or at the location that ``preParse``
        reaches when started from 0; otherwise raise ParseException.
        """
        # a non-zero loc is still accepted if the entire string up to here
        # is just whitespace and ignorables
        if loc != 0 and loc != self.preParse(instring, 0):
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
3804
class StringEnd(_PositionToken):
    """Matches if current position is at the end of the parse string
    """
    def __init__(self):
        super(StringEnd, self).__init__()
        self.errmsg = "Expected end of text"

    def parseImpl(self, instring, loc, doActions=True):
        """Raise if input remains at ``loc``; otherwise succeed with no tokens.

        Exactly at the end the returned location is ``loc + 1``; beyond the
        end ``loc`` is returned unchanged.
        """
        end = len(instring)
        if loc < end:
            raise ParseException(instring, loc, self.errmsg, self)
        if loc == end:
            return loc + 1, []
        return loc, []
3821
class WordStart(_PositionToken):
    r"""Matches if the current position is at the beginning of a Word,
    and is not preceded by any character in a given set of
    ``wordChars`` (default= ``printables``). To emulate the
    ``\b`` behavior of regular expressions, use
    ``WordStart(alphanums)``. ``WordStart`` will also match at
    the beginning of the string being parsed, or at the beginning of
    a line.
    """
    def __init__(self, wordChars=printables):
        super(WordStart, self).__init__()
        self.wordChars = set(wordChars)
        self.errmsg = "Not at the start of a word"

    def parseImpl(self, instring, loc, doActions=True):
        """Succeed with no tokens at location 0, or when the previous
        character is not a word character and the current one is.
        """
        if loc == 0:
            return loc, []

        word_chars = self.wordChars
        if instring[loc - 1] in word_chars or instring[loc] not in word_chars:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
3842
class WordEnd(_PositionToken):
    r"""Matches if the current position is at the end of a Word, and is
    not followed by any character in a given set of ``wordChars``
    (default= ``printables``). To emulate the ``\b`` behavior of
    regular expressions, use ``WordEnd(alphanums)``. ``WordEnd``
    will also match at the end of the string being parsed, or at the end
    of a line.
    """
    def __init__(self, wordChars=printables):
        super(WordEnd, self).__init__()
        self.wordChars = set(wordChars)
        self.skipWhitespace = False
        self.errmsg = "Not at the end of a word"

    def parseImpl(self, instring, loc, doActions=True):
        """Succeed with no tokens when ``loc`` is at or past the end of the
        input, or when the previous character is a word character and the
        current one is not; otherwise raise ParseException.

        Location 0 of a non-empty string is never the end of a word: there
        is no previous character, and ``instring[loc - 1]`` would otherwise
        wrap around to the *last* character of the input.
        """
        instrlen = len(instring)
        if instrlen > 0 and loc < instrlen:
            if (loc == 0
                    or instring[loc] in self.wordChars
                    or instring[loc - 1] not in self.wordChars):
                raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
3864
3865
class ParseExpression(ParserElement):
    """Abstract subclass of ParserElement, for combining and
    post-processing parsed tokens.
    """
    def __init__(self, exprs, savelist=False):
        """Normalize ``exprs`` into the list ``self.exprs``.

        ``exprs`` may be a generator, a single string, a single
        ParserElement, an iterable (strings in it are wrapped with
        ``_literalStringClass``), or any other object; an object that
        cannot be converted with ``list()`` is stored as a one-element list.
        """
        super(ParseExpression, self).__init__(savelist)
        if isinstance(exprs, _generatorType):
            exprs = list(exprs)

        if isinstance(exprs, basestring):
            self.exprs = [self._literalStringClass(exprs)]
        elif isinstance(exprs, ParserElement):
            self.exprs = [exprs]
        elif isinstance(exprs, Iterable):
            # wrap any bare strings in the sequence with the literal class
            self.exprs = [self._literalStringClass(item) if isinstance(item, basestring) else item
                          for item in exprs]
        else:
            try:
                self.exprs = list(exprs)
            except TypeError:
                self.exprs = [exprs]
        self.callPreparse = False

    def append(self, other):
        """Add ``other`` to the contained expressions and return ``self``."""
        self.exprs.append(other)
        # cached string form is stale once the contents change
        self.strRepr = None
        return self

    def leaveWhitespace(self):
        """Extends ``leaveWhitespace`` defined in base class, and also invokes ``leaveWhitespace`` on
           all contained expressions."""
        self.skipWhitespace = False
        # work on copies so the caller's original expressions are not modified
        self.exprs = [sub.copy() for sub in self.exprs]
        for sub in self.exprs:
            sub.leaveWhitespace()
        return self

    def ignore(self, other):
        """Register ``other`` as an ignorable on this expression and pass the
        registered ignore expression on to every contained expression.

        A ``Suppress`` that is already in ``self.ignoreExprs`` is skipped.
        """
        if isinstance(other, Suppress) and other in self.ignoreExprs:
            return self

        super(ParseExpression, self).ignore(other)
        for sub in self.exprs:
            sub.ignore(self.ignoreExprs[-1])
        return self

    def __str__(self):
        try:
            return super(ParseExpression, self).__str__()
        except Exception:
            # fall through to the generated representation
            pass

        if self.strRepr is None:
            self.strRepr = "%s:(%s)" % (self.__class__.__name__, _ustr(self.exprs))
        return self.strRepr

    def streamline(self):
        """Streamline this expression and its contents, flattening nested
        expressions of the same class where that is safe.
        """
        super(ParseExpression, self).streamline()

        for sub in self.exprs:
            sub.streamline()

        # collapse nested And's of the form And(And(And(a, b), c), d) to And(a, b, c, d)
        # but only if there are no parse actions or resultsNames on the nested And's
        # (likewise for Or's and MatchFirst's)
        if len(self.exprs) == 2:
            own_class = self.__class__

            def can_absorb(nested):
                return (isinstance(nested, own_class)
                        and not nested.parseAction
                        and nested.resultsName is None
                        and not nested.debug)

            first = self.exprs[0]
            if can_absorb(first):
                self.exprs = first.exprs[:] + [self.exprs[1]]
                self.strRepr = None
                self.mayReturnEmpty |= first.mayReturnEmpty
                self.mayIndexError |= first.mayIndexError

            last = self.exprs[-1]
            if can_absorb(last):
                self.exprs = self.exprs[:-1] + last.exprs[:]
                self.strRepr = None
                self.mayReturnEmpty |= last.mayReturnEmpty
                self.mayIndexError |= last.mayIndexError

        self.errmsg = "Expected " + _ustr(self)

        return self

    def validate(self, validateTrace=None):
        """Validate every contained expression, then check for recursion."""
        if validateTrace is None:
            validateTrace = []
        trace = validateTrace[:] + [self]
        for sub in self.exprs:
            sub.validate(trace)
        self.checkRecursion([])

    def copy(self):
        """Return a copy whose contained expressions are themselves copies."""
        duplicate = super(ParseExpression, self).copy()
        duplicate.exprs = [sub.copy() for sub in self.exprs]
        return duplicate

    def _setResultsName(self, name, listAllMatches=False):
        """Set the results name, warning (when the diagnostic is enabled)
        for each contained expression that already has a results name.
        """
        if __diag__.warn_ungrouped_named_tokens_in_collection:
            named = (sub for sub in self.exprs
                     if isinstance(sub, ParserElement) and sub.resultsName)
            for sub in named:
                message = ("{0}: setting results name {1!r} on {2} expression "
                           "collides with {3!r} on contained expression").format(
                    "warn_ungrouped_named_tokens_in_collection",
                    name,
                    type(self).__name__,
                    sub.resultsName)
                warnings.warn(message, stacklevel=3)

        return super(ParseExpression, self)._setResultsName(name, listAllMatches)
3985
3986
class And(ParseExpression):
    """
    Requires all given :class:`ParseExpression` s to be found in the given order.
    Expressions may be separated by whitespace.
    May be constructed using the ``'+'`` operator.
    May also be constructed using the ``'-'`` operator, which will
    suppress backtracking.

    Example::

        integer = Word(nums)
        name_expr = OneOrMore(Word(alphas))

        expr = And([integer("id"), name_expr("name"), integer("age")])
        # more easily written as:
        expr = integer("id") + name_expr("name") + integer("age")
    """

    class _ErrorStop(Empty):
        """Marker element named ``'-'``; once ``And.parseImpl`` has passed
        one, failures of later expressions are raised as
        ParseSyntaxException.
        """
        def __init__(self, *args, **kwargs):
            super(And._ErrorStop, self).__init__(*args, **kwargs)
            self.name = '-'
            self.leaveWhitespace()

    def __init__(self, exprs, savelist=True):
        """Build the sequence from ``exprs``.

        Each ``...`` (Ellipsis) in ``exprs`` is replaced by a ``SkipTo`` for
        the expression that follows it; a trailing ``...`` raises
        ``Exception``. An empty ``exprs`` is accepted and yields an
        expression that may return empty.
        """
        exprs = list(exprs)
        if exprs and Ellipsis in exprs:
            tmp = []
            for i, expr in enumerate(exprs):
                if expr is Ellipsis:
                    if i < len(exprs) - 1:
                        skipto_arg = (Empty() + exprs[i + 1]).exprs[-1]
                        tmp.append(SkipTo(skipto_arg)("_skipped*"))
                    else:
                        raise Exception("cannot construct And with sequence ending in ...")
                else:
                    tmp.append(expr)
            exprs[:] = tmp
        super(And, self).__init__(exprs, savelist)
        if self.exprs:
            self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
            # whitespace handling of the sequence follows its first element
            self.setWhitespaceChars(self.exprs[0].whiteChars)
            self.skipWhitespace = self.exprs[0].skipWhitespace
        else:
            # nothing to take whitespace settings from; an empty sequence
            # trivially matches without consuming input
            self.mayReturnEmpty = True
        self.callPreparse = True

    def streamline(self):
        # collapse any _PendingSkip's
        if self.exprs:
            if any(isinstance(e, ParseExpression) and e.exprs and isinstance(e.exprs[-1], _PendingSkip)
                   for e in self.exprs[:-1]):
                for i, e in enumerate(self.exprs[:-1]):
                    if e is None:
                        continue
                    if (isinstance(e, ParseExpression)
                            and e.exprs and isinstance(e.exprs[-1], _PendingSkip)):
                        e.exprs[-1] = e.exprs[-1] + self.exprs[i + 1]
                        # mark the absorbed expression for removal below
                        self.exprs[i + 1] = None
                self.exprs = [e for e in self.exprs if e is not None]

        super(And, self).streamline()
        self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
        return self

    def parseImpl(self, instring, loc, doActions=True):
        """Parse every contained expression in order, accumulating tokens.

        After an ``_ErrorStop`` element has been seen, a failure of any later
        expression is raised as ParseSyntaxException. An empty sequence
        matches at ``loc`` with no tokens.
        """
        if not self.exprs:
            return loc, []

        # pass False as last arg to _parse for first element, since we already
        # pre-parsed the string as part of our And pre-parsing
        loc, resultlist = self.exprs[0]._parse(instring, loc, doActions, callPreParse=False)
        errorStop = False
        for e in self.exprs[1:]:
            if isinstance(e, And._ErrorStop):
                errorStop = True
                continue
            if errorStop:
                try:
                    loc, exprtokens = e._parse(instring, loc, doActions)
                except ParseSyntaxException:
                    raise
                except ParseBaseException as pe:
                    pe.__traceback__ = None
                    raise ParseSyntaxException._from_exception(pe)
                except IndexError:
                    raise ParseSyntaxException(instring, len(instring), self.errmsg, self)
            else:
                loc, exprtokens = e._parse(instring, loc, doActions)
            if exprtokens or exprtokens.haskeys():
                resultlist += exprtokens
        return loc, resultlist

    def __iadd__(self, other):
        if isinstance(other, basestring):
            other = self._literalStringClass(other)
        return self.append(other)  # And([self, other])

    def checkRecursion(self, parseElementList):
        """Check contained expressions for recursion, stopping after the
        first one that cannot return empty.
        """
        subRecCheckList = parseElementList[:] + [self]
        for e in self.exprs:
            e.checkRecursion(subRecCheckList)
            if not e.mayReturnEmpty:
                break

    def __str__(self):
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + " ".join(_ustr(e) for e in self.exprs) + "}"

        return self.strRepr
4094
4095
class Or(ParseExpression):
    """Requires that at least one :class:`ParseExpression` is found. If
    two expressions match, the expression that matches the longest
    string will be used. May be constructed using the ``'^'``
    operator.

    Example::

        # construct Or using '^' operator

        number = Word(nums) ^ Combine(Word(nums) + '.' + Word(nums))
        print(number.searchString("123 3.1416 789"))

    prints::

        [['123'], ['3.1416'], ['789']]
    """
    def __init__(self, exprs, savelist=False):
        super(Or, self).__init__(exprs, savelist)
        if not self.exprs:
            self.mayReturnEmpty = True
        else:
            self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)

    def streamline(self):
        super(Or, self).streamline()
        if __compat__.collect_all_And_tokens:
            self.saveAsList = any(e.saveAsList for e in self.exprs)
        return self

    def parseImpl(self, instring, loc, doActions=True):
        """Try every alternative and return the result of the longest match.

        When no alternative matches, the exception that got furthest into the
        input is re-raised with this expression's error message.
        """
        furthest_loc = -1
        furthest_exc = None
        candidates = []
        for alternative in self.exprs:
            try:
                trial_end = alternative.tryParse(instring, loc)
            except ParseException as err:
                err.__traceback__ = None
                if err.loc > furthest_loc:
                    furthest_exc = err
                    furthest_loc = err.loc
            except IndexError:
                if len(instring) > furthest_loc:
                    furthest_exc = ParseException(instring, len(instring), alternative.errmsg, self)
                    furthest_loc = len(instring)
            else:
                # save match among all matches, to retry longest to shortest
                candidates.append((trial_end, alternative))

        if candidates:
            # re-evaluate all matches in descending order of length of match, in case attached actions
            # might change whether or how much they match of the input.
            candidates.sort(key=itemgetter(0), reverse=True)

            if not doActions:
                # no further conditions or parse actions to change the selection of
                # alternative, so the first match will be the best match
                return candidates[0][1]._parse(instring, loc, doActions)

            longest = -1, None
            for expected_end, candidate in candidates:
                if expected_end <= longest[0]:
                    # already have a longer match than this one will deliver, we are done
                    return longest

                try:
                    actual_end, toks = candidate._parse(instring, loc, doActions)
                except ParseException as err:
                    err.__traceback__ = None
                    if err.loc > furthest_loc:
                        furthest_exc = err
                        furthest_loc = err.loc
                else:
                    if actual_end >= expected_end:
                        return actual_end, toks
                    # didn't match as much as before
                    elif actual_end > longest[0]:
                        longest = actual_end, toks

            if longest != (-1, None):
                return longest

        if furthest_exc is None:
            raise ParseException(instring, loc, "no defined alternatives to match", self)
        furthest_exc.msg = self.errmsg
        raise furthest_exc

    def __ixor__(self, other):
        if isinstance(other, basestring):
            other = self._literalStringClass(other)
        return self.append(other)  # Or([self, other])

    def __str__(self):
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + " ^ ".join(_ustr(e) for e in self.exprs) + "}"

        return self.strRepr

    def checkRecursion(self, parseElementList):
        subRecCheckList = parseElementList[:] + [self]
        for e in self.exprs:
            e.checkRecursion(subRecCheckList)

    def _setResultsName(self, name, listAllMatches=False):
        """Set the results name, warning (when enabled) if any alternative
        is an :class:`And`.
        """
        warn_enabled = (not __compat__.collect_all_And_tokens
                        and __diag__.warn_multiple_tokens_in_named_alternation)
        if warn_enabled and any(isinstance(e, And) for e in self.exprs):
            warnings.warn("{0}: setting results name {1!r} on {2} expression "
                          "may only return a single token for an And alternative, "
                          "in future will return the full list of tokens".format(
                              "warn_multiple_tokens_in_named_alternation", name, type(self).__name__),
                          stacklevel=3)

        return super(Or, self)._setResultsName(name, listAllMatches)
4217
4218
class MatchFirst(ParseExpression):
    """Requires that at least one :class:`ParseExpression` is found. If
    two expressions match, the first one listed is the one that will
    match. May be constructed using the ``'|'`` operator.

    Example::

        # construct MatchFirst using '|' operator

        # watch the order of expressions to match
        number = Word(nums) | Combine(Word(nums) + '.' + Word(nums))
        print(number.searchString("123 3.1416 789")) #  Fail! -> [['123'], ['3'], ['1416'], ['789']]

        # put more selective expression first
        number = Combine(Word(nums) + '.' + Word(nums)) | Word(nums)
        print(number.searchString("123 3.1416 789")) #  Better -> [['123'], ['3.1416'], ['789']]
    """
    def __init__(self, exprs, savelist=False):
        super(MatchFirst, self).__init__(exprs, savelist)
        if not self.exprs:
            self.mayReturnEmpty = True
        else:
            self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)

    def streamline(self):
        super(MatchFirst, self).streamline()
        if __compat__.collect_all_And_tokens:
            self.saveAsList = any(e.saveAsList for e in self.exprs)
        return self

    def parseImpl(self, instring, loc, doActions=True):
        """Return the result of the first alternative that parses.

        When none does, the exception that got furthest into the input is
        re-raised with this expression's error message.
        """
        furthest_loc = -1
        furthest_exc = None
        for alternative in self.exprs:
            try:
                return alternative._parse(instring, loc, doActions)
            except ParseException as err:
                if err.loc > furthest_loc:
                    furthest_exc = err
                    furthest_loc = err.loc
            except IndexError:
                if len(instring) > furthest_loc:
                    furthest_exc = ParseException(instring, len(instring), alternative.errmsg, self)
                    furthest_loc = len(instring)

        # only got here if no expression matched, raise exception for match that made it the furthest
        if furthest_exc is None:
            raise ParseException(instring, loc, "no defined alternatives to match", self)
        furthest_exc.msg = self.errmsg
        raise furthest_exc

    def __ior__(self, other):
        if isinstance(other, basestring):
            other = self._literalStringClass(other)
        return self.append(other)  # MatchFirst([self, other])

    def __str__(self):
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + " | ".join(_ustr(e) for e in self.exprs) + "}"

        return self.strRepr

    def checkRecursion(self, parseElementList):
        subRecCheckList = parseElementList[:] + [self]
        for e in self.exprs:
            e.checkRecursion(subRecCheckList)

    def _setResultsName(self, name, listAllMatches=False):
        """Set the results name, warning (when enabled) if any alternative
        is an :class:`And`.
        """
        warn_enabled = (not __compat__.collect_all_And_tokens
                        and __diag__.warn_multiple_tokens_in_named_alternation)
        if warn_enabled and any(isinstance(e, And) for e in self.exprs):
            warnings.warn("{0}: setting results name {1!r} on {2} expression "
                          "may only return a single token for an And alternative, "
                          "in future will return the full list of tokens".format(
                              "warn_multiple_tokens_in_named_alternation", name, type(self).__name__),
                          stacklevel=3)

        return super(MatchFirst, self)._setResultsName(name, listAllMatches)
4303
4304
class Each(ParseExpression):
    """Requires all given :class:`ParseExpression` s to be found, but in
    any order. Expressions may be separated by whitespace.

    May be constructed using the ``'&'`` operator.

    Example::

        color = oneOf("RED ORANGE YELLOW GREEN BLUE PURPLE BLACK WHITE BROWN")
        shape_type = oneOf("SQUARE CIRCLE TRIANGLE STAR HEXAGON OCTAGON")
        integer = Word(nums)
        shape_attr = "shape:" + shape_type("shape")
        posn_attr = "posn:" + Group(integer("x") + ',' + integer("y"))("posn")
        color_attr = "color:" + color("color")
        size_attr = "size:" + integer("size")

        # use Each (using operator '&') to accept attributes in any order
        # (shape and posn are required, color and size are optional)
        shape_spec = shape_attr & posn_attr & Optional(color_attr) & Optional(size_attr)

        shape_spec.runTests('''
            shape: SQUARE color: BLACK posn: 100, 120
            shape: CIRCLE size: 50 color: BLUE posn: 50,80
            color:GREEN size:20 shape:TRIANGLE posn:20,40
            '''
            )

    prints::

        shape: SQUARE color: BLACK posn: 100, 120
        ['shape:', 'SQUARE', 'color:', 'BLACK', 'posn:', ['100', ',', '120']]
        - color: BLACK
        - posn: ['100', ',', '120']
          - x: 100
          - y: 120
        - shape: SQUARE


        shape: CIRCLE size: 50 color: BLUE posn: 50,80
        ['shape:', 'CIRCLE', 'size:', '50', 'color:', 'BLUE', 'posn:', ['50', ',', '80']]
        - color: BLUE
        - posn: ['50', ',', '80']
          - x: 50
          - y: 80
        - shape: CIRCLE
        - size: 50


        color: GREEN size: 20 shape: TRIANGLE posn: 20,40
        ['color:', 'GREEN', 'size:', '20', 'shape:', 'TRIANGLE', 'posn:', ['20', ',', '40']]
        - color: GREEN
        - posn: ['20', ',', '40']
          - x: 20
          - y: 40
        - shape: TRIANGLE
        - size: 20
    """
    def __init__(self, exprs, savelist=True):
        super(Each, self).__init__(exprs, savelist)
        self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
        self.skipWhitespace = True
        # expression groups are computed on the first call to parseImpl
        self.initExprGroups = True
        self.saveAsList = True

    def streamline(self):
        super(Each, self).streamline()
        self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
        return self

    def parseImpl(self, instring, loc, doActions=True):
        """Match the contained expressions in whatever order they occur.

        A first pass uses ``tryParse`` to discover the order in which the
        expressions match; a second pass re-parses in that order to collect
        results. Raises ParseException when a required expression never
        matched.
        """
        if self.initExprGroups:
            # classify the contained expressions once, on first use
            wrapped_optionals = [e for e in self.exprs if isinstance(e, Optional)]
            self.opt1map = dict((id(opt.expr), opt) for opt in wrapped_optionals)
            opt1 = [opt.expr for opt in wrapped_optionals]
            opt2 = [e for e in self.exprs if e.mayReturnEmpty and not isinstance(e, (Optional, Regex))]
            self.optionals = opt1 + opt2
            self.multioptionals = [e.expr for e in self.exprs if isinstance(e, ZeroOrMore)]
            self.multirequired = [e.expr for e in self.exprs if isinstance(e, OneOrMore)]
            self.required = [e for e in self.exprs if not isinstance(e, (Optional, ZeroOrMore, OneOrMore))]
            self.required += self.multirequired
            self.initExprGroups = False

        scan_loc = loc
        pending_required = self.required[:]
        pending_optional = self.optionals[:]
        matchOrder = []

        # keep sweeping over the remaining expressions until a full sweep
        # matches nothing
        while True:
            attempts = pending_required + pending_optional + self.multioptionals + self.multirequired
            failure_count = 0
            for e in attempts:
                try:
                    scan_loc = e.tryParse(instring, scan_loc)
                except ParseException:
                    failure_count += 1
                else:
                    matchOrder.append(self.opt1map.get(id(e), e))
                    if e in pending_required:
                        pending_required.remove(e)
                    elif e in pending_optional:
                        pending_optional.remove(e)
            if failure_count == len(attempts):
                break

        if pending_required:
            missing = ", ".join(_ustr(e) for e in pending_required)
            raise ParseException(instring, loc, "Missing one or more required elements (%s)" % missing)

        # add any unmatched Optionals, in case they have default values defined
        matchOrder += [e for e in self.exprs if isinstance(e, Optional) and e.expr in pending_optional]

        resultlist = []
        for e in matchOrder:
            loc, results = e._parse(instring, loc, doActions)
            resultlist.append(results)

        return loc, sum(resultlist, ParseResults([]))

    def __str__(self):
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + " & ".join(_ustr(e) for e in self.exprs) + "}"

        return self.strRepr

    def checkRecursion(self, parseElementList):
        subRecCheckList = parseElementList[:] + [self]
        for e in self.exprs:
            e.checkRecursion(subRecCheckList)
4436
4437
class ParseElementEnhance(ParserElement):
    """Abstract subclass of :class:`ParserElement`, for combining and
    post-processing parsed tokens.
    """
    def __init__(self, expr, savelist=False):
        """Wrap ``expr`` (a string is converted via ``_literalStringClass``).

        When ``expr`` is not ``None``, its whitespace, list-saving, pre-parse
        and ignore settings are copied onto this element.
        """
        super(ParseElementEnhance, self).__init__(savelist)
        if isinstance(expr, basestring):
            if issubclass(self._literalStringClass, Token):
                expr = self._literalStringClass(expr)
            else:
                expr = self._literalStringClass(Literal(expr))
        self.expr = expr
        self.strRepr = None
        if expr is not None:
            self.mayIndexError = expr.mayIndexError
            self.mayReturnEmpty = expr.mayReturnEmpty
            self.setWhitespaceChars(expr.whiteChars)
            self.skipWhitespace = expr.skipWhitespace
            self.saveAsList = expr.saveAsList
            self.callPreparse = expr.callPreparse
            self.ignoreExprs.extend(expr.ignoreExprs)

    def parseImpl(self, instring, loc, doActions=True):
        """Delegate to the wrapped expression.

        Raises ParseException when no expression is set; the exception
        carries ``instring`` so its location refers to the actual input.
        """
        if self.expr is not None:
            return self.expr._parse(instring, loc, doActions, callPreParse=False)
        else:
            raise ParseException(instring, loc, self.errmsg, self)

    def leaveWhitespace(self):
        """Disable whitespace skipping here and on a copy of the wrapped
        expression (if one is set).
        """
        self.skipWhitespace = False
        # guard before copying: the wrapped expression may be None
        if self.expr is not None:
            self.expr = self.expr.copy()
            self.expr.leaveWhitespace()
        return self

    def ignore(self, other):
        """Register ``other`` as an ignorable here and on the wrapped
        expression; a ``Suppress`` already registered is skipped.
        """
        if isinstance(other, Suppress):
            if other not in self.ignoreExprs:
                super(ParseElementEnhance, self).ignore(other)
                if self.expr is not None:
                    self.expr.ignore(self.ignoreExprs[-1])
        else:
            super(ParseElementEnhance, self).ignore(other)
            if self.expr is not None:
                self.expr.ignore(self.ignoreExprs[-1])
        return self

    def streamline(self):
        super(ParseElementEnhance, self).streamline()
        if self.expr is not None:
            self.expr.streamline()
        return self

    def checkRecursion(self, parseElementList):
        """Raise RecursiveGrammarException if this element is already in
        ``parseElementList``; otherwise recurse into the wrapped expression.
        """
        if self in parseElementList:
            raise RecursiveGrammarException(parseElementList + [self])
        subRecCheckList = parseElementList[:] + [self]
        if self.expr is not None:
            self.expr.checkRecursion(subRecCheckList)

    def validate(self, validateTrace=None):
        if validateTrace is None:
            validateTrace = []
        tmp = validateTrace[:] + [self]
        if self.expr is not None:
            self.expr.validate(tmp)
        self.checkRecursion([])

    def __str__(self):
        try:
            return super(ParseElementEnhance, self).__str__()
        except Exception:
            # fall through to the generated representation
            pass

        if self.strRepr is None and self.expr is not None:
            self.strRepr = "%s:(%s)" % (self.__class__.__name__, _ustr(self.expr))
        return self.strRepr
4515
4516
class FollowedBy(ParseElementEnhance):
    """Lookahead matching of the given parse expression.
    ``FollowedBy`` does *not* advance the parsing position within
    the input string, it only verifies that the specified parse
    expression matches at the current position.  ``FollowedBy``
    always returns a null token list. If any results names are defined
    in the lookahead expression, those *will* be returned for access by
    name.

    Example::

        # use FollowedBy to match a label only if it is followed by a ':'
        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))

        OneOrMore(attr_expr).parseString("shape: SQUARE color: BLACK posn: upper left").pprint()

    prints::

        [['shape', 'SQUARE'], ['color', 'BLACK'], ['posn', 'upper left']]
    """
    def __init__(self, expr):
        super(FollowedBy, self).__init__(expr)
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Parse the wrapped expression at ``loc`` and return ``loc``
        unchanged together with the emptied results.
        """
        # parse with the wrapped expression, then delete the list contents
        # of the returned ParseResults so that only named results remain
        lookahead = self.expr._parse(instring, loc, doActions=doActions)[1]
        del lookahead[:]

        return loc, lookahead
4550
4551
class PrecededBy(ParseElementEnhance):
    """Lookbehind matching of the given parse expression.
    ``PrecededBy`` does not advance the parsing position within the
    input string, it only verifies that the specified parse expression
    matches prior to the current position.  ``PrecededBy`` always
    returns a null token list, but if a results name is defined on the
    given expression, it is returned.

    Parameters:

     - expr - expression that must match prior to the current parse
       location
     - retreat - (default= ``None``) - (int) maximum number of characters
       to lookbehind prior to the current parse location

    If the lookbehind expression is a string, Literal, Keyword, or
    a Word or CharsNotIn with a specified exact or maximum length, then
    the retreat parameter is not required. Otherwise, retreat must be
    specified to give a maximum number of characters to look back from
    the current parse position for a lookbehind match.

    Example::

        # VB-style variable names with type prefixes
        int_var = PrecededBy("#") + pyparsing_common.identifier
        str_var = PrecededBy("$") + pyparsing_common.identifier

    """
    def __init__(self, expr, retreat=None):
        super(PrecededBy, self).__init__(expr)
        self.expr = self.expr().leaveWhitespace()
        self.mayReturnEmpty = True
        self.mayIndexError = False

        # derive the lookbehind distance from the expression where possible
        known_width = True
        if isinstance(expr, str):
            retreat = len(expr)
        elif isinstance(expr, (Literal, Keyword)):
            retreat = expr.matchLen
        elif isinstance(expr, (Word, CharsNotIn)) and expr.maxLen != _MAX_INT:
            retreat = expr.maxLen
        elif isinstance(expr, _PositionToken):
            retreat = 0
        else:
            known_width = False
        self.exact = known_width
        self.retreat = retreat

        self.errmsg = "not preceded by " + str(expr)
        self.skipWhitespace = False
        # parse action empties the token list of the result
        self.parseAction.append(lambda s, l, t: t.__delitem__(slice(None, None)))

    def parseImpl(self, instring, loc=0, doActions=True):
        """Check that the wrapped expression matches just before ``loc``.

        Returns ``loc`` unchanged with the lookbehind results; raises a
        parse exception when no lookbehind position matches.
        """
        if self.exact:
            if loc < self.retreat:
                raise ParseException(instring, loc, self.errmsg)
            begin = loc - self.retreat
            return loc, self.expr._parse(instring, begin)[1]

        # retreat specified a maximum lookbehind window, iterate
        anchored_expr = self.expr + StringEnd()
        window = instring[max(0, loc - self.retreat):loc]
        failure = ParseException(instring, loc, self.errmsg)
        for offset in range(1, min(loc, self.retreat + 1)+1):
            try:
                _, ret = anchored_expr._parse(window, len(window) - offset)
            except ParseBaseException as pbe:
                failure = pbe
            else:
                return loc, ret
        raise failure
4625
4626
class NotAny(ParseElementEnhance):
    """Lookahead to disallow matching with the given parse expression.
    ``NotAny`` does *not* advance the parsing position within the
    input string, it only verifies that the specified parse expression
    does *not* match at the current position.  Also, ``NotAny`` does
    *not* skip over leading whitespace. ``NotAny`` always returns
    a null token list.  May be constructed using the '~' operator.

    Example::

        AND, OR, NOT = map(CaselessKeyword, "AND OR NOT".split())

        # take care not to mistake keywords for identifiers
        ident = ~(AND | OR | NOT) + Word(alphas)
        boolean_term = Optional(NOT) + ident

        # very crude boolean expression - to support parenthesis groups and
        # operation hierarchy, use infixNotation
        boolean_expr = boolean_term + ZeroOrMore((AND | OR) + boolean_term)

        # integers that are followed by "." are actually floats
        integer = Word(nums) + ~Char(".")
    """
    def __init__(self, expr):
        super(NotAny, self).__init__(expr)
        # do NOT use self.leaveWhitespace(), don't want to propagate to exprs
        self.skipWhitespace = False
        self.mayReturnEmpty = True
        self.errmsg = "Found unwanted token, " + _ustr(self.expr)

    def parseImpl(self, instring, loc, doActions=True):
        """Raise ParseException if the wrapped expression can parse at
        ``loc``; otherwise succeed with no tokens and ``loc`` unchanged.
        """
        unwanted_present = self.expr.canParseNext(instring, loc)
        if unwanted_present:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []

    def __str__(self):
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "~{" + _ustr(self.expr) + "}"

        return self.strRepr
4670
class _MultipleMatch(ParseElementEnhance):
    """Shared implementation of repetition with an optional stop sentinel."""

    def __init__(self, expr, stopOn=None):
        super(_MultipleMatch, self).__init__(expr)
        self.saveAsList = True
        sentinel = stopOn
        if isinstance(sentinel, basestring):
            sentinel = self._literalStringClass(sentinel)
        self.stopOn(sentinel)

    def stopOn(self, ender):
        """Set (or clear, with ``None``) the terminating sentinel; returns ``self``."""
        if isinstance(ender, basestring):
            ender = self._literalStringClass(ender)
        if ender is None:
            self.not_ender = None
        else:
            self.not_ender = ~ender
        return self

    def parseImpl(self, instring, loc, doActions=True):
        """Match the expression once, then as many further times as possible.

        The first match is mandatory, so its failure propagates to the
        caller. Failures of later repetitions (``ParseException`` or
        ``IndexError``) simply end the loop.
        """
        parse_item = self.expr._parse
        skip_ignorables = self._skipIgnorables
        reject_sentinel = None
        if self.not_ender is not None:
            reject_sentinel = self.not_ender.tryParse

        # must be at least one (but first see if we are the stopOn sentinel;
        # if so, fail)
        if reject_sentinel is not None:
            reject_sentinel(instring, loc)
        loc, tokens = parse_item(instring, loc, doActions, callPreParse=False)
        try:
            has_ignorables = bool(self.ignoreExprs)
            while True:
                if reject_sentinel is not None:
                    reject_sentinel(instring, loc)
                item_start = skip_ignorables(instring, loc) if has_ignorables else loc
                loc, more_tokens = parse_item(instring, item_start, doActions)
                if more_tokens or more_tokens.haskeys():
                    tokens += more_tokens
        except (ParseException, IndexError):
            pass

        return loc, tokens

    def _setResultsName(self, name, listAllMatches=False):
        """Set the results name, optionally warning about inner name collisions."""
        if __diag__.warn_ungrouped_named_tokens_in_collection:
            inner_exprs = [self.expr] + getattr(self.expr, 'exprs', [])
            for inner in inner_exprs:
                if not (isinstance(inner, ParserElement) and inner.resultsName):
                    continue
                warnings.warn("{0}: setting results name {1!r} on {2} expression "
                              "collides with {3!r} on contained expression".format("warn_ungrouped_named_tokens_in_collection",
                                                                                   name,
                                                                                   type(self).__name__,
                                                                                   inner.resultsName),
                              stacklevel=3)

        return super(_MultipleMatch, self)._setResultsName(name, listAllMatches)
4727
4728
class OneOrMore(_MultipleMatch):
    """Repetition of one or more of the given expression.

    Parameters:
     - expr - expression that must match one or more times
     - stopOn - (default= ``None``) - expression for a terminating sentinel
          (only required if the sentinel would ordinarily match the repetition
          expression)

    Example::

        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word).setParseAction(' '.join))

        text = "shape: SQUARE posn: upper left color: BLACK"
        OneOrMore(attr_expr).parseString(text).pprint()  # Fail! read 'color' as data instead of next label -> [['shape', 'SQUARE color']]

        # use stopOn attribute for OneOrMore to avoid reading label string as part of the data
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))
        OneOrMore(attr_expr).parseString(text).pprint() # Better -> [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'BLACK']]

        # could also be written as
        (attr_expr * (1,)).parseString(text).pprint()
    """

    def __str__(self):
        if hasattr(self, "name"):
            return self.name

        cached = self.strRepr
        if cached is None:
            cached = self.strRepr = "{" + _ustr(self.expr) + "}..."
        return cached
4763
class ZeroOrMore(_MultipleMatch):
    """Optional repetition of zero or more of the given expression.

    Parameters:
     - expr - expression that must match zero or more times
     - stopOn - (default= ``None``) - expression for a terminating sentinel
          (only required if the sentinel would ordinarily match the repetition
          expression)

    Example: similar to :class:`OneOrMore`
    """
    def __init__(self, expr, stopOn=None):
        super(ZeroOrMore, self).__init__(expr, stopOn=stopOn)
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Delegate to the one-or-more logic; no match at all yields ``(loc, [])``."""
        try:
            result = super(ZeroOrMore, self).parseImpl(instring, loc, doActions)
        except (ParseException, IndexError):
            result = loc, []
        return result

    def __str__(self):
        if hasattr(self, "name"):
            return self.name

        cached = self.strRepr
        if cached is None:
            cached = self.strRepr = "[" + _ustr(self.expr) + "]..."
        return cached
4793
4794
class _NullToken(object):
    """Placeholder object that is always falsy and renders as an empty string."""

    def __bool__(self):
        # Python 3 truth protocol
        return False

    # Python 2 spells the truth protocol ``__nonzero__``
    __nonzero__ = __bool__

    def __str__(self):
        return ""
4801
class Optional(ParseElementEnhance):
    """Optional matching of the given expression.

    Parameters:
     - expr - expression that may match zero or one time
     - default (optional) - value to be returned if the optional expression is not found.

    Example::

        # US postal code can be a 5-digit zip, plus optional 4-digit qualifier
        zip = Combine(Word(nums, exact=5) + Optional('-' + Word(nums, exact=4)))
        zip.runTests('''
            # traditional ZIP code
            12345

            # ZIP+4 form
            12101-0001

            # invalid ZIP
            98765-
            ''')

    prints::

        # traditional ZIP code
        12345
        ['12345']

        # ZIP+4 form
        12101-0001
        ['12101-0001']

        # invalid ZIP
        98765-
             ^
        FAIL: Expected end of text (at char 5), (line:1, col:6)
    """
    # Sentinel meaning "no default was supplied"; compared by identity so
    # that falsy defaults such as None, 0 or "" remain usable.
    __optionalNotMatched = _NullToken()

    def __init__(self, expr, default=__optionalNotMatched):
        super(Optional, self).__init__(expr, savelist=False)
        self.saveAsList = self.expr.saveAsList
        self.defaultValue = default
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Parse the wrapped expression, substituting the default on failure.

        A failed match never raises: it produces an empty token list, or
        the default value (also stored under the wrapped expression's
        results name, when it has one).
        """
        try:
            loc, tokens = self.expr._parse(instring, loc, doActions, callPreParse=False)
        except (ParseException, IndexError):
            if self.defaultValue is self.__optionalNotMatched:
                tokens = []
            elif self.expr.resultsName:
                tokens = ParseResults([self.defaultValue])
                tokens[self.expr.resultsName] = self.defaultValue
            else:
                tokens = [self.defaultValue]
        return loc, tokens

    def __str__(self):
        if hasattr(self, "name"):
            return self.name

        cached = self.strRepr
        if cached is None:
            cached = self.strRepr = "[" + _ustr(self.expr) + "]"
        return cached
4869
class SkipTo(ParseElementEnhance):
    """Token for skipping over all undefined text until the matched
    expression is found.

    Parameters:
     - expr - target expression marking the end of the data to be skipped
     - include - (default= ``False``) if True, the target expression is also parsed
          (the skipped text and target expression are returned as a 2-element list).
     - ignore - (default= ``None``) used to define grammars (typically quoted strings and
          comments) that might contain false matches to the target expression
     - failOn - (default= ``None``) define expressions that are not allowed to be
          included in the skipped text; scanning stops at the first position
          where this expression matches

    Example::

        report = '''
            Outstanding Issues Report - 1 Jan 2000

               # | Severity | Description                               |  Days Open
            -----+----------+-------------------------------------------+-----------
             101 | Critical | Intermittent system crash                 |          6
              94 | Cosmetic | Spelling error on Login ('log|n')         |         14
              79 | Minor    | System slow when running too many reports |         47
            '''
        integer = Word(nums)
        SEP = Suppress('|')
        # use SkipTo to simply match everything up until the next SEP
        # - ignore quoted strings, so that a '|' character inside a quoted string does not match
        # - parse action will call token.strip() for each matched token, i.e., the description body
        string_data = SkipTo(SEP, ignore=quotedString)
        string_data.setParseAction(tokenMap(str.strip))
        ticket_expr = (integer("issue_num") + SEP
                      + string_data("sev") + SEP
                      + string_data("desc") + SEP
                      + integer("days_open"))

        for tkt in ticket_expr.searchString(report):
            print(tkt.dump())

    prints::

        ['101', 'Critical', 'Intermittent system crash', '6']
        - days_open: 6
        - desc: Intermittent system crash
        - issue_num: 101
        - sev: Critical
        ['94', 'Cosmetic', "Spelling error on Login ('log|n')", '14']
        - days_open: 14
        - desc: Spelling error on Login ('log|n')
        - issue_num: 94
        - sev: Cosmetic
        ['79', 'Minor', 'System slow when running too many reports', '47']
        - days_open: 47
        - desc: System slow when running too many reports
        - issue_num: 79
        - sev: Minor
    """
    def __init__(self, other, include=False, ignore=None, failOn=None):
        super(SkipTo, self).__init__(other)
        self.ignoreExpr = ignore
        self.mayReturnEmpty = True
        self.mayIndexError = False
        self.includeMatch = include
        self.saveAsList = False
        # a plain string for failOn is promoted to a literal expression
        self.failOn = (self._literalStringClass(failOn)
                       if isinstance(failOn, basestring)
                       else failOn)
        self.errmsg = "No match found for " + _ustr(self.expr)

    def parseImpl(self, instring, loc, doActions=True):
        """Scan forward from ``loc`` until the target expression matches.

        Returns the end position and a ``ParseResults`` holding the
        skipped text (plus the target's tokens when ``include`` was set).
        Raises ``ParseException`` if the end of the input is reached
        without finding the target.
        """
        begin = loc
        input_len = len(instring)
        parse_target = self.expr._parse
        fail_on_matches = self.failOn.canParseNext if self.failOn is not None else None
        skip_ignored = self.ignoreExpr.tryParse if self.ignoreExpr is not None else None

        pos = loc
        while pos <= input_len:
            # stop scanning as soon as the failOn expression matches
            if fail_on_matches is not None and fail_on_matches(instring, pos):
                break

            if skip_ignored is not None:
                # advance past any run of ignore expressions
                while True:
                    try:
                        pos = skip_ignored(instring, pos)
                    except ParseBaseException:
                        break

            try:
                parse_target(instring, pos, doActions=False, callPreParse=False)
            except (ParseException, IndexError):
                # no match here, try the next character
                pos += 1
            else:
                # found the target expression, done scanning
                break

        else:
            # ran off the end of the input string without matching skipto expr, fail
            raise ParseException(instring, loc, self.errmsg, self)

        # build up return values
        skipped = ParseResults(instring[begin:pos])

        if self.includeMatch:
            pos, matched = parse_target(instring, pos, doActions, callPreParse=False)
            skipped += matched

        return pos, skipped
4987
class Forward(ParseElementEnhance):
    """Forward declaration of an expression to be defined later -
    used for recursive grammars, such as algebraic infix notation.
    When the expression is known, it is assigned to the ``Forward``
    variable using the '<<' operator.

    Note: take care when assigning to ``Forward`` not to overlook
    precedence of operators.

    Specifically, '|' has a lower precedence than '<<', so that::

        fwdExpr << a | b | c

    will actually be evaluated as::

        (fwdExpr << a) | b | c

    thereby leaving b and c out as parseable alternatives.  It is recommended that you
    explicitly group the values inserted into the ``Forward``::

        fwdExpr << (a | b | c)

    Converting to use the '<<=' operator instead will avoid this problem.

    See :class:`ParseResults.pprint` for an example of a recursive
    parser created using ``Forward``.
    """
    def __init__(self, other=None):
        super(Forward, self).__init__(other, savelist=False)

    def __lshift__(self, other):
        """Attach the real expression and mirror its parsing attributes.

        A plain string is promoted to a literal expression first.
        Returns ``self`` so that the assignment can be chained.
        """
        if isinstance(other, basestring):
            other = self._literalStringClass(other)
        self.expr = other
        # drop any cached string form; it described the previous contents
        self.strRepr = None
        self.mayIndexError = self.expr.mayIndexError
        self.mayReturnEmpty = self.expr.mayReturnEmpty
        self.setWhitespaceChars(self.expr.whiteChars)
        self.skipWhitespace = self.expr.skipWhitespace
        self.saveAsList = self.expr.saveAsList
        self.ignoreExprs.extend(self.expr.ignoreExprs)
        return self

    def __ilshift__(self, other):
        """``fwd <<= expr`` behaves exactly like ``fwd << expr``."""
        return self << other

    def leaveWhitespace(self):
        """Disable whitespace skipping on this element only; returns ``self``."""
        self.skipWhitespace = False
        return self

    def streamline(self):
        """Streamline the contained expression once; returns ``self``."""
        if not self.streamlined:
            # set the flag before descending so a recursive grammar that
            # reaches this element again stops here
            self.streamlined = True
            if self.expr is not None:
                self.expr.streamline()
        return self

    def validate(self, validateTrace=None):
        """Validate the contained expression, skipping elements already on the trace."""
        if validateTrace is None:
            validateTrace = []

        if self not in validateTrace:
            tmp = validateTrace[:] + [self]
            if self.expr is not None:
                self.expr.validate(tmp)
        self.checkRecursion([])

    def __str__(self):
        if hasattr(self, "name"):
            return self.name
        if self.strRepr is not None:
            return self.strRepr

        # Avoid infinite recursion by setting a temporary strRepr
        self.strRepr = ": ..."

        # Use the string representation of main expression.
        retString = '...'
        try:
            if self.expr is not None:
                retString = _ustr(self.expr)[:1000]
            else:
                retString = "None"
        finally:
            self.strRepr = self.__class__.__name__ + ": " + retString
        return self.strRepr

    def copy(self):
        """Copy this element; an undefined ``Forward`` yields a new ``Forward`` wrapping it."""
        if self.expr is not None:
            return super(Forward, self).copy()
        else:
            ret = Forward()
            ret <<= self
            return ret

    def _setResultsName(self, name, listAllMatches=False):
        """Set the results name, optionally warning when no expression is attached yet."""
        if __diag__.warn_name_set_on_empty_Forward:
            if self.expr is None:
                # Placeholders: {0} diagnostic flag, {1} the results name,
                # {2} the class name. The previous template reused {0} for
                # the results name and {1} for the class, so the warning
                # reported the wrong values.
                warnings.warn("{0}: setting results name {1!r} on {2} expression "
                              "that has no contained expression".format("warn_name_set_on_empty_Forward",
                                                                        name,
                                                                        type(self).__name__),
                              stacklevel=3)

        return super(Forward, self)._setResultsName(name, listAllMatches)
5093
class TokenConverter(ParseElementEnhance):
    """
    Abstract subclass of :class:`ParseElementEnhance`, for converting parsed results.
    """
    def __init__(self, expr, savelist=False):
        # ``savelist`` is accepted for signature compatibility only; it is
        # not forwarded and ``saveAsList`` is always reset to False here.
        super(TokenConverter, self).__init__(expr)
        self.saveAsList = False
5101
class Combine(TokenConverter):
    """Converter to concatenate all matching tokens to a single string.
    By default, the matching patterns must also be contiguous in the
    input string; this can be disabled by specifying
    ``'adjacent=False'`` in the constructor.

    Example::

        real = Word(nums) + '.' + Word(nums)
        print(real.parseString('3.1416')) # -> ['3', '.', '1416']
        # will also erroneously match the following
        print(real.parseString('3. 1416')) # -> ['3', '.', '1416']

        real = Combine(Word(nums) + '.' + Word(nums))
        print(real.parseString('3.1416')) # -> ['3.1416']
        # no match when there are internal spaces
        print(real.parseString('3. 1416')) # -> Exception: Expected W:(0123...)
    """
    def __init__(self, expr, joinString="", adjacent=True):
        super(Combine, self).__init__(expr)
        if adjacent:
            # suppress whitespace-stripping in contained parse expressions ...
            self.leaveWhitespace()
        # ... but re-enable it on the Combine itself
        self.skipWhitespace = True
        self.adjacent = adjacent
        self.joinString = joinString
        self.callPreparse = True

    def ignore(self, other):
        """Register an ignorable expression; returns ``self``.

        In adjacent mode the base ``ParserElement.ignore`` is called
        directly, bypassing the override inherited from the parent
        classes.
        """
        if not self.adjacent:
            super(Combine, self).ignore(other)
        else:
            ParserElement.ignore(self, other)
        return self

    def postParse(self, instring, loc, tokenlist):
        """Collapse the matched tokens into one string token."""
        # start from a copy, then replace its list contents with the single
        # joined string
        combined = tokenlist.copy()
        del combined[:]
        joined_text = "".join(tokenlist._asStringList(self.joinString))
        combined += ParseResults([joined_text], modal=self.modalResults)

        if self.resultsName and combined.haskeys():
            return [combined]
        return combined
5146
class Group(TokenConverter):
    """Converter to return the matched tokens as a list - useful for
    returning tokens of :class:`ZeroOrMore` and :class:`OneOrMore` expressions.

    Example::

        ident = Word(alphas)
        num = Word(nums)
        term = ident | num
        func = ident + Optional(delimitedList(term))
        print(func.parseString("fn a, b, 100"))  # -> ['fn', 'a', 'b', '100']

        func = ident + Group(Optional(delimitedList(term)))
        print(func.parseString("fn a, b, 100"))  # -> ['fn', ['a', 'b', '100']]
    """
    def __init__(self, expr):
        super(Group, self).__init__(expr)
        self.saveAsList = True

    def postParse(self, instring, loc, tokenlist):
        """Nest the whole token list as the single element of a new list."""
        grouped = [tokenlist]
        return grouped
5168
class Dict(TokenConverter):
    """Converter to return a repetitive expression as a list, but also
    as a dictionary. Each element can also be referenced using the first
    token in the expression as its key. Useful for tabular report
    scraping when the first column can be used as a item key.

    Example::

        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word).setParseAction(' '.join))

        text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
        attr_expr = (label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))

        # print attributes as plain groups
        print(OneOrMore(attr_expr).parseString(text).dump())

        # instead of OneOrMore(expr), parse using Dict(OneOrMore(Group(expr))) - Dict will auto-assign names
        result = Dict(OneOrMore(Group(attr_expr))).parseString(text)
        print(result.dump())

        # access named fields as dict entries, or output as dict
        print(result['shape'])
        print(result.asDict())

    prints::

        ['shape', 'SQUARE', 'posn', 'upper left', 'color', 'light blue', 'texture', 'burlap']
        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
        - color: light blue
        - posn: upper left
        - shape: SQUARE
        - texture: burlap
        SQUARE
        {'color': 'light blue', 'posn': 'upper left', 'texture': 'burlap', 'shape': 'SQUARE'}

    See more examples at :class:`ParseResults` of accessing fields by results name.
    """
    def __init__(self, expr):
        super(Dict, self).__init__(expr)
        self.saveAsList = True

    def postParse(self, instring, loc, tokenlist):
        """Register each token group in ``tokenlist`` under its first token.

        Empty groups are skipped. An integer key is converted to its
        stripped string form. The stored value is ``""`` for a key-only
        group, the bare second token for a simple pair, and otherwise the
        remaining tokens (unwrapped when exactly one unnamed token is
        left).
        """
        for index, group in enumerate(tokenlist):
            group_len = len(group)
            if group_len == 0:
                continue

            key = group[0]
            if isinstance(key, int):
                key = _ustr(group[0]).strip()

            if group_len == 1:
                tokenlist[key] = _ParseResultsWithOffset("", index)
            elif group_len == 2 and not isinstance(group[1], ParseResults):
                tokenlist[key] = _ParseResultsWithOffset(group[1], index)
            else:
                # everything after the key becomes the value
                remainder = group.copy()
                del remainder[0]
                single_plain_value = (
                    len(remainder) == 1
                    and not (isinstance(remainder, ParseResults) and remainder.haskeys())
                )
                if single_plain_value:
                    tokenlist[key] = _ParseResultsWithOffset(remainder[0], index)
                else:
                    tokenlist[key] = _ParseResultsWithOffset(remainder, index)

        if self.resultsName:
            return [tokenlist]
        return tokenlist
5235
5236
class Suppress(TokenConverter):
    """Converter for ignoring the results of a parsed expression.

    Example::

        source = "a, b, c,d"
        wd = Word(alphas)
        wd_list1 = wd + ZeroOrMore(',' + wd)
        print(wd_list1.parseString(source))

        # often, delimiters that are useful during parsing are just in the
        # way afterward - use Suppress to keep them out of the parsed output
        wd_list2 = wd + ZeroOrMore(Suppress(',') + wd)
        print(wd_list2.parseString(source))

    prints::

        ['a', ',', 'b', ',', 'c', ',', 'd']
        ['a', 'b', 'c', 'd']

    (See also :class:`delimitedList`.)
    """
    def postParse(self, instring, loc, tokenlist):
        """Discard whatever was matched by returning an empty token list."""
        return []

    def suppress(self):
        """Already suppressed, so there is nothing to wrap: return ``self``."""
        return self
5264
5265
class OnlyOnce(object):
    """Wrapper for parse actions, to ensure they are only called once.

    The wrapped action runs on the first call. Every later call raises
    ``ParseException`` until :meth:`reset` is called.
    """
    def __init__(self, methodCall):
        self.callable = _trim_arity(methodCall)
        self.called = False

    def __call__(self, s, l, t):
        """Run the wrapped parse action, or raise if it has already run."""
        if self.called:
            # Say why the parse failed; an empty message left the
            # resulting error impossible to diagnose.
            raise ParseException(s, l, "OnlyOnce obj called multiple times w/out reset")
        results = self.callable(s, l, t)
        # only mark as used once the action has completed without raising
        self.called = True
        return results

    def reset(self):
        """Allow the wrapped parse action to be called once more."""
        self.called = False
5280
def traceParseAction(f):
    """Decorator for debugging parse actions.

    When the parse action is called, this decorator will print
    ``">> entering method-name(line:<current_source_line>, <parse_location>, <matched_tokens>)"``.
    When the parse action completes, the decorator will print
    ``"<<"`` followed by the returned value, or any exception that the parse action raised.
    All trace output is written to ``sys.stderr``.

    Example::

        wd = Word(alphas)

        @traceParseAction
        def remove_duplicate_chars(tokens):
            return ''.join(sorted(set(''.join(tokens))))

        wds = OneOrMore(wd).setParseAction(remove_duplicate_chars)
        print(wds.parseString("slkdjs sld sldd sdlf sdljf"))

    prints::

        >>entering remove_duplicate_chars(line: 'slkdjs sld sldd sdlf sdljf', 0, (['slkdjs', 'sld', 'sldd', 'sdlf', 'sdljf'], {}))
        <<leaving remove_duplicate_chars (ret: 'dfjkls')
        ['dfjkls']
    """
    f = _trim_arity(f)

    def z(*paArgs):
        emit = sys.stderr.write
        label = f.__name__
        # the last three positional arguments are always (s, l, t)
        s, l, t = paArgs[-3:]
        if len(paArgs) > 3:
            # an extra leading argument is the owning instance; qualify the name
            label = paArgs[0].__class__.__name__ + '.' + label
        emit(">>entering %s(line: '%s', %d, %r)\n" % (label, line(l, s), l, t))
        try:
            ret = f(*paArgs)
        except Exception as exc:
            emit("<<leaving %s (exception: %s)\n" % (label, exc))
            raise
        else:
            emit("<<leaving %s (ret: %r)\n" % (label, ret))
            return ret

    try:
        z.__name__ = f.__name__
    except AttributeError:
        # the wrapped callable exposes no __name__; keep the wrapper's own
        pass
    return z
5325
5326 #
5327 # global helpers
5328 #
def delimitedList(expr, delim=",", combine=False):
    """Helper to define a delimited list of expressions - the delimiter
    defaults to ','. By default, the list elements and delimiters can
    have intervening whitespace, and comments, but this can be
    overridden by passing ``combine=True`` in the constructor. If
    ``combine`` is set to ``True``, the matching tokens are
    returned as a single token string, with the delimiters included;
    otherwise, the matching tokens are returned as a list of tokens,
    with the delimiters suppressed.

    Example::

        delimitedList(Word(alphas)).parseString("aa,bb,cc") # -> ['aa', 'bb', 'cc']
        delimitedList(Word(hexnums), delim=':', combine=True).parseString("AA:BB:CC:DD:EE") # -> ['AA:BB:CC:DD:EE']
    """
    expr_text = _ustr(expr)
    list_name = expr_text + " [" + _ustr(delim) + " " + expr_text + "]..."
    if not combine:
        # delimiters are parsed but kept out of the results
        return (expr + ZeroOrMore(Suppress(delim) + expr)).setName(list_name)
    # delimiters stay in, and everything is merged into one string token
    return Combine(expr + ZeroOrMore(delim + expr)).setName(list_name)
5349
def countedArray(expr, intExpr=None):
    """Helper to define a counted list of expressions.

    This helper defines a pattern of the form::

        integer expr expr expr...

    where the leading integer tells how many expr expressions follow.
    The matched tokens returns the array of expr tokens as a list - the
    leading count token is suppressed.

    If ``intExpr`` is specified, it should be a pyparsing expression
    that produces an integer value.

    Example::

        countedArray(Word(alphas)).parseString('2 ab cd ef')  # -> ['ab', 'cd']

        # in this parser, the leading integer value is given in binary,
        # '10' indicating that 2 values are in the array
        binaryConstant = Word('01').setParseAction(lambda t: int(t[0], 2))
        countedArray(Word(alphas), intExpr=binaryConstant).parseString('10 ab cd ef')  # -> ['ab', 'cd']
    """
    arrayExpr = Forward()

    def countFieldParseAction(s, l, t):
        # Once the count is known, define the array body as exactly that
        # many copies of expr (or an empty group for a zero count), and
        # drop the count token itself from the results.
        n = t[0]
        arrayExpr << (Group(And([expr] * n)) if n else Group(empty))
        return []

    if intExpr is not None:
        # work on a copy so the caller's expression is left untouched
        intExpr = intExpr.copy()
    else:
        intExpr = Word(nums).setParseAction(lambda t: int(t[0]))
    intExpr.setName("arrayLen")
    intExpr.addParseAction(countFieldParseAction, callDuringTry=True)
    return (intExpr + arrayExpr).setName('(len) ' + _ustr(expr) + '...')
5385
def _flatten(L):
    """Return a new flat list with every nested ``list`` in ``L`` expanded in order.

    Only ``list`` instances are expanded; tuples, strings and other
    iterables are kept as single items.
    """
    flat = []
    for item in L:
        if not isinstance(item, list):
            flat.append(item)
            continue
        flat += _flatten(item)
    return flat
5394
def matchPreviousLiteral(expr):
    """Helper to define an expression that is indirectly defined from
    the tokens matched in a previous expression, that is, it looks for
    a 'repeat' of a previous expression.  For example::

        first = Word(nums)
        second = matchPreviousLiteral(first)
        matchExpr = first + ":" + second

    will match ``"1:1"``, but not ``"1:2"``.  Because this
    matches a previous literal, will also match the leading
    ``"1:1"`` in ``"1:10"``. If this is not desired, use
    :class:`matchPreviousExpr`. Do *not* use with packrat parsing
    enabled.
    """
    rep = Forward()

    def copyTokenToRepeater(s, l, t):
        # redefine the repeater from whatever ``expr`` just matched
        if not t:
            rep << Empty()
        elif len(t) == 1:
            rep << t[0]
        else:
            # several (possibly nested) tokens: match them in sequence
            flat_tokens = _flatten(t.asList())
            rep << And(Literal(tt) for tt in flat_tokens)

    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    rep.setName('(prev) ' + _ustr(expr))
    return rep
5424
def matchPreviousExpr(expr):
    """Helper to define an expression that is indirectly defined from
    the tokens matched in a previous expression, that is, it looks for
    a 'repeat' of a previous expression.  For example::

        first = Word(nums)
        second = matchPreviousExpr(first)
        matchExpr = first + ":" + second

    will match ``"1:1"``, but not ``"1:2"``.  Because this
    matches by expressions, will *not* match the leading ``"1:1"``
    in ``"1:10"``; the expressions are evaluated first, and then
    compared, so ``"1"`` is compared with ``"10"``.  Do *not* use
    with packrat parsing enabled.
    """
    rep = Forward()
    # the repeater parses with its own copy of expr, so parse actions set
    # on the copy do not affect the caller's expression
    e2 = expr.copy()
    rep <<= e2

    def copyTokenToRepeater(s, l, t):
        # remember what ``expr`` matched, then make the repeater reject
        # anything that parses to different tokens
        matchTokens = _flatten(t.asList())

        def mustMatchTheseTokens(s, l, t):
            theseTokens = _flatten(t.asList())
            if theseTokens != matchTokens:
                # report the real input and location plus both token
                # lists, instead of an anonymous ParseException('', 0, '')
                raise ParseException(s, l,
                                     "Expected {0}, found {1}".format(matchTokens, theseTokens))

        rep.setParseAction(mustMatchTheseTokens, callDuringTry=True)

    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    rep.setName('(prev) ' + _ustr(expr))
    return rep
5453
def _escapeRegexRangeChars(s):
    """Escape ``s`` for use inside a regex character class (``[...]``).

    A backslash is put in front of each of the characters ``\\ ^ - [ ]``,
    and literal newline and tab characters are replaced by the two-character
    sequences ``\\n`` and ``\\t``.
    """
    # The backslash must be handled first so that the backslashes added
    # for the other characters are not escaped a second time.
    for special in ("\\", "^", "-", "[", "]"):
        s = s.replace(special, _bslash + special)
    for raw_char, escaped in (("\n", r"\n"), ("\t", r"\t")):
        s = s.replace(raw_char, escaped)
    return _ustr(s)
5461
def oneOf(strs, caseless=False, useRegex=True, asKeyword=False):
    """Helper to quickly define a set of alternative Literals, and makes
    sure to do longest-first testing when there is a conflict,
    regardless of the input order, but returns
    a :class:`MatchFirst` for best performance.

    Parameters:

     - strs - a string of space-delimited literals, or a collection of
       string literals
     - caseless - (default= ``False``) - treat all literals as
       caseless
     - useRegex - (default= ``True``) - as an optimization, will
       generate a Regex object; otherwise, will generate
       a :class:`MatchFirst` object (if ``caseless=True`` or ``asKeyword=True``, or if
       creating a :class:`Regex` raises an exception)
     - asKeyword - (default=``False``) - enforce Keyword-style matching on the
       generated expressions

    Example::

        comp_oper = oneOf("< = > <= >= !=")
        var = Word(alphas)
        number = Word(nums)
        term = var | number
        comparison_expr = term + comp_oper + term
        print(comparison_expr.searchString("B = 12  AA=23 B<=AA AA>12"))

    prints::

        [['B', '=', '12'], ['AA', '=', '23'], ['B', '<=', 'AA'], ['AA', '>', '12']]
    """
    if isinstance(caseless, basestring):
        # a string in the second position means the choices were passed as
        # separate arguments instead of as one collection
        warnings.warn("More than one string argument passed to oneOf, pass "
                      "choices as a list or space-delimited string", stacklevel=2)

    if caseless:
        def isequal(a, b):
            return a.upper() == b.upper()

        def masks(a, b):
            return b.upper().startswith(a.upper())

        parseElementClass = CaselessKeyword if asKeyword else CaselessLiteral
    else:
        def isequal(a, b):
            return a == b

        def masks(a, b):
            return b.startswith(a)

        parseElementClass = Keyword if asKeyword else Literal

    if isinstance(strs, basestring):
        symbols = strs.split()
    elif isinstance(strs, Iterable):
        symbols = list(strs)
    else:
        symbols = []
        warnings.warn("Invalid argument to oneOf, expected string or iterable",
                      SyntaxWarning, stacklevel=2)
    if not symbols:
        return NoMatch()

    if not asKeyword:
        # if not producing keywords, need to reorder to take care to avoid masking
        # longer choices with shorter ones
        idx = 0
        while idx < len(symbols) - 1:
            current = symbols[idx]
            for later_idx, candidate in enumerate(symbols[idx + 1:], idx + 1):
                if isequal(candidate, current):
                    # duplicate: drop it and re-examine the same position
                    del symbols[later_idx]
                    break
                elif masks(current, candidate):
                    # current is a prefix of candidate: move the longer
                    # symbol ahead of it
                    del symbols[later_idx]
                    symbols.insert(idx, candidate)
                    break
            else:
                idx += 1

    if useRegex and not caseless and not asKeyword:
        try:
            if len(symbols) == len("".join(symbols)):
                # every symbol is a single character: one character class will do
                return Regex("[%s]" % "".join(_escapeRegexRangeChars(sym) for sym in symbols)).setName(' | '.join(symbols))
            else:
                return Regex("|".join(re.escape(sym) for sym in symbols)).setName(' | '.join(symbols))
        except Exception:
            warnings.warn("Exception creating Regex for oneOf, building MatchFirst",
                          SyntaxWarning, stacklevel=2)

    # last resort, just use MatchFirst
    return MatchFirst(parseElementClass(sym) for sym in symbols).setName(' | '.join(symbols))
5548
def dictOf(key, value):
    """Helper to easily and clearly define a dictionary by specifying
    the respective patterns for the key and value.  Takes care of
    defining the :class:`Dict`, :class:`OneOrMore`, and
    :class:`Group` tokens in the proper order.  The key pattern
    can include delimiting markers or punctuation, as long as they are
    suppressed, thereby leaving the significant key text.  The value
    pattern can include named results, so that the :class:`Dict` results
    can include named token fields.

    Example::

        text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
        attr_expr = (label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))
        print(OneOrMore(attr_expr).parseString(text).dump())

        attr_label = label
        attr_value = Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join)

        # similar to Dict, but simpler call format
        result = dictOf(attr_label, attr_value).parseString(text)
        print(result.dump())
        print(result['shape'])
        print(result.shape)  # object attribute access works too
        print(result.asDict())

    prints::

        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
        - color: light blue
        - posn: upper left
        - shape: SQUARE
        - texture: burlap
        SQUARE
        SQUARE
        {'color': 'light blue', 'shape': 'SQUARE', 'posn': 'upper left', 'texture': 'burlap'}
    """
    # one grouped (key, value) entry, repeated at least once, keyed by Dict
    entry = Group(key + value)
    entries = OneOrMore(entry)
    return Dict(entries)
5587
def originalTextFor(expr, asString=True):
    """Helper to return the original, untokenized text matched by an
    expression.

    Useful to turn the parsed fields of an HTML start tag back into the
    raw tag text, or to recover the exact input (including intervening
    whitespace) that a sequence of separate tokens matched. By default
    the result is a string containing the original parsed text.

    If ``asString`` is passed as ``False``, the result is instead a
    :class:`ParseResults` holding a single token with the original
    matched text, plus any results names that were matched inside
    ``expr``. So if ``expr`` contains expressions with results names that
    you want to keep, pass ``asString=False``.

    Example::

        src = "this is test <b> bold <i>text</i> </b> normal text "
        for tag in ("b", "i"):
            opener, closer = makeHTMLTags(tag)
            patt = originalTextFor(opener + SkipTo(closer) + closer)
            print(patt.searchString(src)[0])

    prints::

        ['<b> bold <i>text</i> </b>']
        ['<i>text</i>']
    """
    def current_location(s, loc, t):
        return loc

    def slice_as_string(s, l, t):
        return s[t._original_start: t._original_end]

    def slice_in_place(s, l, t):
        # pop the bookkeeping names (start first, then end) and replace
        # all tokens with the single original-text token
        start = t.pop('_original_start')
        end = t.pop('_original_end')
        t[:] = [s[start:end]]

    startMarker = Empty().setParseAction(current_location)
    endMarker = startMarker.copy()
    # the end marker must report the location right after expr, so it
    # must not run the pre-parse step before recording it
    endMarker.callPreparse = False

    wrapped = startMarker("_original_start") + expr + endMarker("_original_end")
    wrapped.setParseAction(slice_as_string if asString else slice_in_place)
    wrapped.ignoreExprs = expr.ignoreExprs
    return wrapped
5629
def ungroup(expr):
    """Helper to undo a level of grouping: wraps ``expr`` in a
    :class:`TokenConverter` whose added parse action returns only the
    first token of the match.
    """
    def first_token(t):
        return t[0]

    converter = TokenConverter(expr)
    return converter.addParseAction(first_token)
5635
def locatedExpr(expr):
    """Helper to decorate a returned token with its starting and ending
    locations in the input string.

    The returned group carries these results names:

    - locn_start = location where matched expression begins
    - locn_end = location where matched expression ends
    - value = the actual parsed results

    Be careful if the input text contains ``<TAB>`` characters, you
    may want to call :class:`ParserElement.parseWithTabs`

    Example::

        wd = Word(alphas)
        for match in locatedExpr(wd).searchString("ljsdf123lksdjjf123lkkjj1222"):
            print(match)

    prints::

        [[0, 'ljsdf', 5]]
        [[8, 'lksdjjf', 15]]
        [[18, 'lkkjj', 23]]
    """
    def current_location(s, l, t):
        return l

    start_marker = Empty().setParseAction(current_location)
    # the end marker is a separate copy that leaves whitespace alone
    end_marker = start_marker.copy().leaveWhitespace()
    return Group(start_marker("locn_start") + expr("value") + end_marker("locn_end"))
5663
5664
# convenience constants for positional expressions
# Each is a module-level, pre-named instance of the corresponding expression
# class, so grammars can reference it directly (nestedExpr below, for
# example, starts one of its content expressions from ``empty.copy()``).
empty = Empty().setName("empty")
lineStart = LineStart().setName("lineStart")
lineEnd = LineEnd().setName("lineEnd")
stringStart = StringStart().setName("stringStart")
stringEnd = StringEnd().setName("stringEnd")
5671
# Grammar for the regex-style bracket expressions accepted by srange().

# backslash followed by one punctuation character -> that character
_escapedPunc = Word(_bslash, r"\[]-*.$+^?()~ ", exact=2).setParseAction(lambda s, l, t: t[0][1])
# \x## (or the legacy \0x## form) -> the character with that hex code.
# The prefix is removed piecewise: the backslash, then the optional legacy
# "0", then the single x/X marker. A single lstrip(r'\0x') strips by
# character set, so it would also swallow leading zero digits of the value
# itself ("\x00" became "" and int() failed) and would never remove an
# upper-case "X", which the regex does accept.
_escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").setParseAction(
    lambda s, l, t: unichr(int(t[0].lstrip('\\').lstrip('0')[1:], 16)))
# \0## -> the character with that octal code (only the backslash is dropped)
_escapedOctChar = Regex(r"\\0[0-7]+").setParseAction(lambda s, l, t: unichr(int(t[0][1:], 8)))
# any of the escapes above, or one literal character other than "\" or "]"
_singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | CharsNotIn(r'\]', exact=1)
# "a-z" style range, grouped so srange() can tell ranges from single chars
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
# "[" optional "^" (named "negate") then the members (named "body") and "]"
_reBracketExpr = Literal("[") + Optional("^").setResultsName("negate") + Group(OneOrMore(_charRange | _singleChar)).setResultsName("body") + "]"
5678
def srange(s):
    r"""Helper to easily define string ranges for use in Word
    construction. Borrows syntax from regexp '[]' string range
    definitions::

        srange("[0-9]")   -> "0123456789"
        srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
        srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"

    The input string must be enclosed in []'s, and the returned string
    is the expanded character set joined into a single string. The
    values enclosed in the []'s may be:

    - a single character
    - an escaped character with a leading backslash (such as ``\-``
      or ``\]``)
    - an escaped hex character with a leading ``'\x'``
      (``\x21``, which is a ``'!'`` character) (``\0x##``
      is also supported for backwards compatibility)
    - an escaped octal character with a leading ``'\0'``
      (``\041``, which is a ``'!'`` character)
    - a range of any of the above, separated by a dash (``'a-z'``,
      etc.)
    - any combination of the above (``'aeiouy'``,
      ``'a-zA-Z0-9_$'``, etc.)

    If the string cannot be parsed or expanded, an empty string is
    returned.
    """
    def expand(part):
        # single characters arrive as plain strings; ranges arrive as a
        # grouped ParseResults pair [first, last]
        if not isinstance(part, ParseResults):
            return part
        first, last = ord(part[0]), ord(part[1])
        return ''.join(unichr(code) for code in range(first, last + 1))

    try:
        members = _reBracketExpr.parseString(s).body
        return "".join(expand(member) for member in members)
    except Exception:
        return ""
5710
def matchOnlyAtCol(n):
    """Helper that builds a parse action which only accepts a match
    starting at column ``n`` of the input text; any other column raises
    :class:`ParseException`.
    """
    def verifyCol(strg, locn, toks):
        actual = col(locn, strg)
        if actual == n:
            return
        raise ParseException(strg, locn, "matched token not at column %d" % n)

    return verifyCol
5719
def replaceWith(replStr):
    """Helper that builds a parse action which ignores the matched
    tokens and returns the fixed value ``replStr`` as the only token.
    Especially useful when used with
    :class:`transformString<ParserElement.transformString>` ().

    Example::

        num = Word(nums).setParseAction(lambda toks: int(toks[0]))
        na = oneOf("N/A NA").setParseAction(replaceWith(math.nan))
        term = na | num

        OneOrMore(term).parseString("324 234 N/A 234") # -> [324, 234, nan, 234]
    """
    def replace_tokens(s, l, t):
        # a fresh one-element list on every call
        return [replStr]

    return replace_tokens
5734
def removeQuotes(s, l, t):
    """Helper parse action that strips the first and last character
    (the quotation marks) from the first parsed token.

    Example::

        # by default, quotation marks are included in parsed results
        quotedString.parseString("'Now is the Winter of our Discontent'") # -> ["'Now is the Winter of our Discontent'"]

        # use removeQuotes to strip quotation marks from parsed results
        quotedString.setParseAction(removeQuotes)
        quotedString.parseString("'Now is the Winter of our Discontent'") # -> ["Now is the Winter of our Discontent"]
    """
    quoted = t[0]
    return quoted[1:-1]
5749
def tokenMap(func, *args):
    """Helper to define a parse action by mapping a function to all
    elements of a ParseResults list. Any additional ``args`` are passed
    to ``func`` after the token, as in
    ``hex_integer = Word(hexnums).setParseAction(tokenMap(int, 16))``,
    which will convert the parsed data to an integer using base 16.

    The returned parse action is named after ``func`` (its ``__name__``,
    else its class name, else ``str(func)``).

    Example (compare the last to example in :class:`ParserElement.transformString`::

        hex_ints = OneOrMore(Word(hexnums)).setParseAction(tokenMap(int, 16))
        hex_ints.runTests('''
            00 11 22 aa FF 0a 0d 1a
            ''')

        upperword = Word(alphas).setParseAction(tokenMap(str.upper))
        OneOrMore(upperword).runTests('''
            my kingdom for a horse
            ''')

        wd = Word(alphas).setParseAction(tokenMap(str.title))
        OneOrMore(wd).setParseAction(' '.join).runTests('''
            now is the winter of our discontent made glorious summer by this sun of york
            ''')

    prints::

        00 11 22 aa FF 0a 0d 1a
        [0, 17, 34, 170, 255, 10, 13, 26]

        my kingdom for a horse
        ['MY', 'KINGDOM', 'FOR', 'A', 'HORSE']

        now is the winter of our discontent made glorious summer by this sun of york
        ['Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York']
    """
    def pa(s, l, t):
        mapped = []
        for token in t:
            mapped.append(func(token, *args))
        return mapped

    # label the parse action after the mapped function
    try:
        class_name = getattr(func, '__class__').__name__
        func_name = getattr(func, '__name__', class_name)
    except Exception:
        func_name = str(func)
    pa.__name__ = func_name

    return pa
5797
# Both actions are built with tokenMap(), so they convert every token of a
# match (via _ustr) and return the converted tokens as a list.
upcaseTokens = tokenMap(lambda t: _ustr(t).upper())
"""(Deprecated) Helper parse action to convert tokens to upper case.
Deprecated in favor of :class:`pyparsing_common.upcaseTokens`"""

downcaseTokens = tokenMap(lambda t: _ustr(t).lower())
"""(Deprecated) Helper parse action to convert tokens to lower case.
Deprecated in favor of :class:`pyparsing_common.downcaseTokens`"""
5805
def _makeTags(tagStr, xml,
              suppress_LT=Suppress("<"),
              suppress_GT=Suppress(">")):
    """Internal helper to construct opening and closing tag expressions, given a tag name.

    ``tagStr`` is either a tag name string (converted to a
    :class:`Keyword`, caseless unless ``xml`` is true) or an expression
    whose ``name`` is used as the tag name. Returns the 2-tuple
    ``(openTag, closeTag)``.
    """
    if isinstance(tagStr, basestring):
        resname = tagStr
        tagStr = Keyword(tagStr, caseless=not xml)
    else:
        resname = tagStr.name

    # "ns:tag" -> "NsTag", used in the start.../end... results names
    resultSuffix = "".join(resname.replace(":", " ").title().split())

    tagAttrName = Word(alphas, alphanums + "_-:")
    if xml:
        # XML: values must be double-quoted and every attribute has a value
        tagAttrValue = dblQuotedString.copy().setParseAction(removeQuotes)
        attribute = Group(tagAttrName + Suppress("=") + tagAttrValue)
    else:
        # HTML: either quote style or a bare word, the value is optional,
        # and attribute names are lower-cased
        tagAttrValue = quotedString.copy().setParseAction(removeQuotes) | Word(printables, excludeChars=">")
        attribute = Group(tagAttrName.setParseAction(downcaseTokens)
                          + Optional(Suppress("=") + tagAttrValue))

    # "empty" is True when the tag closes itself with "/>"
    emptyFlag = Optional("/", default=[False])("empty").setParseAction(lambda s, l, t: t[0] == '/')
    openTag = (suppress_LT
               + tagStr("tag")
               + Dict(ZeroOrMore(attribute))
               + emptyFlag
               + suppress_GT)
    closeTag = Combine(_L("</") + tagStr + ">", adjacent=False)

    openTag.setName("<%s>" % resname)

    # add start<tagname> results name in parse action now that ungrouped names are not reported at two levels
    def addStartName(t):
        t["start" + resultSuffix] = t.copy()

    openTag.addParseAction(addStartName)
    closeTag = closeTag("end" + resultSuffix).setName("</%s>" % resname)
    openTag.tag = resname
    closeTag.tag = resname
    openTag.tag_body = SkipTo(closeTag())
    return openTag, closeTag
5842
def makeHTMLTags(tagStr):
    """Helper to construct opening and closing tag expressions for HTML,
    given a tag name. Matches tags in either upper or lower case,
    attributes with namespaces and with quoted or unquoted values.

    Returns the 2-tuple ``(openTag, closeTag)``.

    Example::

        text = '<td>More info at the <a href="https://github.com/pyparsing/pyparsing/wiki">pyparsing</a> wiki page</td>'
        # makeHTMLTags returns pyparsing expressions for the opening and
        # closing tags as a 2-tuple
        a, a_end = makeHTMLTags("A")
        link_expr = a + SkipTo(a_end)("link_text") + a_end

        for link in link_expr.searchString(text):
            # attributes in the <A> tag (like "href" shown here) are
            # also accessible as named results
            print(link.link_text, '->', link.href)

    prints::

        pyparsing -> https://github.com/pyparsing/pyparsing/wiki
    """
    tags = _makeTags(tagStr, xml=False)
    return tags
5866
def makeXMLTags(tagStr):
    """Helper to construct opening and closing tag expressions for XML,
    given a tag name. Matches tags only in the given upper/lower case.

    Returns the 2-tuple ``(openTag, closeTag)``.

    Example: similar to :class:`makeHTMLTags`
    """
    tags = _makeTags(tagStr, xml=True)
    return tags
5874
def withAttribute(*args, **attrDict):
    """Helper to create a validating parse action to be used with start
    tags created with :class:`makeXMLTags` or
    :class:`makeHTMLTags`. Use ``withAttribute`` to qualify
    a starting tag with a required attribute value, to avoid false
    matches on common tags such as ``<TD>`` or ``<DIV>``.

    Call ``withAttribute`` with a series of attribute names and
    values. Specify the list of filter attributes names and values as:

    - keyword arguments, as in ``(align="right")``, or
    - as an explicit dict with ``**`` operator, when an attribute
      name is also a Python reserved word, as in ``**{"class":"Customer", "align":"right"}``
    - a list of name-value tuples, as in ``(("ns1:class", "Customer"), ("ns2:align", "right"))``

    If any positional name-value tuples are given, only those are used
    and the keyword arguments are ignored.

    For attribute names with a namespace prefix, you must use the second
    form. Attribute names are matched insensitive to upper/lower case.

    If just testing for ``class`` (with or without a namespace), use
    :class:`withClass`.

    To verify that the attribute exists, but without specifying a value,
    pass ``withAttribute.ANY_VALUE`` as the value.

    Example::

        html = '''
            <div>
            Some text
            <div type="grid">1 4 0 1 0</div>
            <div type="graph">1,3 2,3 1,1</div>
            <div>this has no type</div>
            </div>

        '''
        div,div_end = makeHTMLTags("div")

        # only match div tag having a type attribute with value "grid"
        div_grid = div().setParseAction(withAttribute(type="grid"))
        grid_expr = div_grid + SkipTo(div | div_end)("body")
        for grid_header in grid_expr.searchString(html):
            print(grid_header.body)

        # construct a match with any div tag having a type attribute, regardless of the value
        div_any_type = div().setParseAction(withAttribute(type=withAttribute.ANY_VALUE))
        div_expr = div_any_type + SkipTo(div | div_end)("body")
        for div_header in div_expr.searchString(html):
            print(div_header.body)

    prints::

        1 4 0 1 0

        1 4 0 1 0
        1,3 2,3 1,1
    """
    pairs = args[:] if args else attrDict.items()
    # snapshot as a list of (name, value) 2-tuples
    required = [(name, value) for name, value in pairs]

    def pa(s, l, tokens):
        for attrName, attrValue in required:
            if attrName not in tokens:
                raise ParseException(s, l, "no matching attribute " + attrName)
            if attrValue != withAttribute.ANY_VALUE:
                actual = tokens[attrName]
                if actual != attrValue:
                    raise ParseException(s, l, "attribute '%s' has value '%s', must be '%s'" %
                                         (attrName, actual, attrValue))

    return pa

withAttribute.ANY_VALUE = object()
5945
def withClass(classname, namespace=''):
    """Simplified version of :class:`withAttribute` when
    matching on a div class - made difficult because ``class`` is
    a reserved word in Python.

    The attribute checked is ``class``, or ``<namespace>:class`` when a
    non-empty ``namespace`` is given.

    Example::

        html = '''
            <div>
            Some text
            <div class="grid">1 4 0 1 0</div>
            <div class="graph">1,3 2,3 1,1</div>
            <div>this &lt;div&gt; has no class</div>
            </div>

        '''
        div,div_end = makeHTMLTags("div")
        div_grid = div().setParseAction(withClass("grid"))

        grid_expr = div_grid + SkipTo(div | div_end)("body")
        for grid_header in grid_expr.searchString(html):
            print(grid_header.body)

        div_any_type = div().setParseAction(withClass(withAttribute.ANY_VALUE))
        div_expr = div_any_type + SkipTo(div | div_end)("body")
        for div_header in div_expr.searchString(html):
            print(div_header.body)

    prints::

        1 4 0 1 0

        1 4 0 1 0
        1,3 2,3 1,1
    """
    if namespace:
        classattr = "%s:class" % namespace
    else:
        classattr = "class"
    required = {classattr: classname}
    return withAttribute(**required)
5983
# Associativity markers for infixNotation(): each operator definition's
# rightLeftAssoc member is compared against opAssoc.LEFT / opAssoc.RIGHT.
# The values are unique sentinel objects, used only for comparison.
opAssoc = SimpleNamespace()
opAssoc.LEFT = object()
opAssoc.RIGHT = object()
5987
def infixNotation(baseExpr, opList, lpar=Suppress('('), rpar=Suppress(')')):
    """Helper method for constructing grammars of expressions made up of
    operators working in a precedence hierarchy.  Operators may be unary
    or binary, left- or right-associative.  Parse actions can also be
    attached to operator expressions. The generated parser will also
    recognize the use of parentheses to override operator precedences
    (see example below).

    Note: if you define a deep operator list, you may see performance
    issues when using infixNotation. See
    :class:`ParserElement.enablePackrat` for a mechanism to potentially
    improve your parser performance.

    Parameters:
     - baseExpr - expression representing the most basic operand of the
       nested expression grammar
     - opList - list of tuples (or lists), one for each operator
       precedence level in the expression grammar; each one is of the
       form ``(opExpr, numTerms, rightLeftAssoc, parseAction)``, where:

       - opExpr is the pyparsing expression for the operator; may also
         be a string, which will be converted to a Literal; if numTerms
         is 3, opExpr is a tuple or list of two expressions, for the two
         operators separating the 3 terms
       - numTerms is the number of terms for this operator (must be 1,
         2, or 3)
       - rightLeftAssoc is the indicator whether the operator is right
         or left associative, using the pyparsing-defined constants
         ``opAssoc.RIGHT`` and ``opAssoc.LEFT``.
       - parseAction is the parse action to be associated with
         expressions matching this operator expression (the parse action
         tuple member may be omitted); if the parse action is passed
         a tuple or list of functions, this is equivalent to calling
         ``setParseAction(*fn)``
         (:class:`ParserElement.setParseAction`)
     - lpar - expression for matching left-parentheses
       (default= ``Suppress('(')``)
     - rpar - expression for matching right-parentheses
       (default= ``Suppress(')')``)

    Raises ``ValueError`` if an operator definition has an unsupported
    number of terms, an unknown associativity, or (for ``numTerms=3``)
    an ``opExpr`` that is not a tuple or list of two expressions.

    Example::

        # simple example of four-function arithmetic with ints and
        # variable names
        integer = pyparsing_common.signed_integer
        varname = pyparsing_common.identifier

        arith_expr = infixNotation(integer | varname,
            [
            ('-', 1, opAssoc.RIGHT),
            (oneOf('* /'), 2, opAssoc.LEFT),
            (oneOf('+ -'), 2, opAssoc.LEFT),
            ])

        arith_expr.runTests('''
            5+3*6
            (5+3)*6
            -2--11
            ''', fullDump=False)

    prints::

        5+3*6
        [[5, '+', [3, '*', 6]]]

        (5+3)*6
        [[[5, '+', 3], '*', 6]]

        -2--11
        [[['-', 2], '-', ['-', 11]]]
    """
    # captive version of FollowedBy that does not do parse actions or capture results names
    class _FB(FollowedBy):
        def parseImpl(self, instring, loc, doActions=True):
            self.expr.tryParse(instring, loc)
            return loc, []

    ret = Forward()
    lastExpr = baseExpr | (lpar + ret + rpar)
    for operDef in opList:
        # pad with None so the parse action member may be omitted; tuple()
        # lets an operator definition be given as a list as well
        opExpr, arity, rightLeftAssoc, pa = (tuple(operDef) + (None, ))[:4]
        if arity == 3:
            # validate BEFORE formatting the term name, otherwise a bad
            # opExpr fails with an unrelated string-formatting TypeError
            if not isinstance(opExpr, (tuple, list)) or len(opExpr) != 2:
                raise ValueError(
                    "if numterms=3, opExpr must be a tuple or list of two expressions")
            opExpr1, opExpr2 = opExpr
            termName = "%s%s term" % (opExpr1, opExpr2)
        else:
            termName = "%s term" % (opExpr,)
        thisExpr = Forward().setName(termName)
        if rightLeftAssoc == opAssoc.LEFT:
            if arity == 1:
                matchExpr = _FB(lastExpr + opExpr) + Group(lastExpr + OneOrMore(opExpr))
            elif arity == 2:
                if opExpr is not None:
                    matchExpr = _FB(lastExpr + opExpr + lastExpr) + Group(lastExpr + OneOrMore(opExpr + lastExpr))
                else:
                    # no operator expression: adjacent operands
                    matchExpr = _FB(lastExpr + lastExpr) + Group(lastExpr + OneOrMore(lastExpr))
            elif arity == 3:
                matchExpr = (_FB(lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr)
                             + Group(lastExpr + OneOrMore(opExpr1 + lastExpr + opExpr2 + lastExpr)))
            else:
                raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
        elif rightLeftAssoc == opAssoc.RIGHT:
            if arity == 1:
                # try to avoid LR with this extra test
                if not isinstance(opExpr, Optional):
                    opExpr = Optional(opExpr)
                matchExpr = _FB(opExpr.expr + thisExpr) + Group(opExpr + thisExpr)
            elif arity == 2:
                if opExpr is not None:
                    matchExpr = _FB(lastExpr + opExpr + thisExpr) + Group(lastExpr + OneOrMore(opExpr + thisExpr))
                else:
                    matchExpr = _FB(lastExpr + thisExpr) + Group(lastExpr + OneOrMore(thisExpr))
            elif arity == 3:
                matchExpr = (_FB(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr)
                             + Group(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr))
            else:
                raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
        else:
            raise ValueError("operator must indicate right or left associativity")
        if pa:
            if isinstance(pa, (tuple, list)):
                matchExpr.setParseAction(*pa)
            else:
                matchExpr.setParseAction(pa)
        # this level matches its own operator form, or falls through to
        # the previous (tighter-binding) level
        thisExpr <<= (matchExpr.setName(termName) | lastExpr)
        lastExpr = thisExpr
    ret <<= lastExpr
    return ret
6116
operatorPrecedence = infixNotation
"""(Deprecated) Former name of :class:`infixNotation`, will be
dropped in a future release."""

# Quoted-string expressions.  Each regex matches the opening quote and the
# string body: any character other than the quote, a newline or a backslash;
# a doubled quote; or a backslash escape (``\x`` must be followed by hex
# digits).  The closing quote is a separate literal, and Combine joins the
# pieces into a single token.
dblQuotedString = Combine(Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"').setName("string enclosed in double quotes")
sglQuotedString = Combine(Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'").setName("string enclosed in single quotes")
# either quote style, using the same two patterns as above
quotedString = Combine(Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"'
                       | Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'").setName("quotedString using single or double quotes")
# a quotedString immediately preceded by a literal "u"
unicodeString = Combine(_L('u') + quotedString.copy()).setName("unicode string literal")
6126
def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString.copy()):
    """Helper method for defining nested lists enclosed in opening and
    closing delimiters ("(" and ")" are the default).

    Parameters:
     - opener - opening character for a nested list
       (default= ``"("``); can also be a pyparsing expression
     - closer - closing character for a nested list
       (default= ``")"``); can also be a pyparsing expression
     - content - expression for items within the nested lists
       (default= ``None``)
     - ignoreExpr - expression for ignoring opening and closing
       delimiters (default= :class:`quotedString`)

    If an expression is not provided for the content argument, the
    nested expression will capture all whitespace-delimited content
    between delimiters as a list of separate values. In that case both
    ``opener`` and ``closer`` must be strings.

    Use the ``ignoreExpr`` argument to define expressions that may
    contain opening or closing characters that should not be treated as
    opening or closing characters for nesting, such as quotedString or
    a comment expression.  Specify multiple expressions using an
    :class:`Or` or :class:`MatchFirst`. The default is
    :class:`quotedString`, but if no expressions are to be ignored, then
    pass ``None`` for this argument.

    Raises ``ValueError`` if ``opener`` equals ``closer``, or if no
    content expression is given and the delimiters are not strings.

    Example::

        data_type = oneOf("void int short long char float double")
        decl_data_type = Combine(data_type + Optional(Word('*')))
        ident = Word(alphas+'_', alphanums+'_')
        number = pyparsing_common.number
        arg = Group(decl_data_type + ident)
        LPAR, RPAR = map(Suppress, "()")

        code_body = nestedExpr('{', '}', ignoreExpr=(quotedString | cStyleComment))

        c_function = (decl_data_type("type")
                      + ident("name")
                      + LPAR + Optional(delimitedList(arg), [])("args") + RPAR
                      + code_body("body"))
        c_function.ignore(cStyleComment)

        source_code = '''
            int is_odd(int x) {
                return (x%2);
            }

            int dec_to_hex(char hchar) {
                if (hchar >= '0' && hchar <= '9') {
                    return (ord(hchar)-ord('0'));
                } else {
                    return (10+ord(hchar)-ord('A'));
                }
            }
        '''
        for func in c_function.searchString(source_code):
            print("%(name)s (%(type)s) args: %(args)s" % func)


    prints::

        is_odd (int) args: [['int', 'x']]
        dec_to_hex (int) args: [['char', 'hchar']]
    """
    if opener == closer:
        raise ValueError("opening and closing strings cannot be the same")

    if content is None:
        if not (isinstance(opener, basestring) and isinstance(closer, basestring)):
            raise ValueError("opening and closing arguments must be strings if no content expression is given")

        whitespace = ParserElement.DEFAULT_WHITE_CHARS

        def strip_token(t):
            return t[0].strip()

        singleCharDelims = len(opener) == 1 and len(closer) == 1
        if singleCharDelims and ignoreExpr is not None:
            # one character at a time, so ignoreExpr is re-checked at
            # every position
            content = Combine(OneOrMore(~ignoreExpr
                                        + CharsNotIn(opener + closer + whitespace, exact=1))
                              ).setParseAction(strip_token)
        elif singleCharDelims:
            content = (empty.copy()
                       + CharsNotIn(opener + closer + whitespace).setParseAction(strip_token))
        elif ignoreExpr is not None:
            # multi-character delimiters need negative lookaheads instead
            # of a simple excluded-character set
            content = Combine(OneOrMore(~ignoreExpr
                                        + ~Literal(opener)
                                        + ~Literal(closer)
                                        + CharsNotIn(whitespace, exact=1))
                              ).setParseAction(strip_token)
        else:
            content = Combine(OneOrMore(~Literal(opener)
                                        + ~Literal(closer)
                                        + CharsNotIn(whitespace, exact=1))
                              ).setParseAction(strip_token)

    ret = Forward()
    if ignoreExpr is None:
        item = ret | content
    else:
        item = ignoreExpr | ret | content
    ret <<= Group(Suppress(opener) + ZeroOrMore(item) + Suppress(closer))
    ret.setName('nested %s%s expression' % (opener, closer))
    return ret
6230
def indentedBlock(blockStatementExpr, indentStack, indent=True):
    """Helper method for defining space-delimited indentation blocks,
    such as those used to define block statements in Python source code.

    Parameters:

     - blockStatementExpr - expression defining syntax of statement that
       is repeated within the indented block
     - indentStack - list created by caller to manage indentation stack
       (multiple statementWithIndentedBlock expressions within a single
       grammar should share a common indentStack)
     - indent - boolean indicating whether block must be indented beyond
       the current level; set to False for block of left-most
       statements (default= ``True``)

    A valid block must contain at least one ``blockStatement``.

    Example::

        data = '''
        def A(z):
          A1
          B = 100
          G = A2
          A2
          A3
        B
        def BB(a,b,c):
          BB1
          def BBA():
            bba1
            bba2
            bba3
        C
        D
        def spam(x,y):
             def eggs(z):
                 pass
        '''


        indentStack = [1]
        stmt = Forward()

        identifier = Word(alphas, alphanums)
        funcDecl = ("def" + identifier + Group("(" + Optional(delimitedList(identifier)) + ")") + ":")
        func_body = indentedBlock(stmt, indentStack)
        funcDef = Group(funcDecl + func_body)

        rvalue = Forward()
        funcCall = Group(identifier + "(" + Optional(delimitedList(rvalue)) + ")")
        rvalue << (funcCall | identifier | Word(nums))
        assignment = Group(identifier + "=" + rvalue)
        stmt << (funcDef | assignment | identifier)

        module_body = OneOrMore(stmt)

        parseTree = module_body.parseString(data)
        parseTree.pprint()

    prints::

        [['def',
          'A',
          ['(', 'z', ')'],
          ':',
          [['A1'], [['B', '=', '100']], [['G', '=', 'A2']], ['A2'], ['A3']]],
         'B',
         ['def',
          'BB',
          ['(', 'a', 'b', 'c', ')'],
          ':',
          [['BB1'], [['def', 'BBA', ['(', ')'], ':', [['bba1'], ['bba2'], ['bba3']]]]]],
         'C',
         'D',
         ['def',
          'spam',
          ['(', 'x', 'y', ')'],
          ':',
          [[['def', 'eggs', ['(', 'z', ')'], ':', [['pass']]]]]]]
    """
    # snapshot of the caller's stack, restored when the block fails to match
    saved_stack = indentStack[:]

    def restore_stack():
        indentStack[:] = saved_stack

    def check_peer_indent(instring, loc, toks):
        # statement must sit exactly at the current indentation level
        if loc >= len(instring):
            return
        column = col(loc, instring)
        expected = indentStack[-1]
        if column > expected:
            raise ParseException(instring, loc, "illegal nesting")
        if column != expected:
            raise ParseException(instring, loc, "not a peer entry")

    def check_sub_indent(instring, loc, toks):
        # block must start deeper than the current level; push the new level
        column = col(loc, instring)
        if column <= indentStack[-1]:
            raise ParseException(instring, loc, "not a subentry")
        indentStack.append(column)

    def check_unindent(instring, loc, toks):
        # after the block, the column must be one of the recorded levels
        if loc >= len(instring):
            return
        column = col(loc, instring)
        if not indentStack or column not in indentStack:
            raise ParseException(instring, loc, "not an unindent")
        if column < indentStack[-1]:
            indentStack.pop()

    NL = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress(), stopOn=StringEnd())
    INDENT = (Empty() + Empty().setParseAction(check_sub_indent)).setName('INDENT')
    PEER = Empty().setParseAction(check_peer_indent).setName('')
    UNDENT = Empty().setParseAction(check_unindent).setName('UNINDENT')

    statements = OneOrMore(PEER + Group(blockStatementExpr) + Optional(NL), stopOn=StringEnd())
    if indent:
        smExpr = Group(Optional(NL) + INDENT + statements + UNDENT)
    else:
        smExpr = Group(Optional(NL) + statements + UNDENT)

    smExpr.setFailAction(lambda a, b, c, d: restore_stack())
    # registered only after the block expression has been assembled
    blockStatementExpr.ignore(_bslash + LineEnd())
    return smExpr.setName('indented block')
6356
# character sets built with srange() from the code point ranges
# 0xc0-0xd6, 0xd8-0xf6 and 0xf8-0xff
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
# code points 0xa1-0xbf plus 0xd7 and 0xf7 (the two gaps in the ranges above)
punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")

# open/close tag expressions whose tag name is any Word(alphas, alphanums + "_:")
anyOpenTag, anyCloseTag = makeHTMLTags(Word(alphas, alphanums + "_:").setName('any tag'))
# entity name -> replacement character; note that "nbsp" maps to an ordinary space
_htmlEntityMap = dict(zip("gt lt amp nbsp quot apos".split(), '><& "\''))
# matches "&name;" for the names above, capturing the name in the regex group "entity"
commonHTMLEntity = Regex('&(?P<entity>' + '|'.join(_htmlEntityMap.keys()) +");").setName("common HTML entity")
def replaceHTMLEntity(t):
    """Helper parser action to replace common HTML entities with their special characters.

    Looks up the ``entity`` results name of the match in the entity map;
    returns ``None`` if the name is not in the map.
    """
    entity_name = t.entity
    return _htmlEntityMap.get(entity_name)
6366
# it's easy to get these comment structures wrong - they're very common, so may as well make them available

# the regex matches "/*" and the body (any non-"*" character, or a "*" not
# followed by "/"); the closing "*/" is a separate literal joined by Combine
cStyleComment = Combine(Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/').setName("C style comment")
"Comment of the form ``/* ... */``"

# non-greedy body, so the match stops at the first "-->"
htmlComment = Regex(r"<!--[\s\S]*?-->").setName("HTML comment")
"Comment of the form ``<!-- ... -->``"

# everything up to the end of the current line, without skipping leading whitespace
restOfLine = Regex(r".*").leaveWhitespace().setName("rest of line")
# a backslash immediately followed by a newline continues the comment on the next line
dblSlashComment = Regex(r"//(?:\\\n|[^\n])*").setName("// comment")
"Comment of the form ``// ... (to end of line)``"

# same "/* ... */" pattern as cStyleComment, or a "//" comment
cppStyleComment = Combine(Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/' | dblSlashComment).setName("C++ style comment")
"Comment of either form :class:`cStyleComment` or :class:`dblSlashComment`"

javaStyleComment = cppStyleComment
"Same as :class:`cppStyleComment`"

pythonStyleComment = Regex(r"#.*").setName("Python style comment")
"Comment of the form ``# ... (to end of line)``"

# one list item: printable words containing no comma; a run of spaces/tabs
# is included only when it is not followed by a comma or the end of the line
_commasepitem = Combine(OneOrMore(Word(printables, excludeChars=',')
                                  + Optional(Word(" \t")
                                             + ~Literal(",") + ~LineEnd()))).streamline().setName("commaItem")
# each item is optional, so an empty field yields the default ""
commaSeparatedList = delimitedList(Optional(quotedString.copy() | _commasepitem, default="")).setName("commaSeparatedList")
"""(Deprecated) Predefined expression of 1 or more printable words or
quoted strings, separated by commas.

This expression is deprecated in favor of :class:`pyparsing_common.comma_separated_list`.
"""
6396
6397 # some other useful expressions - using lower-case class name since we are really using this as a namespace
class pyparsing_common:
    """Here are some common low-level expressions that may be useful in
    jump-starting parser development:

     - numeric forms (:class:`integers<integer>`, :class:`reals<real>`,
       :class:`scientific notation<sci_real>`)
     - common :class:`programming identifiers<identifier>`
     - network addresses (:class:`MAC<mac_address>`,
       :class:`IPv4<ipv4_address>`, :class:`IPv6<ipv6_address>`)
     - ISO8601 :class:`dates<iso8601_date>` and
       :class:`datetime<iso8601_datetime>`
     - :class:`UUID<uuid>`
     - :class:`comma-separated list<comma_separated_list>`

    Parse actions:

     - :class:`convertToInteger`
     - :class:`convertToFloat`
     - :class:`convertToDate`
     - :class:`convertToDatetime`
     - :class:`stripHTMLTags`
     - :class:`upcaseTokens`
     - :class:`downcaseTokens`

    Example::

        pyparsing_common.number.runTests('''
            # any int or real number, returned as the appropriate type
            100
            -100
            +100
            3.14159
            6.02e23
            1e-12
            ''')

        pyparsing_common.fnumber.runTests('''
            # any int or real number, returned as float
            100
            -100
            +100
            3.14159
            6.02e23
            1e-12
            ''')

        pyparsing_common.hex_integer.runTests('''
            # hex numbers
            100
            FF
            ''')

        pyparsing_common.fraction.runTests('''
            # fractions
            1/2
            -3/4
            ''')

        pyparsing_common.mixed_integer.runTests('''
            # mixed fractions
            1
            1/2
            -3/4
            1-3/4
            ''')

        import uuid
        pyparsing_common.uuid.setParseAction(tokenMap(uuid.UUID))
        pyparsing_common.uuid.runTests('''
            # uuid
            12345678-1234-5678-1234-567812345678
            ''')

    prints::

        # any int or real number, returned as the appropriate type
        100
        [100]

        -100
        [-100]

        +100
        [100]

        3.14159
        [3.14159]

        6.02e23
        [6.02e+23]

        1e-12
        [1e-12]

        # any int or real number, returned as float
        100
        [100.0]

        -100
        [-100.0]

        +100
        [100.0]

        3.14159
        [3.14159]

        6.02e23
        [6.02e+23]

        1e-12
        [1e-12]

        # hex numbers
        100
        [256]

        FF
        [255]

        # fractions
        1/2
        [0.5]

        -3/4
        [-0.75]

        # mixed fractions
        1
        [1]

        1/2
        [0.5]

        -3/4
        [-0.75]

        1-3/4
        [1.75]

        # uuid
        12345678-1234-5678-1234-567812345678
        [UUID('12345678-1234-5678-1234-567812345678')]
    """

    convertToInteger = tokenMap(int)
    """
    Parse action for converting parsed integers to Python int
    """

    convertToFloat = tokenMap(float)
    """
    Parse action for converting parsed numbers to Python float
    """

    integer = Word(nums).setName("integer").setParseAction(convertToInteger)
    """expression that parses an unsigned integer, returns an int"""

    hex_integer = Word(hexnums).setName("hex integer").setParseAction(tokenMap(int, 16))
    """expression that parses a hexadecimal integer, returns an int"""

    signed_integer = Regex(r'[+-]?\d+').setName("signed integer").setParseAction(convertToInteger)
    """expression that parses an integer with optional leading sign, returns an int"""

    # calling an expression with no arguments returns a copy, so replacing the
    # parse action here does not disturb signed_integer itself
    fraction = (signed_integer().setParseAction(convertToFloat)
                + '/'
                + signed_integer().setParseAction(convertToFloat)).setName("fraction")
    """fractional expression of an integer divided by an integer, returns a float"""
    # tokens are [numerator, '/', denominator]
    fraction.addParseAction(lambda t: t[0]/t[-1])

    mixed_integer = (fraction
                     | signed_integer + Optional(Optional('-').suppress() + fraction)
                     ).setName("fraction or mixed integer-fraction")
    """mixed integer of the form 'integer - fraction', with optional leading integer, returns float"""
    # the '-' separator is suppressed, so the remaining tokens are summed
    mixed_integer.addParseAction(sum)

    real = Regex(r'[+-]?(?:\d+\.\d*|\.\d+)').setName("real number").setParseAction(convertToFloat)
    """expression that parses a floating point number and returns a float"""

    sci_real = (Regex(r'[+-]?(?:\d+(?:[eE][+-]?\d+)|(?:\d+\.\d*|\.\d+)(?:[eE][+-]?\d+)?)')
                .setName("real number with scientific notation")
                .setParseAction(convertToFloat))
    """expression that parses a floating point number with optional
    scientific notation and returns a float"""

    # streamlining this expression makes the docs nicer-looking
    number = (sci_real | real | signed_integer).streamline()
    """any numeric expression, returns the corresponding Python type"""

    fnumber = Regex(r'[+-]?\d+\.?\d*([eE][+-]?\d+)?').setName("fnumber").setParseAction(convertToFloat)
    """any int or real number, returned as float"""

    identifier = Word(alphas + '_', alphanums + '_').setName("identifier")
    """typical code identifier (leading alpha or '_', followed by 0 or more alphas, nums, or '_')"""

    ipv4_address = Regex(
        r'(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})(\.(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})){3}'
    ).setName("IPv4 address")
    "IPv4 address (``0.0.0.0 - 255.255.255.255``)"

    _ipv6_part = Regex(r'[0-9a-fA-F]{1,4}').setName("hex_integer")
    _full_ipv6_address = (_ipv6_part + (':' + _ipv6_part) * 7).setName("full IPv6 address")
    _short_ipv6_address = (Optional(_ipv6_part + (':' + _ipv6_part) * (0, 6))
                           + "::"
                           + Optional(_ipv6_part + (':' + _ipv6_part) * (0, 6))
                           ).setName("short IPv6 address")
    # the abbreviated form must contain fewer than 8 hex groups in total; the
    # lambda runs at parse time, when the class name is resolvable
    _short_ipv6_address.addCondition(
        lambda t: sum(1 for tt in t if pyparsing_common._ipv6_part.matches(tt)) < 8
    )
    _mixed_ipv6_address = ("::ffff:" + ipv4_address).setName("mixed IPv6 address")
    ipv6_address = Combine(
        (_full_ipv6_address | _mixed_ipv6_address | _short_ipv6_address).setName("IPv6 address")
    ).setName("IPv6 address")
    "IPv6 address (long, short, or mixed form)"

    # six two-digit hex groups; the back-reference \1 forces every delimiter
    # to be the same character as the first one
    mac_address = Regex(r'[0-9a-fA-F]{2}([:.-])[0-9a-fA-F]{2}(?:\1[0-9a-fA-F]{2}){4}').setName("MAC address")
    "MAC address xx:xx:xx:xx:xx:xx (may also have '-' or '.' delimiters)"

    @staticmethod
    def convertToDate(fmt="%Y-%m-%d"):
        """
        Helper to create a parse action for converting parsed date string to Python datetime.date

        Params -
         - fmt - format to be passed to datetime.strptime (default= ``"%Y-%m-%d"``)

        Example::

            date_expr = pyparsing_common.iso8601_date.copy()
            date_expr.setParseAction(pyparsing_common.convertToDate())
            print(date_expr.parseString("1999-12-31"))

        prints::

            [datetime.date(1999, 12, 31)]
        """
        def cvt_fn(s, l, t):
            try:
                return datetime.strptime(t[0], fmt).date()
            except ValueError as ve:
                # report a conversion failure as a parse failure at this location
                raise ParseException(s, l, str(ve))
        return cvt_fn

    @staticmethod
    def convertToDatetime(fmt="%Y-%m-%dT%H:%M:%S.%f"):
        """Helper to create a parse action for converting parsed
        datetime string to Python datetime.datetime

        Params -
         - fmt - format to be passed to datetime.strptime (default= ``"%Y-%m-%dT%H:%M:%S.%f"``)

        Example::

            dt_expr = pyparsing_common.iso8601_datetime.copy()
            dt_expr.setParseAction(pyparsing_common.convertToDatetime())
            print(dt_expr.parseString("1999-12-31T23:59:59.999"))

        prints::

            [datetime.datetime(1999, 12, 31, 23, 59, 59, 999000)]
        """
        def cvt_fn(s, l, t):
            try:
                return datetime.strptime(t[0], fmt)
            except ValueError as ve:
                # report a conversion failure as a parse failure at this location
                raise ParseException(s, l, str(ve))
        return cvt_fn

    iso8601_date = Regex(r'(?P<year>\d{4})(?:-(?P<month>\d\d)(?:-(?P<day>\d\d))?)?').setName("ISO8601 date")
    "ISO8601 date (``yyyy-mm-dd``) - month and day are optional, so ``yyyy`` and ``yyyy-mm`` also match"

    iso8601_datetime = Regex(
        r'(?P<year>\d{4})-(?P<month>\d\d)-(?P<day>\d\d)[T ](?P<hour>\d\d):(?P<minute>\d\d)(:(?P<second>\d\d(\.\d*)?)?)?(?P<tz>Z|[+-]\d\d:?\d\d)?'
    ).setName("ISO8601 datetime")
    "ISO8601 datetime (``yyyy-mm-ddThh:mm:ss.s(Z|+-00:00)``) - trailing seconds, milliseconds, and timezone optional; accepts separating ``'T'`` or ``' '``"

    uuid = Regex(r'[0-9a-fA-F]{8}(-[0-9a-fA-F]{4}){3}-[0-9a-fA-F]{12}').setName("UUID")
    "UUID (``xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx``)"

    _html_stripper = anyOpenTag.suppress() | anyCloseTag.suppress()

    @staticmethod
    def stripHTMLTags(s, l, tokens):
        """Parse action to remove HTML tags from web page HTML source

        Example::

            # strip HTML links from normal text
            text = '<td>More info at the <a href="https://github.com/pyparsing/pyparsing/wiki">pyparsing</a> wiki page</td>'
            td, td_end = makeHTMLTags("TD")
            table_text = td + SkipTo(td_end).setParseAction(pyparsing_common.stripHTMLTags)("body") + td_end
            print(table_text.parseString(text).body)

        Prints::

            More info at the pyparsing wiki page
        """
        return pyparsing_common._html_stripper.transformString(tokens[0])

    _commasepitem = Combine(OneOrMore(~Literal(",")
                                      + ~LineEnd()
                                      + Word(printables, excludeChars=',')
                                      + Optional(White(" \t")))).streamline().setName("commaItem")
    comma_separated_list = delimitedList(Optional(quotedString.copy()
                                                  | _commasepitem, default='')
                                         ).setName("comma separated list")
    """Predefined expression of 1 or more printable words or quoted strings, separated by commas."""

    upcaseTokens = staticmethod(tokenMap(lambda t: _ustr(t).upper()))
    """Parse action to convert tokens to upper case."""

    downcaseTokens = staticmethod(tokenMap(lambda t: _ustr(t).lower()))
    """Parse action to convert tokens to lower case."""
6696
6697
class _lazyclassproperty(object):
    """Descriptor that computes a class-level value on first access and
    memoizes it in a per-class ``_intern`` dict, keyed by the wrapped
    function's name."""

    def __init__(self, fn):
        self.fn = fn
        self.__doc__ = fn.__doc__
        self.__name__ = fn.__name__

    @staticmethod
    def _needs_own_cache(owner):
        # True when 'owner' has no '_intern' at all, or the one it sees is
        # really an ancestor's dict reached through inheritance
        if not hasattr(owner, '_intern'):
            return True
        for ancestor in owner.__mro__[1:]:
            # the fresh [] default can never be identical to owner._intern
            if owner._intern is getattr(ancestor, '_intern', []):
                return True
        return False

    def __get__(self, obj, cls):
        owner = type(obj) if cls is None else cls
        if self._needs_own_cache(owner):
            # give every class its own cache so subclasses do not reuse
            # values computed for a base class
            owner._intern = {}
        key = self.fn.__name__
        if key not in owner._intern:
            owner._intern[key] = self.fn(owner)
        return owner._intern[key]
6714
6715
class unicode_set(object):
    """
    A set of Unicode characters, for language-specific strings for
    ``alphas``, ``nums``, ``alphanums``, and ``printables``.
    A unicode_set is defined by a list of ranges in the Unicode character
    set, in a class attribute ``_ranges``, such as::

        _ranges = [(0x0020, 0x007e), (0x00a0, 0x00ff),]

    A unicode set can also be defined using multiple inheritance of other unicode sets::

        class CJK(Chinese, Japanese, Korean):
            pass
    """
    _ranges = []

    @classmethod
    def _get_chars_for_ranges(cls):
        # gather code points from this class and each ancestor in MRO order,
        # stopping once the unicode_set base itself is reached
        codepoints = set()
        for klass in cls.__mro__:
            if klass is unicode_set:
                break
            for bounds in klass._ranges:
                # a one-element tuple denotes a single code point:
                # bounds[0] and bounds[-1] are then the same value
                codepoints.update(range(bounds[0], bounds[-1] + 1))
        return [unichr(cp) for cp in sorted(codepoints)]

    @_lazyclassproperty
    def printables(cls):
        "all non-whitespace characters in this range"
        return u''.join(ch for ch in cls._get_chars_for_ranges() if not ch.isspace())

    @_lazyclassproperty
    def alphas(cls):
        "all alphabetic characters in this range"
        return u''.join(ch for ch in cls._get_chars_for_ranges() if ch.isalpha())

    @_lazyclassproperty
    def nums(cls):
        "all numeric digit characters in this range"
        return u''.join(ch for ch in cls._get_chars_for_ranges() if ch.isdigit())

    @_lazyclassproperty
    def alphanums(cls):
        "all alphanumeric characters in this range"
        letters = cls.alphas
        digits = cls.nums
        return letters + digits
6761
6762
class pyparsing_unicode(unicode_set):
    """
    A namespace class for defining common language unicode_sets.
    """
    # every code point from the space character up to the interpreter maximum
    _ranges = [(32, sys.maxunicode)]

    class Latin1(unicode_set):
        "Unicode set for Latin-1 Unicode Character Range"
        _ranges = [
            (0x0020, 0x007e),
            (0x00a0, 0x00ff),
        ]

    class LatinA(unicode_set):
        "Unicode set for Latin-A Unicode Character Range"
        _ranges = [
            (0x0100, 0x017f),
        ]

    class LatinB(unicode_set):
        "Unicode set for Latin-B Unicode Character Range"
        _ranges = [
            (0x0180, 0x024f),
        ]

    class Greek(unicode_set):
        "Unicode set for Greek Unicode Character Ranges"
        # one-element tuples denote a single code point
        _ranges = [
            (0x0370, 0x03ff),
            (0x1f00, 0x1f15),
            (0x1f18, 0x1f1d),
            (0x1f20, 0x1f45),
            (0x1f48, 0x1f4d),
            (0x1f50, 0x1f57),
            (0x1f59,),
            (0x1f5b,),
            (0x1f5d,),
            (0x1f5f, 0x1f7d),
            (0x1f80, 0x1fb4),
            (0x1fb6, 0x1fc4),
            (0x1fc6, 0x1fd3),
            (0x1fd6, 0x1fdb),
            (0x1fdd, 0x1fef),
            (0x1ff2, 0x1ff4),
            (0x1ff6, 0x1ffe),
        ]

    class Cyrillic(unicode_set):
        "Unicode set for Cyrillic Unicode Character Range"
        _ranges = [
            (0x0400, 0x04ff),
        ]

    class Chinese(unicode_set):
        "Unicode set for Chinese Unicode Character Range"
        _ranges = [
            (0x4e00, 0x9fff),
            (0x3000, 0x303f),
        ]

    class Japanese(unicode_set):
        "Unicode set for Japanese Unicode Character Range, combining Kanji, Hiragana, and Katakana ranges"
        # starts empty; the nested script classes below carry the actual ranges
        _ranges = []

        class Kanji(unicode_set):
            "Unicode set for Kanji Unicode Character Range"
            _ranges = [
                (0x4e00, 0x9fbf),
                (0x3000, 0x303f),
            ]

        class Hiragana(unicode_set):
            "Unicode set for Hiragana Unicode Character Range"
            _ranges = [
                (0x3040, 0x309f),
            ]

        class Katakana(unicode_set):
            "Unicode set for Katakana Unicode Character Range"
            _ranges = [
                (0x30a0, 0x30ff),
            ]

    class Korean(unicode_set):
        "Unicode set for Korean Unicode Character Range"
        _ranges = [
            (0xac00, 0xd7af),
            (0x1100, 0x11ff),
            (0x3130, 0x318f),
            (0xa960, 0xa97f),
            (0xd7b0, 0xd7ff),
            (0x3000, 0x303f),
        ]

    class CJK(Chinese, Japanese, Korean):
        "Unicode set for combined Chinese, Japanese, and Korean (CJK) Unicode Character Range"

    class Thai(unicode_set):
        "Unicode set for Thai Unicode Character Range"
        _ranges = [
            (0x0e01, 0x0e3a),
            (0x0e3f, 0x0e5b),
        ]

    class Arabic(unicode_set):
        "Unicode set for Arabic Unicode Character Range"
        _ranges = [
            (0x0600, 0x061b),
            (0x061e, 0x06ff),
            (0x0700, 0x077f),
        ]

    class Hebrew(unicode_set):
        "Unicode set for Hebrew Unicode Character Range"
        _ranges = [
            (0x0590, 0x05ff),
        ]

    class Devanagari(unicode_set):
        "Unicode set for Devanagari Unicode Character Range"
        _ranges = [
            (0x0900, 0x097f),
            (0xa8e0, 0xa8ff),
        ]
6836
# Japanese is the concatenation of its three nested script sets, in the
# order Kanji, Hiragana, Katakana
pyparsing_unicode.Japanese._ranges = sum(
    (
        script._ranges
        for script in (
            pyparsing_unicode.Japanese.Kanji,
            pyparsing_unicode.Japanese.Hiragana,
            pyparsing_unicode.Japanese.Katakana,
        )
    ),
    [],
)

# define ranges in language character sets
if PY_3:
    # (namespace to extend, native-language attribute name, character set)
    for _owner, _native_name, _charset in (
        (pyparsing_unicode, u"العربية", pyparsing_unicode.Arabic),
        (pyparsing_unicode, u"中文", pyparsing_unicode.Chinese),
        (pyparsing_unicode, u"кириллица", pyparsing_unicode.Cyrillic),
        (pyparsing_unicode, u"Ελληνικά", pyparsing_unicode.Greek),
        (pyparsing_unicode, u"עִברִית", pyparsing_unicode.Hebrew),
        (pyparsing_unicode, u"日本語", pyparsing_unicode.Japanese),
        (pyparsing_unicode.Japanese, u"漢字", pyparsing_unicode.Japanese.Kanji),
        (pyparsing_unicode.Japanese, u"カタカナ", pyparsing_unicode.Japanese.Katakana),
        (pyparsing_unicode.Japanese, u"ひらがな", pyparsing_unicode.Japanese.Hiragana),
        (pyparsing_unicode, u"한국어", pyparsing_unicode.Korean),
        (pyparsing_unicode, u"ไทย", pyparsing_unicode.Thai),
        (pyparsing_unicode, u"देवनागरी", pyparsing_unicode.Devanagari),
    ):
        setattr(_owner, _native_name, _charset)
    # do not leave the loop variables behind in the module namespace
    del _owner, _native_name, _charset
6855
6856
class pyparsing_test:
    """
    namespace class for classes useful in writing unit tests
    """

    class reset_pyparsing_context:
        """
        Context manager to be used when writing unit tests that modify pyparsing config values:
         - packrat parsing
         - default whitespace characters.
         - default keyword characters
         - literal string auto-conversion class
         - __diag__ settings
         - __compat__ settings

        Example:
            with reset_pyparsing_context():
                # test that literals used to construct a grammar are automatically suppressed
                ParserElement.inlineLiteralsUsing(Suppress)

                term = Word(alphas) | Word(nums)
                group = Group('(' + term[...] + ')')

                # assert that the '()' characters are not included in the parsed tokens
                self.assertParseAndCheckList(group, "(abc 123 def)", ['abc', '123', 'def'])

            # after exiting context manager, literals are converted to Literal expressions again
        """

        def __init__(self):
            # snapshot of the global settings, filled in by save()
            self._save_context = {}

        def save(self):
            """Record the current global pyparsing settings and return self."""
            self._save_context["default_whitespace"] = ParserElement.DEFAULT_WHITE_CHARS
            self._save_context["default_keyword_chars"] = Keyword.DEFAULT_KEYWORD_CHARS
            self._save_context[
                "literal_string_class"
            ] = ParserElement._literalStringClass
            self._save_context["packrat_enabled"] = ParserElement._packratEnabled
            self._save_context["packrat_parse"] = ParserElement._parse
            self._save_context["__diag__"] = {
                name: getattr(__diag__, name) for name in __diag__._all_names
            }
            self._save_context["__compat__"] = {
                "collect_all_And_tokens": __compat__.collect_all_And_tokens
            }
            return self

        def restore(self):
            """Reapply the settings recorded by save(); returns None."""
            # reset pyparsing global state
            if (
                ParserElement.DEFAULT_WHITE_CHARS
                != self._save_context["default_whitespace"]
            ):
                ParserElement.setDefaultWhitespaceChars(
                    self._save_context["default_whitespace"]
                )
            Keyword.DEFAULT_KEYWORD_CHARS = self._save_context["default_keyword_chars"]
            ParserElement.inlineLiteralsUsing(
                self._save_context["literal_string_class"]
            )
            for name, value in self._save_context["__diag__"].items():
                setattr(__diag__, name, value)
            ParserElement._packratEnabled = self._save_context["packrat_enabled"]
            ParserElement._parse = self._save_context["packrat_parse"]
            # the saved "__compat__" entry is a {name: value} dict; restore each
            # flag individually instead of assigning the dict itself to the
            # flag (which would leave it permanently truthy)
            for name, value in self._save_context["__compat__"].items():
                setattr(__compat__, name, value)

        def __enter__(self):
            return self.save()

        def __exit__(self, *args):
            # restore() returns None, so exceptions raised in the with-body
            # are never suppressed
            return self.restore()

    class TestParseResultsAsserts:
        """
        A mixin class to add parse results assertion methods to normal unittest.TestCase classes.
        """
        def assertParseResultsEquals(
            self, result, expected_list=None, expected_dict=None, msg=None
        ):
            """
            Unit test assertion to compare a ParseResults object with an optional expected_list,
            and compare any defined results names with an optional expected_dict.
            """
            if expected_list is not None:
                self.assertEqual(expected_list, result.asList(), msg=msg)
            if expected_dict is not None:
                self.assertEqual(expected_dict, result.asDict(), msg=msg)

        def assertParseAndCheckList(
            self, expr, test_string, expected_list, msg=None, verbose=True
        ):
            """
            Convenience wrapper assert to test a parser element and input string, and assert that
            the resulting ParseResults.asList() is equal to the expected_list.
            """
            result = expr.parseString(test_string, parseAll=True)
            if verbose:
                print(result.dump())
            self.assertParseResultsEquals(result, expected_list=expected_list, msg=msg)

        def assertParseAndCheckDict(
            self, expr, test_string, expected_dict, msg=None, verbose=True
        ):
            """
            Convenience wrapper assert to test a parser element and input string, and assert that
            the resulting ParseResults.asDict() is equal to the expected_dict.
            """
            result = expr.parseString(test_string, parseAll=True)
            if verbose:
                print(result.dump())
            self.assertParseResultsEquals(result, expected_dict=expected_dict, msg=msg)

        def assertRunTestResults(
            self, run_tests_report, expected_parse_results=None, msg=None
        ):
            """
            Unit test assertion to evaluate output of ParserElement.runTests(). If a list of
            list-dict tuples is given as the expected_parse_results argument, then these are zipped
            with the report tuples returned by runTests and evaluated using assertParseResultsEquals.
            Finally, asserts that the overall runTests() success value is True.

            :param run_tests_report: tuple(bool, [tuple(str, ParseResults or Exception)]) returned from runTests
            :param expected_parse_results (optional): [tuple(str, list, dict, Exception)]
            """
            run_test_success, run_test_results = run_tests_report

            if expected_parse_results is not None:
                # zip() pairs results with expectations positionally and stops
                # at the shorter of the two sequences
                merged = [
                    (rpt[0], rpt[1], expected)
                    for rpt, expected in zip(run_test_results, expected_parse_results)
                ]
                for test_string, result, expected in merged:
                    # expected should be a tuple containing a list and/or a dict or an exception,
                    # and optional failure message string
                    # an empty tuple will skip any result validation
                    fail_msg = next(
                        (exp for exp in expected if isinstance(exp, str)), None
                    )
                    expected_exception = next(
                        (
                            exp
                            for exp in expected
                            if isinstance(exp, type) and issubclass(exp, Exception)
                        ),
                        None,
                    )
                    if expected_exception is not None:
                        with self.assertRaises(
                            expected_exception=expected_exception, msg=fail_msg or msg
                        ):
                            # runTests reports failures as exception instances;
                            # re-raise so assertRaises can check the type
                            if isinstance(result, Exception):
                                raise result
                    else:
                        expected_list = next(
                            (exp for exp in expected if isinstance(exp, list)), None
                        )
                        expected_dict = next(
                            (exp for exp in expected if isinstance(exp, dict)), None
                        )
                        if (expected_list, expected_dict) != (None, None):
                            self.assertParseResultsEquals(
                                result,
                                expected_list=expected_list,
                                expected_dict=expected_dict,
                                msg=fail_msg or msg,
                            )
                        else:
                            # warning here maybe?
                            print("no validation for {!r}".format(test_string))

            # do this last, in case some specific test results can be reported instead
            self.assertTrue(
                run_test_success, msg=msg if msg is not None else "failed runTests"
            )

        @contextmanager
        def assertRaisesParseException(self, exc_type=ParseException, msg=None):
            """Context manager asserting that the enclosed code raises exc_type
            (ParseException by default)."""
            with self.assertRaises(exc_type, msg=msg):
                yield
7036
7037
if __name__ == "__main__":
    # self-demo: a tiny SELECT-statement grammar plus a few pyparsing_common
    # expressions, each exercised through runTests()

    selectToken = CaselessLiteral("select")
    fromToken = CaselessLiteral("from")

    ident = Word(alphas, alphanums + "_$")

    # dotted names are combined into one token and upper-cased
    columnName = delimitedList(ident, ".", combine=True).setParseAction(upcaseTokens)
    columnNameList = Group(delimitedList(columnName)).setName("columns")
    columnSpec = ('*' | columnNameList)

    tableName = delimitedList(ident, ".", combine=True).setParseAction(upcaseTokens)
    tableNameList = Group(delimitedList(tableName)).setName("tables")

    simpleSQL = (
        selectToken("command")
        + columnSpec("columns")
        + fromToken
        + tableNameList("tables")
    )

    # demo runTests method, including embedded comments in test string
    simpleSQL.runTests("""
        # '*' as column list and dotted table name
        select * from SYS.XYZZY

        # caseless match on "SELECT", and casts back to "select"
        SELECT * from XYZZY, ABC

        # list of column names, and mixed case SELECT keyword
        Select AA,BB,CC from Sys.dual

        # multiple tables
        Select A, B, C from Sys.dual, Table2

        # invalid SELECT keyword - should fail
        Xelect A, B, C from Sys.dual

        # incomplete command - should fail
        Select

        # invalid column name - should fail
        Select ^^^ frox Sys.dual

        """)

    # the same samples are run first through 'number' (native int/float
    # results) and then through 'fnumber' (always float)
    numeric_samples = """
        100
        -100
        +100
        3.14159
        6.02e23
        1e-12
        """
    for numeric_expr in (pyparsing_common.number, pyparsing_common.fnumber):
        numeric_expr.runTests(numeric_samples)

    pyparsing_common.hex_integer.runTests("""
        100
        FF
        """)

    import uuid
    pyparsing_common.uuid.setParseAction(tokenMap(uuid.UUID))
    pyparsing_common.uuid.runTests("""
        12345678-1234-5678-1234-567812345678
        """)