comparison planemo/lib/python3.7/site-packages/pip/_vendor/pyparsing.py @ 1:56ad4e20f292 draft

"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
author guerler
date Fri, 31 Jul 2020 00:32:28 -0400
parents
children
comparison
equal deleted inserted replaced
0:d30785e31577 1:56ad4e20f292
1 #-*- coding: utf-8 -*-
2 # module pyparsing.py
3 #
4 # Copyright (c) 2003-2019 Paul T. McGuire
5 #
6 # Permission is hereby granted, free of charge, to any person obtaining
7 # a copy of this software and associated documentation files (the
8 # "Software"), to deal in the Software without restriction, including
9 # without limitation the rights to use, copy, modify, merge, publish,
10 # distribute, sublicense, and/or sell copies of the Software, and to
11 # permit persons to whom the Software is furnished to do so, subject to
12 # the following conditions:
13 #
14 # The above copyright notice and this permission notice shall be
15 # included in all copies or substantial portions of the Software.
16 #
17 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18 # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19 # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
20 # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
21 # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
22 # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
23 # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24 #
25
26 __doc__ = \
27 """
28 pyparsing module - Classes and methods to define and execute parsing grammars
29 =============================================================================
30
31 The pyparsing module is an alternative approach to creating and
32 executing simple grammars, vs. the traditional lex/yacc approach, or the
33 use of regular expressions. With pyparsing, you don't need to learn
34 a new syntax for defining grammars or matching expressions - the parsing
35 module provides a library of classes that you use to construct the
36 grammar directly in Python.
37
38 Here is a program to parse "Hello, World!" (or any greeting of the form
39 ``"<salutation>, <addressee>!"``), built up using :class:`Word`,
40 :class:`Literal`, and :class:`And` elements
41 (the :class:`'+'<ParserElement.__add__>` operators create :class:`And` expressions,
42 and the strings are auto-converted to :class:`Literal` expressions)::
43
44 from pip._vendor.pyparsing import Word, alphas
45
46 # define grammar of a greeting
47 greet = Word(alphas) + "," + Word(alphas) + "!"
48
49 hello = "Hello, World!"
50 print (hello, "->", greet.parseString(hello))
51
52 The program outputs the following::
53
54 Hello, World! -> ['Hello', ',', 'World', '!']
55
56 The Python representation of the grammar is quite readable, owing to the
57 self-explanatory class names, and the use of '+', '|' and '^' operators.
58
59 The :class:`ParseResults` object returned from
60 :class:`ParserElement.parseString` can be
61 accessed as a nested list, a dictionary, or an object with named
62 attributes.
63
64 The pyparsing module handles some of the problems that are typically
65 vexing when writing text parsers:
66
67 - extra or missing whitespace (the above program will also handle
68 "Hello,World!", "Hello , World !", etc.)
69 - quoted strings
70 - embedded comments
71
72
73 Getting Started -
74 -----------------
75 Visit the classes :class:`ParserElement` and :class:`ParseResults` to
76 see the base classes that most other pyparsing
77 classes inherit from. Use the docstrings for examples of how to:
78
79 - construct literal match expressions from :class:`Literal` and
80 :class:`CaselessLiteral` classes
81 - construct character word-group expressions using the :class:`Word`
82 class
83 - see how to create repetitive expressions using :class:`ZeroOrMore`
84 and :class:`OneOrMore` classes
85 - use :class:`'+'<And>`, :class:`'|'<MatchFirst>`, :class:`'^'<Or>`,
86 and :class:`'&'<Each>` operators to combine simple expressions into
87 more complex ones
88 - associate names with your parsed results using
89 :class:`ParserElement.setResultsName`
90 - find some helpful expression short-cuts like :class:`delimitedList`
91 and :class:`oneOf`
92 - find more useful common expressions in the :class:`pyparsing_common`
93 namespace class
94 """
95
# Release identification for this copy of pyparsing.
__version__ = "2.4.0"
# Date/time string that accompanies the version number above.
__versionTime__ = "07 Apr 2019 18:28 UTC"
__author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"
99
100 import string
101 from weakref import ref as wkref
102 import copy
103 import sys
104 import warnings
105 import re
106 import sre_constants
107 import collections
108 import pprint
109 import traceback
110 import types
111 from datetime import datetime
112
113 try:
114 # Python 3
115 from itertools import filterfalse
116 except ImportError:
117 from itertools import ifilterfalse as filterfalse
118
119 try:
120 from _thread import RLock
121 except ImportError:
122 from threading import RLock
123
124 try:
125 # Python 3
126 from collections.abc import Iterable
127 from collections.abc import MutableMapping
128 except ImportError:
129 # Python 2.7
130 from collections import Iterable
131 from collections import MutableMapping
132
133 try:
134 from collections import OrderedDict as _OrderedDict
135 except ImportError:
136 try:
137 from ordereddict import OrderedDict as _OrderedDict
138 except ImportError:
139 _OrderedDict = None
140
141 try:
142 from types import SimpleNamespace
143 except ImportError:
144 class SimpleNamespace: pass
145
# version compatibility configuration
# __compat__ is a bare namespace object; each attribute set on it below is a
# feature flag that the rest of the module can consult.
__compat__ = SimpleNamespace()
__compat__.__doc__ = """
A cross-version compatibility configuration for pyparsing features that will be
released in a future version. By setting values in this configuration to True,
those features can be enabled in prior versions for compatibility development
and testing.

- collect_all_And_tokens - flag to enable fix for Issue #63 that fixes erroneous grouping
of results names when an And expression is nested within an Or or MatchFirst; set to
True to enable bugfix to be released in pyparsing 2.4
"""
# the Issue #63 fix described above is switched on by default here
__compat__.collect_all_And_tokens = True
159
160
161 #~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) )
162
# Names exported by ``from pyparsing import *``: module metadata first, then
# the expression classes, then the helper functions and predefined expressions.
__all__ = [ '__version__', '__versionTime__', '__author__', '__compat__',
'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty',
'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal',
'PrecededBy', 'MatchFirst', 'NoMatch', 'NotAny', 'OneOrMore', 'OnlyOnce', 'Optional', 'Or',
'ParseBaseException', 'ParseElementEnhance', 'ParseException', 'ParseExpression', 'ParseFatalException',
'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'RecursiveGrammarException',
'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConverter',
'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore', 'Char',
'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col',
'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString',
'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'hexnums',
'htmlComment', 'javaStyleComment', 'line', 'lineEnd', 'lineStart', 'lineno',
'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral',
'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables',
'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity',
'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd',
'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute',
'indentedBlock', 'originalTextFor', 'ungroup', 'infixNotation','locatedExpr', 'withClass',
'CloseMatch', 'tokenMap', 'pyparsing_common', 'pyparsing_unicode', 'unicode_set',
]
183
# Record the running interpreter's (major, minor, micro) version once, at import
# time, and derive the Python 3 flag that the rest of the module branches on.
system_version = tuple(sys.version_info)[:3]
PY_3 = system_version[0] == 3
if PY_3:
    # Python 3: bind the Python 2 spellings used in this module to their
    # Python 3 equivalents, so later code can use a single set of names.
    _MAX_INT = sys.maxsize
    basestring = str
    unichr = chr
    unicode = str
    _ustr = str

    # build list of single arg builtins, that can be used as parse actions
    singleArgBuiltins = [sum, len, sorted, reversed, list, tuple, set, any, all, min, max]

else:
    # Python 2: use the lazy xrange under the name range, and define a
    # Unicode-tolerant replacement for str().
    _MAX_INT = sys.maxint
    range = xrange

    def _ustr(obj):
        """Drop-in replacement for str(obj) that tries to be Unicode
        friendly. It first tries str(obj). If that fails with
        a UnicodeEncodeError, then it tries unicode(obj). It then
        < returns the unicode object | encodes it with the default
        encoding | ... >.
        """
        if isinstance(obj,unicode):
            return obj

        try:
            # If this works, then _ustr(obj) has the same behaviour as str(obj), so
            # it won't break any existing code.
            return str(obj)

        except UnicodeEncodeError:
            # Else encode it
            # characters the default encoding cannot represent come out as
            # XML character references (&#NNN;) ...
            ret = unicode(obj).encode(sys.getdefaultencoding(), 'xmlcharrefreplace')
            # ... which are then rewritten as backslash-u escapes with the
            # code point in hexadecimal
            xmlcharref = Regex(r'&#\d+;')
            xmlcharref.setParseAction(lambda t: '\\u' + hex(int(t[0][2:-1]))[2:])
            return xmlcharref.transformString(ret)

    # build list of single arg builtins, tolerant of Python version, that can be used as parse actions
    singleArgBuiltins = []
    import __builtin__
    for fname in "sum len sorted reversed list tuple set any all min max".split():
        try:
            singleArgBuiltins.append(getattr(__builtin__,fname))
        except AttributeError:
            # this interpreter does not provide that builtin; leave it out
            continue

# the type of a generator expression, obtained from a throwaway instance
_generatorType = type((y for y in range(1)))
232
def _xml_escape(data):
    """Return ``data`` with the characters ``& > < " '`` replaced by their
    named XML entity references."""
    # the ampersand pair is applied first, so the '&' characters introduced by
    # the later substitutions are not themselves escaped again
    replacements = (
        ('&', 'amp'),
        ('>', 'gt'),
        ('<', 'lt'),
        ('"', 'quot'),
        ("'", 'apos'),
    )
    escaped = data
    for symbol, entity in replacements:
        escaped = escaped.replace(symbol, '&' + entity + ';')
    return escaped
242
# character-set string constants
alphas = "".join((string.ascii_uppercase, string.ascii_lowercase))
nums = string.digits
hexnums = "".join((nums, "ABCDEF", "abcdef"))
alphanums = "".join((alphas, nums))
_bslash = "\\"
# every printable character except the whitespace ones
printables = "".join(filter(lambda ch: ch not in string.whitespace, string.printable))
249
class ParseBaseException(Exception):
    """Common base class for the exceptions raised while parsing."""

    # Performance tuning: we construct a *lot* of these, so keep this
    # constructor as small and fast as possible
    def __init__(self, pstr, loc=0, msg=None, elem=None):
        self.loc = loc
        # with no explicit message, the first argument IS the message and
        # there is no input text to report against
        self.msg, self.pstr = (pstr, "") if msg is None else (msg, pstr)
        self.parserElement = elem
        self.args = (pstr, loc, msg)

    @classmethod
    def _from_exception(cls, pe):
        """
        Internal factory: build an exception of this class from the fields of
        another parse exception, without relying on the constructor
        signatures of subclasses agreeing with each other.
        """
        return cls(pe.pstr, pe.loc, pe.msg, pe.parserElement)

    def __getattr__(self, aname):
        """Compute location attributes on demand:
         - lineno - line number of the exception text
         - col (or column) - column number of the exception text
         - line - the line containing the exception text
        Any other missing attribute raises AttributeError.
        """
        if aname == "lineno":
            return lineno(self.loc, self.pstr)
        if aname in ("col", "column"):
            return col(self.loc, self.pstr)
        if aname == "line":
            return line(self.loc, self.pstr)
        raise AttributeError(aname)

    def __str__(self):
        fields = (self.msg, self.loc, self.lineno, self.column)
        return "%s (at char %d), (line:%d, col:%d)" % fields

    def __repr__(self):
        return _ustr(self)

    def markInputline(self, markerString=">!<"):
        """Return the input line on which the exception occurred, stripped of
        surrounding whitespace, with ``markerString`` inserted at the
        exception's column.
        """
        marked = self.line
        split_at = self.column - 1
        if markerString:
            marked = marked[:split_at] + markerString + marked[split_at:]
        return marked.strip()

    def __dir__(self):
        # advertise the computed attributes alongside the class's own
        return ["lineno", "col", "line"] + dir(type(self))
305
class ParseException(ParseBaseException):
    """
    Raised when a parse expression does not match the input.

    Besides the stored fields, these attributes are computed on access:
     - lineno - the line number of the exception text
     - col - the column number of the exception text
     - line - the line containing the exception text

    Example::

        try:
            Word(nums).setName("integer").parseString("ABC")
        except ParseException as pe:
            print(pe)
            print("column: {}".format(pe.col))

    prints::

       Expected integer (at char 0), (line:1, col:1)
        column: 1

    """

    @staticmethod
    def explain(exc, depth=16):
        """
        Turn the Python traceback of an exception into a listing of the
        pyparsing expressions (and plain functions) that were active when it
        was raised.

        Parameters:

         - exc - exception raised during parsing; it need not be a parse
           exception, so that exceptions raised inside parse actions can be
           explained too
         - depth (default=16) - how many levels back in the stack trace to
           list; None lists the whole trace; 0 shows only the failing input
           line, the position marker and the exception string

        Returns a multi-line string.

        Note: expressions appear by their string form, so the listing is
        easier to read when expressions have been given names with `setName`.

        explain() is only supported under Python 3.
        """
        import inspect

        if depth is None:
            depth = sys.getrecursionlimit()

        report = []
        if isinstance(exc, ParseBaseException):
            # the offending input line, with a caret under the failing column
            report.append(exc.line)
            report.append(' ' * (exc.col - 1) + '^')
        report.append("{0}: {1}".format(type(exc).__name__, exc))

        if depth > 0:
            frames = inspect.getinnerframes(exc.__traceback__, context=depth)
            reported = set()
            for frame_info in frames[-depth:]:
                frame = frame_info[0]
                func_name = frame.f_code.co_name
                owner = frame.f_locals.get('self', None)

                if isinstance(owner, ParserElement):
                    # list each expression once, and only from its parse entry points
                    if func_name not in ('parseImpl', '_parseNoCache') or owner in reported:
                        continue
                    reported.add(owner)
                    owner_type = type(owner)
                    report.append("{0}.{1} - {2}".format(owner_type.__module__,
                                                         owner_type.__name__,
                                                         owner))
                elif owner is not None:
                    owner_type = type(owner)
                    report.append("{0}.{1}".format(owner_type.__module__,
                                                   owner_type.__name__))
                else:
                    if func_name in ('wrapper', '<module>'):
                        continue
                    report.append("{0}".format(func_name))

                # only frames that produced a line count against the limit
                depth -= 1
                if not depth:
                    break

        return '\n'.join(report)
397
398
class ParseFatalException(ParseBaseException):
    """Exception for user code to raise when the parsed content is found to
    be inconsistent; it stops all parsing immediately."""
403
class ParseSyntaxException(ParseFatalException):
    """Same as :class:`ParseFatalException`, except that it is raised
    internally: an :class:`ErrorStop<And._ErrorStop>` (the '-' operator)
    signals that a syntax error has been found which must not be
    backtracked over, so parsing stops immediately.
    """
411
412 #~ class ReparseException(ParseBaseException):
413 #~ """Experimental class - parse actions can raise this exception to cause
414 #~ pyparsing to reparse the input string:
415 #~ - with a modified input string, and/or
416 #~ - with a modified start location
417 #~ Set the values of the ReparseException in the constructor, and raise the
418 #~ exception in a parse action to cause pyparsing to use the new string/location.
419 #~ Setting the values as None causes no change to be made.
420 #~ """
421 #~ def __init_( self, newstring, restartLoc ):
422 #~ self.newParseText = newstring
423 #~ self.reparseLoc = restartLoc
424
class RecursiveGrammarException(Exception):
    """Exception thrown by :class:`ParserElement.validate` if the
    grammar could be improperly recursive.

    The sequence of parse elements given to the constructor is kept on the
    instance as ``parseElementTrace``.
    """
    def __init__(self, parseElementList):
        self.parseElementTrace = parseElementList

    def __str__(self):
        # Wrap the trace in a 1-tuple: the % operator treats a bare tuple
        # operand as the argument list, so a tuple-valued trace would raise
        # TypeError (or be misformatted) instead of being printed as a value.
        return "RecursiveGrammarException: %s" % (self.parseElementTrace,)
434
class _ParseResultsWithOffset(object):
    """Holds a value together with an offset as the 2-tuple ``tup``; indexing
    reads from that tuple, so ``obj[0]`` is the value and ``obj[1]`` the
    offset."""

    def __init__(self, p1, p2):
        self.tup = (p1, p2)

    def __getitem__(self, i):
        return self.tup[i]

    def __repr__(self):
        # only the value is shown, not the offset
        value, _ = self.tup
        return repr(value)

    def setOffset(self, i):
        """Replace the stored offset with ``i``, keeping the value."""
        value, _ = self.tup
        self.tup = (value, i)
444
445 class ParseResults(object):
446 """Structured parse results, to provide multiple means of access to
447 the parsed data:
448
449 - as a list (``len(results)``)
450 - by list index (``results[0], results[1]``, etc.)
451 - by attribute (``results.<resultsName>`` - see :class:`ParserElement.setResultsName`)
452
453 Example::
454
455 integer = Word(nums)
456 date_str = (integer.setResultsName("year") + '/'
457 + integer.setResultsName("month") + '/'
458 + integer.setResultsName("day"))
459 # equivalent form:
460 # date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
461
462 # parseString returns a ParseResults object
463 result = date_str.parseString("1999/12/31")
464
465 def test(s, fn=repr):
466 print("%s -> %s" % (s, fn(eval(s))))
467 test("list(result)")
468 test("result[0]")
469 test("result['month']")
470 test("result.day")
471 test("'month' in result")
472 test("'minutes' in result")
473 test("result.dump()", str)
474
475 prints::
476
477 list(result) -> ['1999', '/', '12', '/', '31']
478 result[0] -> '1999'
479 result['month'] -> '12'
480 result.day -> '31'
481 'month' in result -> True
482 'minutes' in result -> False
483 result.dump() -> ['1999', '/', '12', '/', '31']
484 - day: 31
485 - month: 12
486 - year: 1999
487 """
def __new__(cls, toklist=None, name=None, asList=True, modal=True):
    """Return ``toklist`` itself when it is already an instance of ``cls``;
    otherwise allocate a new object and flag it as needing initialisation."""
    if not isinstance(toklist, cls):
        fresh = object.__new__(cls)
        fresh.__doinit = True
        return fresh
    return toklist
494
495 # Performance tuning: we construct a *lot* of these, so keep this
496 # constructor as small and fast as possible
def __init__( self, toklist=None, name=None, asList=True, modal=True, isinstance=isinstance ):
    """Initialise the token list and, when ``name`` is given, register the
    tokens under that results name.

    - toklist - a list (copied), a generator (consumed into a list), or any
      other single value (wrapped in a one-element list); None means empty
    - name - optional results name; an int name is converted to a string
    - asList - when true, the named value is stored as a nested ParseResults
    - modal - when false, the name is also recorded in the accumulating-names
      table
    - isinstance - the builtin, bound as a default argument
    """
    # __new__ sets this flag on freshly allocated objects only, so an existing
    # instance handed back by __new__ does not have its state reset here
    if self.__doinit:
        self.__doinit = False
        self.__name = None
        self.__parent = None
        self.__accumNames = {}
        self.__asList = asList
        self.__modal = modal
        if toklist is None:
            toklist = []
        if isinstance(toklist, list):
            # shallow copy, so the caller's list is not shared
            self.__toklist = toklist[:]
        elif isinstance(toklist, _generatorType):
            self.__toklist = list(toklist)
        else:
            self.__toklist = [toklist]
        self.__tokdict = dict()

    if name is not None and name:
        if not modal:
            self.__accumNames[name] = 0
        if isinstance(name,int):
            name = _ustr(name) # will always return a str, but use _ustr for consistency
        self.__name = name
        # nothing is registered under the name when toklist is None, '' or []
        if not (isinstance(toklist, (type(None), basestring, list)) and toklist in (None,'',[])):
            if isinstance(toklist,basestring):
                toklist = [ toklist ]
            if asList:
                if isinstance(toklist,ParseResults):
                    self[name] = _ParseResultsWithOffset(ParseResults(toklist.__toklist), 0)
                else:
                    self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]),0)
                self[name].__name = name
            else:
                try:
                    self[name] = toklist[0]
                except (KeyError,TypeError,IndexError):
                    # toklist could not be indexed with 0: store it whole
                    self[name] = toklist
535
def __getitem__(self, i):
    """Look up by position or by results name.

    An int or slice indexes the token list. Any other key is a results name:
    if the name is in the accumulating-names table, all of its values are
    returned in a new ParseResults; otherwise only the most recently stored
    value is returned. An unknown name raises KeyError.
    """
    if isinstance(i, (int, slice)):
        return self.__toklist[i]
    if i in self.__accumNames:
        return ParseResults([entry[0] for entry in self.__tokdict[i]])
    return self.__tokdict[i][-1][0]
544
def __setitem__(self, k, v, isinstance=isinstance):
    """Store ``v`` by position or under a results name.

    A value that is already a value/offset pair is appended to the entries
    for name ``k``. Otherwise an int or slice key assigns into the token
    list, and any other key appends ``v`` to the named entries with offset 0.
    A stored ParseResults gets a weak reference back to this object as its
    parent.
    """
    already_wrapped = isinstance(v, _ParseResultsWithOffset)
    if not already_wrapped and isinstance(k, (int, slice)):
        self.__toklist[k] = v
        child = v
    else:
        entry = v if already_wrapped else _ParseResultsWithOffset(v, 0)
        self.__tokdict[k] = self.__tokdict.get(k, []) + [entry]
        child = entry[0]
    if isinstance(child, ParseResults):
        child.__parent = wkref(self)
557
def __delitem__( self, i ):
    """Delete by position (int or slice) or by results name.

    Deleting tokens also lowers the offsets stored with the named results, so
    that offsets past a removed position move down by one per removed token.
    Deleting by name removes only the name; the token list is left as is.
    """
    if isinstance(i,(int,slice)):
        # length before the deletion, needed to resolve negative indices and slices
        mylen = len( self.__toklist )
        del self.__toklist[i]

        # convert int to slice
        if isinstance(i, int):
            if i < 0:
                i += mylen
            i = slice(i, i+1)
        # get removed indices
        removed = list(range(*i.indices(mylen)))
        # the removed indices are applied in reverse order
        removed.reverse()
        # fixup indices in token dictionary
        for name,occurrences in self.__tokdict.items():
            for j in removed:
                for k, (value, position) in enumerate(occurrences):
                    # (position > j) is a bool, i.e. subtract 1 only for offsets past j
                    occurrences[k] = _ParseResultsWithOffset(value, position - (position > j))
    else:
        del self.__tokdict[i]
578
def __contains__(self, k):
    """True when ``k`` is a key of the named-results dictionary; the token
    list is not searched."""
    named = self.__tokdict
    return k in named
581
def __len__(self):
    """Number of tokens in the token list."""
    return len(self.__toklist)

def __bool__(self):
    """True when the token list is non-empty."""
    return bool(self.__toklist)

# Python 2 spelling of the truth-value hook
__nonzero__ = __bool__

def __iter__(self):
    """Iterate over the tokens in order."""
    return iter(self.__toklist)

def __reversed__(self):
    """Iterate over a reversed copy of the token list."""
    backwards = self.__toklist[::-1]
    return iter(backwards)
def _iterkeys(self):
    """Iterate over the results names, using the dictionary's own
    ``iterkeys()`` when it has one and plain iteration otherwise."""
    named = self.__tokdict
    if not hasattr(named, "iterkeys"):
        return iter(named)
    return named.iterkeys()
592
def _itervalues(self):
    """Lazily yield the value looked up for each results name."""
    return (self[results_name] for results_name in self._iterkeys())
595
def _iteritems(self):
    """Lazily yield a ``(name, value)`` tuple for each results name."""
    return ((results_name, self[results_name]) for results_name in self._iterkeys())
598
# Publish the dict-style accessors under the names each Python version expects.
# On Python 3 keys/values/items are the iterator helpers themselves; on Python 2
# the iterator helpers become iterkeys/itervalues/iteritems and keys/values/items
# are wrappers that return lists.
if PY_3:
    keys = _iterkeys
    """Returns an iterator of all named result keys."""

    values = _itervalues
    """Returns an iterator of all named result values."""

    items = _iteritems
    """Returns an iterator of all named result key-value tuples."""

else:
    iterkeys = _iterkeys
    """Returns an iterator of all named result keys (Python 2.x only)."""

    itervalues = _itervalues
    """Returns an iterator of all named result values (Python 2.x only)."""

    iteritems = _iteritems
    """Returns an iterator of all named result key-value tuples (Python 2.x only)."""

    def keys( self ):
        """Returns all named result keys (as a list in Python 2.x, as an iterator in Python 3.x)."""
        return list(self.iterkeys())

    def values( self ):
        """Returns all named result values (as a list in Python 2.x, as an iterator in Python 3.x)."""
        return list(self.itervalues())

    def items( self ):
        """Returns all named result key-values (as a list of tuples in Python 2.x, as an iterator in Python 3.x)."""
        return list(self.iteritems())
630
def haskeys(self):
    """Report whether any results names are defined. Because keys() returns
    an iterator, this is the direct way for code to test for the existence
    of named results."""
    return True if self.__tokdict else False
635
def pop(self, *args, **kwargs):
    """
    Remove an item and return it, with either ``list`` or ``dict`` semantics.

    With no argument the last token is popped; with an integer argument the
    token at that index is popped. With a non-integer argument (most likely
    a string) the value stored under that results name is popped. As with
    ``dict.pop()``, a default may be given as a second argument or as the
    ``default`` keyword; it is returned when the name is not defined.

    Example::

        def remove_first(tokens):
            tokens.pop(0)
        print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321']
        print(OneOrMore(Word(nums)).addParseAction(remove_first).parseString("0 123 321")) # -> ['123', '321']

        label = Word(alphas)
        patt = label("LABEL") + OneOrMore(Word(nums))
        print(patt.parseString("AAB 123 321").dump())

        # Use pop() in a parse action to remove named result (note that corresponding value is not
        # removed from list form of results)
        def remove_LABEL(tokens):
            tokens.pop("LABEL")
            return tokens
        patt.addParseAction(remove_LABEL)
        print(patt.parseString("AAB 123 321").dump())

    prints::

        ['AAB', '123', '321']
        - LABEL: AAB

        ['AAB', '123', '321']
    """
    positional = args if args else [-1]
    for kw_name, kw_value in kwargs.items():
        if kw_name != 'default':
            raise TypeError("pop() got an unexpected keyword argument '%s'" % kw_name)
        positional = (positional[0], kw_value)

    target = positional[0]
    # the default is used only for a non-integer key that is absent when a
    # default was actually supplied
    use_default = not (isinstance(target, int) or
                       len(positional) == 1 or
                       target in self)
    if use_default:
        return positional[1]

    popped = self[target]
    del self[target]
    return popped
690
def get(self, key, defaultValue=None):
    """
    Return the named result stored under ``key``; when there is no such
    name, return ``defaultValue`` (``None`` unless specified).

    Similar to ``dict.get()``.

    Example::

        integer = Word(nums)
        date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

        result = date_str.parseString("1999/12/31")
        print(result.get("year")) # -> '1999'
        print(result.get("hour", "not specified")) # -> 'not specified'
        print(result.get("hour")) # -> None
    """
    return self[key] if key in self else defaultValue
713
def insert(self, index, insStr):
    """
    Insert ``insStr`` into the token list at position ``index``, then
    increment by one every named-result offset that is greater than
    ``index``.

    Similar to ``list.insert()``.

    Example::

        print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321']

        # use a parse action to insert the parse location in the front of the parsed results
        def insert_locn(locn, tokens):
            tokens.insert(0, locn)
        print(OneOrMore(Word(nums)).addParseAction(insert_locn).parseString("0 123 321")) # -> [0, '0', '123', '321']
    """
    self.__toklist.insert(index, insStr)
    # fixup indices in token dictionary
    for occurrences in self.__tokdict.values():
        for slot, (value, position) in enumerate(occurrences):
            shift = 1 if position > index else 0
            occurrences[slot] = _ParseResultsWithOffset(value, position + shift)
734
def append(self, item):
    """
    Append ``item`` as a single element at the end of the token list.

    Example::

        print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321']

        # use a parse action to compute the sum of the parsed integers, and add it to the end
        def append_sum(tokens):
            tokens.append(sum(map(int, tokens)))
        print(OneOrMore(Word(nums)).addParseAction(append_sum).parseString("0 123 321")) # -> ['0', '123', '321', 444]
    """
    tokens = self.__toklist
    tokens.append(item)
749
def extend(self, itemseq):
    """
    Append every element of ``itemseq`` to the end of the token list. When
    ``itemseq`` is itself a ParseResults it is merged in with ``+=`` instead.

    Example::

        patt = OneOrMore(Word(alphas))

        # use a parse action to append the reverse of the matched strings, to make a palindrome
        def make_palindrome(tokens):
            tokens.extend(reversed([t[::-1] for t in tokens]))
            return ''.join(tokens)
        print(patt.addParseAction(make_palindrome).parseString("lskdj sdlkjf lksd")) # -> 'lskdjsdlkjflksddsklfjkldsjdksl'
    """
    if not isinstance(itemseq, ParseResults):
        self.__toklist.extend(itemseq)
    else:
        self.__iadd__(itemseq)
768
def clear(self):
    """
    Empty the token list and the named-results dictionary, both in place.
    """
    self.__toklist[:] = []
    self.__tokdict.clear()
775
776 def __getattr__( self, name ):
777 try:
778 return self[name]
779 except KeyError:
780 return ""
781
782 if name in self.__tokdict:
783 if name not in self.__accumNames:
784 return self.__tokdict[name][-1][0]
785 else:
786 return ParseResults([ v[0] for v in self.__tokdict[name] ])
787 else:
788 return ""
789
def __add__(self, other):
    """Return a copy of this object with ``other`` merged into it by ``+=``;
    this object itself is not modified."""
    combined = self.copy()
    combined += other
    return combined
794
def __iadd__(self, other):
    """Merge ``other`` into this object in place and return this object.

    Named results of ``other`` are re-registered here with their offsets
    moved past the current end of the token list (a negative offset becomes
    exactly the current length). The tokens of ``other`` are then appended
    and its accumulating-names table is merged in.
    """
    if other.__tokdict:
        base = len(self.__toklist)
        # build the complete list of shifted entries before storing any of them
        shifted = []
        for results_name, entries in other.__tokdict.items():
            for entry in entries:
                position = entry[1]
                moved = base if position < 0 else position + base
                shifted.append((results_name, _ParseResultsWithOffset(entry[0], moved)))
        for results_name, wrapped in shifted:
            self[results_name] = wrapped
            if isinstance(wrapped[0], ParseResults):
                wrapped[0].__parent = wkref(self)

    self.__toklist += other.__toklist
    self.__accumNames.update(other.__accumNames)
    return self
810
def __radd__(self, other):
    """Reflected addition, so that the ``sum()`` builtin can merge many
    ParseResults.

    ``sum()`` starts from the integer ``0``; adding that start value gives a
    copy of this object. For any other left operand ``NotImplemented`` is
    returned, which makes Python raise ``TypeError`` for the unsupported
    addition.
    """
    if isinstance(other, int) and other == 0:
        # useful for merging many ParseResults using sum() builtin
        return self.copy()
    # Evaluating ``other + self`` here would only be dispatched straight back
    # to this method (the left operand has already declined the addition),
    # recursing until RecursionError instead of the intended TypeError.
    return NotImplemented
818
def __repr__(self):
    """Show the token list and the named-results dictionary as a pair."""
    return "({0!r}, {1!r})".format(self.__toklist, self.__tokdict)
821
def __str__(self):
    """Render the tokens like a list: nested ParseResults through their own
    string form, every other token through ``repr()``."""
    rendered = []
    for token in self.__toklist:
        if isinstance(token, ParseResults):
            rendered.append(_ustr(token))
        else:
            rendered.append(repr(token))
    return '[' + ', '.join(rendered) + ']'
824
def _asStringList(self, sep=''):
    """Flatten the tokens, nested ParseResults included, into a flat list of
    strings, putting ``sep`` (when non-empty) between top-level tokens."""
    pieces = []
    for token in self.__toklist:
        if pieces and sep:
            pieces.append(sep)
        if isinstance(token, ParseResults):
            # nested results are flattened without a separator
            pieces.extend(token._asStringList())
        else:
            pieces.append(_ustr(token))
    return pieces
835
def asList(self):
    """
    Return the tokens as a plain, possibly nested, Python list: nested
    ParseResults are converted recursively, other tokens are kept as they are.

    Example::

        patt = OneOrMore(Word(alphas))
        result = patt.parseString("sldkj lsdkj sldkj")
        # even though the result prints in string-like form, it is actually a pyparsing ParseResults
        print(type(result), result) # -> <class 'pyparsing.ParseResults'> ['sldkj', 'lsdkj', 'sldkj']

        # Use asList() to create an actual list
        result_list = result.asList()
        print(type(result_list), result_list) # -> <class 'list'> ['sldkj', 'lsdkj', 'sldkj']
    """
    plain = []
    for token in self.__toklist:
        if isinstance(token, ParseResults):
            plain.append(token.asList())
        else:
            plain.append(token)
    return plain
852
def asDict(self):
    """
    Return the named results as a plain, possibly nested, dictionary.

    Example::

        integer = Word(nums)
        date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

        result = date_str.parseString('12/31/1999')
        print(type(result), repr(result)) # -> <class 'pyparsing.ParseResults'> (['12', '/', '31', '/', '1999'], {'day': [('1999', 4)], 'year': [('12', 0)], 'month': [('31', 2)]})

        result_dict = result.asDict()
        print(type(result_dict), repr(result_dict)) # -> <class 'dict'> {'day': '1999', 'year': '12', 'month': '31'}

        # even though a ParseResults supports dict-like access, sometime you just need to have a dict
        import json
        print(json.dumps(result)) # -> Exception: TypeError: ... is not JSON serializable
        print(json.dumps(result.asDict())) # -> {"month": "31", "day": "1999", "year": "12"}
    """
    # choose the name/value iterator that exists on this Python version
    item_fn = self.items if PY_3 else self.iteritems

    def toItem(obj):
        # plain values pass through; nested results become a dict when they
        # carry names, and a list otherwise
        if not isinstance(obj, ParseResults):
            return obj
        if obj.haskeys():
            return obj.asDict()
        return [toItem(member) for member in obj]

    return dict((results_name, toItem(value)) for results_name, value in item_fn())
888
def copy(self):
    """
    Return a new :class:`ParseResults` holding a copy of the token list and
    of the named-results dictionary, with the same parent reference,
    accumulating names and results name as this object.
    """
    duplicate = ParseResults(self.__toklist)
    duplicate.__tokdict = dict(self.__tokdict.items())
    duplicate.__parent = self.__parent
    duplicate.__accumNames.update(self.__accumNames)
    duplicate.__name = self.__name
    return duplicate
899
def asXML( self, doctag=None, namedItemsOnly=False, indent="", formatted=True ):
    """
    (Deprecated) Returns the parse results as XML. Tags are created for tokens and lists that have defined results names.

    - doctag - tag for the enclosing element; when None, this object's own
      results name is used, and failing that "ITEM"
    - namedItemsOnly - when true, tokens without a results name are left out
    - indent - string placed before this element's opening and closing tags
    - formatted - when false, newlines and indentation are omitted
    """
    nl = "\n"
    out = []
    # map each stored offset to the results name registered at that offset
    namedItems = dict((v[1],k) for (k,vlist) in self.__tokdict.items()
                                                        for v in vlist)
    nextLevelIndent = indent + " "

    # collapse out indents if formatting is not desired
    if not formatted:
        indent = ""
        nextLevelIndent = ""
        nl = ""

    # choose the tag for this element: explicit doctag, else own results name
    selfTag = None
    if doctag is not None:
        selfTag = doctag
    else:
        if self.__name:
            selfTag = self.__name

    if not selfTag:
        if namedItemsOnly:
            return ""
        else:
            selfTag = "ITEM"

    out += [ nl, indent, "<", selfTag, ">" ]

    for i,res in enumerate(self.__toklist):
        if isinstance(res,ParseResults):
            # nested results render themselves one indent level deeper
            if i in namedItems:
                out += [ res.asXML(namedItems[i],
                                    namedItemsOnly and doctag is None,
                                    nextLevelIndent,
                                    formatted)]
            else:
                out += [ res.asXML(None,
                                    namedItemsOnly and doctag is None,
                                    nextLevelIndent,
                                    formatted)]
        else:
            # individual token, see if there is a name for it
            resTag = None
            if i in namedItems:
                resTag = namedItems[i]
            if not resTag:
                if namedItemsOnly:
                    continue
                else:
                    resTag = "ITEM"
            # token text is escaped before being placed in the element body
            xmlBodyText = _xml_escape(_ustr(res))
            out += [ nl, nextLevelIndent, "<", resTag, ">",
                                            xmlBodyText,
                                            "</", resTag, ">" ]

    out += [ nl, indent, "</", selfTag, ">" ]
    return "".join(out)
960
def __lookup(self,sub):
    """Return the results name whose entries contain ``sub`` (compared by
    identity), or ``None`` when no entry holds that object."""
    for name, entries in self.__tokdict.items():
        if any(sub is value for value, _ in entries):
            return name
    return None
967
def getName(self):
    r"""
    Return the results name attached to this token group, or ``None``.

    Useful when one of several alternative expressions may have matched at
    a location and the caller needs to know which one did.  The name is
    taken from this object if it has one, otherwise looked up in its parent
    (if any); failing that, a lone named entry positioned at 0 or -1 in a
    single-token result supplies the name.

    Example::

        integer = Word(nums)
        ssn_expr = Regex(r"\d\d\d-\d\d-\d\d\d\d")
        house_number_expr = Suppress('#') + Word(nums, alphanums)
        user_data = (Group(house_number_expr)("house_number")
                    | Group(ssn_expr)("ssn")
                    | Group(integer)("age"))
        user_info = OneOrMore(user_data)

        result = user_info.parseString("22 111-22-3333 #221B")
        for item in result:
            print(item.getName(), ':', item[0])

    prints::

        age : 22
        ssn : 111-22-3333
        house_number : 221B
    """
    if self.__name:
        return self.__name

    if self.__parent:
        # __parent is called to obtain the owner; it may yield nothing
        owner = self.__parent()
        return owner.__lookup(self) if owner else None

    if len(self) != 1 or len(self.__tokdict) != 1:
        return None

    only_name = next(iter(self.__tokdict))
    first_position = self.__tokdict[only_name][0][1]
    return only_name if first_position in (0,-1) else None
1007
def dump(self, indent='', depth=0, full=True):
    """
    Diagnostic helper that renders the contents of a :class:`ParseResults`
    as a string: first the token list, then (when ``full`` is true) one
    line per named result, sorted by name.  Nested :class:`ParseResults`
    values are dumped recursively one level deeper.  If there are no named
    results but some tokens are themselves :class:`ParseResults`, every
    token is listed under its index instead.

    ``indent`` is prefixed to each emitted line so the text can be embedded
    in a larger nested display; ``depth`` is the current nesting level.

    Example::

        integer = Word(nums)
        date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

        result = date_str.parseString('12/31/1999')
        print(result.dump())

    prints::

        ['12', '/', '31', '/', '1999']
        - day: 1999
        - month: 31
        - year: 12
    """
    pieces = [ indent+_ustr(self.asList()) ]
    if full:
        if self.haskeys():
            named = sorted((str(k), v) for k,v in self.items())
            for key, value in named:
                pieces.append('\n')
                pieces.append( "%s%s- %s: " % (indent,(' '*depth), key) )
                if not isinstance(value, ParseResults):
                    pieces.append(repr(value))
                elif value:
                    pieces.append( value.dump(indent,depth+1) )
                else:
                    pieces.append(_ustr(value))
        elif any(isinstance(item, ParseResults) for item in self):
            for index, item in enumerate(self):
                if isinstance(item, ParseResults):
                    rendered = item.dump(indent,depth+1)
                else:
                    rendered = _ustr(item)
                pieces.append("\n%s%s[%d]:\n%s%s%s" % (indent,(' '*(depth)),index,indent,(' '*(depth+1)),rendered))

    return "".join(pieces)
1055
def pprint(self, *args, **kwargs):
    """
    Pretty-print the parsed results as a (nested) list using the standard
    `pprint <https://docs.python.org/3/library/pprint.html>`_ module.
    Any extra positional or keyword arguments are forwarded unchanged to
    `pprint.pprint <https://docs.python.org/3/library/pprint.html#pprint.pprint>`_ .

    Example::

        ident = Word(alphas, alphanums)
        num = Word(nums)
        func = Forward()
        term = ident | num | Group('(' + func + ')')
        func <<= ident + Group(Optional(delimitedList(term)))
        result = func.parseString("fna a,b,(fnb c,d,200),100")
        result.pprint(width=40)

    prints::

        ['fna',
         ['a',
          'b',
          ['(', 'fnb', ['c', 'd', '200'], ')'],
          '100']]
    """
    as_list = self.asList()
    pprint.pprint(as_list, *args, **kwargs)
1082
1083 # add support for pickle protocol
def __getstate__(self):
    """Return the picklable state: the token list plus a tuple of
    (copy of the name dictionary, resolved parent or None, accumulated
    names, results name)."""
    parent_ref = self.__parent
    parent = parent_ref() if parent_ref is not None else None
    # a parent that resolves to a falsy value is stored as None
    extras = ( self.__tokdict.copy(),
               parent or None,
               self.__accumNames,
               self.__name )
    return ( self.__toklist, extras )
1090
def __setstate__(self,state):
    """Restore the object from a ``(toklist, (tokdict, parent, accumNames,
    name))`` state tuple; a non-None parent is wrapped with ``wkref``."""
    tokdict, par, inAccumNames, name = state[1]
    self.__toklist = state[0]
    self.__tokdict = tokdict
    self.__name = name
    # rebuild the accumulated-names mapping as a fresh dict
    self.__accumNames = {}
    self.__accumNames.update(inAccumNames)
    self.__parent = wkref(par) if par is not None else None
1103
def __getnewargs__(self):
    """Return the 4-tuple (token list, results name, asList flag, modal
    flag) used as constructor arguments by the pickle protocol."""
    new_args = (self.__toklist, self.__name, self.__asList, self.__modal)
    return new_args
1106
def __dir__(self):
    """List the attribute names of the object's type followed by its
    results-name keys."""
    names = dir(type(self))
    names.extend(self.keys())
    return names
1109
# Register ParseResults with MutableMapping via its register() hook
# (presumably the collections ABC, making ParseResults a virtual subclass - confirm against the imports).
MutableMapping.register(ParseResults)
1111
def col (loc,strg):
    """Return the column of position ``loc`` within ``strg``, treating
    newlines as line separators.  Columns are numbered starting at 1.

    Note: the default parsing behavior is to expand tabs in the input string
    before starting the parsing process.  See
    :class:`ParserElement.parseString` for more
    information on parsing strings containing ``<TAB>`` s, and suggested
    methods to maintain a consistent view of the parsed string, the parse
    location, and line and column positions within the parsed string.
    """
    text = strg
    # a position directly after a newline (and inside the string) is column 1
    if 0 < loc < len(text) and text[loc - 1] == '\n':
        return 1
    # otherwise measure the distance back to the preceding newline (-1 if none)
    return loc - text.rfind("\n", 0, loc)
1125
def lineno(loc,strg):
    """Return the line number of position ``loc`` within ``strg``, treating
    newlines as line separators.  Lines are numbered starting at 1.

    Note - the default parsing behavior is to expand tabs in the input string
    before starting the parsing process.  See :class:`ParserElement.parseString`
    for more information on parsing strings containing ``<TAB>`` s, and
    suggested methods to maintain a consistent view of the parsed string, the
    parse location, and line and column positions within the parsed string.
    """
    newlines_before = strg.count("\n", 0, loc)
    return newlines_before + 1
1137
def line( loc, strg ):
    """Return the text of the line that contains position ``loc`` in
    ``strg`` (without its terminating newline), treating newlines as line
    separators.
    """
    # rfind yields -1 when there is no earlier newline, so start becomes 0
    start = strg.rfind("\n", 0, loc) + 1
    end = strg.find("\n", loc)
    return strg[start:] if end < 0 else strg[start:end]
1147
def _defaultStartDebugAction( instring, loc, expr ):
    """Default start-of-match debug action: print the expression, the
    location and its (line, column) position."""
    position = "(%d,%d)" % ( lineno(loc,instring), col(loc,instring) )
    message = "Match " + _ustr(expr) + " at loc " + _ustr(loc) + position
    print (message)
1150
def _defaultSuccessDebugAction( instring, startloc, endloc, expr, toks ):
    """Default successful-match debug action: print the expression and the
    matched tokens as a list."""
    matched = str(toks.asList())
    print ("Matched " + _ustr(expr) + " -> " + matched)
1153
def _defaultExceptionDebugAction( instring, loc, expr, exc ):
    """Default failed-match debug action: print the raised exception."""
    message = "Exception raised:" + _ustr(exc)
    print (message)
1156
def nullDebugAction(*args):
    """No-op debug action; accepts any positional arguments and returns
    ``None``, so it can be installed to silence debugging output."""
    return None
1160
1161 # Only works on Python 3.x - nonlocal is toxic to Python 2 installs
1162 #~ 'decorator to trim function calls to match the arity of the target'
1163 #~ def _trim_arity(func, maxargs=3):
1164 #~ if func in singleArgBuiltins:
1165 #~ return lambda s,l,t: func(t)
1166 #~ limit = 0
1167 #~ foundArity = False
1168 #~ def wrapper(*args):
1169 #~ nonlocal limit,foundArity
1170 #~ while 1:
1171 #~ try:
1172 #~ ret = func(*args[limit:])
1173 #~ foundArity = True
1174 #~ return ret
1175 #~ except TypeError:
1176 #~ if limit == maxargs or foundArity:
1177 #~ raise
1178 #~ limit += 1
1179 #~ continue
1180 #~ return wrapper
1181
1182 # this version is Python 2.x-3.x cross-compatible
1183 'decorator to trim function calls to match the arity of the target'
def _trim_arity(func, maxargs=2):
    """Wrap ``func`` so it can be called with the full argument list even
    if it accepts fewer arguments.

    The returned wrapper calls ``func(*args[limit:])``; each time that call
    itself raises ``TypeError`` (identified by file name and line number of
    the innermost traceback frame) ``limit`` is increased by one and the
    call retried, until a call succeeds or ``limit`` exceeds ``maxargs``.
    Once a call has succeeded, later ``TypeError`` exceptions are re-raised
    unchanged.  Members of ``singleArgBuiltins`` are wrapped directly so
    that they receive only the third argument.
    """
    if func in singleArgBuiltins:
        return lambda s,l,t: func(t)
    # one-element lists serve as mutable cells shared with wrapper()
    # (no ``nonlocal`` available on Python 2)
    limit = [0]
    foundArity = [False]

    # traceback return data structure changed in Py3.5 - normalize back to plain tuples
    if system_version[:2] >= (3,5):
        def extract_stack(limit=0):
            # special handling for Python 3.5.0 - extra deep call stack by 1
            offset = -3 if system_version == (3,5,0) else -2
            frame_summary = traceback.extract_stack(limit=-offset+limit-1)[offset]
            return [frame_summary[:2]]
        def extract_tb(tb, limit=0):
            frames = traceback.extract_tb(tb, limit=limit)
            frame_summary = frames[-1]
            return [frame_summary[:2]]
    else:
        extract_stack = traceback.extract_stack
        extract_tb = traceback.extract_tb

    # synthesize what would be returned by traceback.extract_stack at the call to
    # user's parse action 'func', so that we don't incur call penalty at parse time

    LINE_DIFF = 6
    # IF ANY CODE CHANGES, EVEN JUST COMMENTS OR BLANK LINES, BETWEEN THE NEXT LINE AND
    # THE CALL TO FUNC INSIDE WRAPPER, LINE_DIFF MUST BE MODIFIED!!!!
    this_line = extract_stack(limit=2)[-1]
    pa_call_line_synth = (this_line[0], this_line[1]+LINE_DIFF)

    def wrapper(*args):
        while 1:
            try:
                ret = func(*args[limit[0]:])
                foundArity[0] = True
                return ret
            except TypeError:
                # re-raise TypeErrors if they did not come from our arity testing
                if foundArity[0]:
                    raise
                else:
                    try:
                        tb = sys.exc_info()[-1]
                        # a TypeError whose innermost frame is not the call line
                        # above came from inside func itself - propagate it
                        if not extract_tb(tb, limit=2)[-1][:2] == pa_call_line_synth:
                            raise
                    finally:
                        # drop the traceback reference as soon as it has been inspected
                        del tb

                # arity mismatch: drop one more leading argument and retry
                if limit[0] <= maxargs:
                    limit[0] += 1
                    continue
                raise

    # copy func name to wrapper for sensible debug output
    func_name = "<parse action>"
    try:
        func_name = getattr(func, '__name__',
                            getattr(func, '__class__').__name__)
    except Exception:
        func_name = str(func)
    wrapper.__name__ = func_name

    return wrapper
1247
1248 class ParserElement(object):
1249 """Abstract base level parser element class."""
1250 DEFAULT_WHITE_CHARS = " \n\t\r"
1251 verbose_stacktrace = False
1252
@staticmethod
def setDefaultWhitespaceChars( chars ):
    r"""
    Replace the class-wide default set of whitespace characters
    (``ParserElement.DEFAULT_WHITE_CHARS``) with ``chars``.

    Example::

        # with the default whitespace chars, newlines are skipped like spaces
        OneOrMore(Word(alphas)).parseString("abc def\nghi jkl")  # -> ['abc', 'def', 'ghi', 'jkl']

        # restrict whitespace to space and <TAB> so that newline becomes significant
        ParserElement.setDefaultWhitespaceChars(" \t")
        OneOrMore(Word(alphas)).parseString("abc def\nghi jkl")  # -> ['abc', 'def']
    """
    ParserElement.DEFAULT_WHITE_CHARS = chars
1268
@staticmethod
def inlineLiteralsUsing(cls):
    """
    Choose the class stored in ``ParserElement._literalStringClass``, i.e.
    the class used to wrap plain string literals that are combined into a
    parser expression.

    Example::

        # default literal class used is Literal
        integer = Word(nums)
        date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

        date_str.parseString("1999/12/31")  # -> ['1999', '/', '12', '/', '31']

        # switch to Suppress so the separators are dropped from the results
        ParserElement.inlineLiteralsUsing(Suppress)
        date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

        date_str.parseString("1999/12/31")  # -> ['1999', '12', '31']
    """
    ParserElement._literalStringClass = cls
1290
def __init__( self, savelist=False ):
    """Set up the default state shared by all parser elements.

    ``savelist`` is stored as ``saveAsList``; every other attribute starts
    at the fixed default assigned below.
    """
    # results naming and packaging
    self.resultsName = None
    self.saveAsList = savelist
    self.modalResults = True # used to mark results names as modal (report only last) or cumulative (list all)

    # user callbacks and debugging
    self.parseAction = []
    self.failAction = None
    self.callDuringTry = False
    self.debug = False
    self.debugActions = ( None, None, None ) #custom debug actions

    # whitespace and ignorable handling
    self.skipWhitespace = True
    self.whiteChars = set(ParserElement.DEFAULT_WHITE_CHARS)
    self.copyDefaultWhiteChars = True
    self.keepTabs = False
    self.ignoreExprs = []
    self.callPreparse = True # used to avoid redundant calls to preParse

    # bookkeeping
    #~ self.name = "<unknown>" # don't define self.name, let subclasses try/except upcall
    self.strRepr = None
    self.errmsg = ""
    self.re = None
    self.streamlined = False
    self.mayReturnEmpty = False # used when checking for left-recursion
    self.mayIndexError = True # used to optimize exception handling for subclasses that don't advance parse index
1313
def copy( self ):
    """
    Make a copy of this :class:`ParserElement`.  Useful for defining
    different parse actions for the same parsing pattern, using copies of
    the original parse element.

    The copy is a shallow ``copy.copy`` of the element with its own
    ``parseAction`` and ``ignoreExprs`` lists.  When
    ``copyDefaultWhiteChars`` is set, the copy's ``whiteChars`` is reset to
    a fresh ``set`` of the current ``ParserElement.DEFAULT_WHITE_CHARS``,
    the same type that ``__init__`` stores.

    Example::

        integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
        integerK = integer.copy().addParseAction(lambda toks: toks[0]*1024) + Suppress("K")
        integerM = integer.copy().addParseAction(lambda toks: toks[0]*1024*1024) + Suppress("M")

        print(OneOrMore(integerK | integerM | integer).parseString("5K 100 640K 256M"))

    prints::

        [5120, 100, 655360, 268435456]

    Equivalent form of ``expr.copy()`` is just ``expr()``::

        integerM = integer().addParseAction(lambda toks: toks[0]*1024*1024) + Suppress("M")
    """
    cpy = copy.copy( self )
    # give the copy independent lists so later add*/ignore calls on either
    # element do not leak into the other
    cpy.parseAction = self.parseAction[:]
    cpy.ignoreExprs = self.ignoreExprs[:]
    if self.copyDefaultWhiteChars:
        # keep whiteChars a set, consistent with __init__ (previously the raw
        # default string was assigned here)
        cpy.whiteChars = set(ParserElement.DEFAULT_WHITE_CHARS)
    return cpy
1342
def setName( self, name ):
    """
    Give this expression a name, which makes debugging output and
    exception messages clearer.  The error message becomes
    ``"Expected " + name``; if the element already carries an
    ``exception`` attribute, its ``msg`` is updated too.  Returns ``self``.

    Example::

        Word(nums).parseString("ABC")  # -> Exception: Expected W:(0123...) (at char 0), (line:1, col:1)
        Word(nums).setName("integer").parseString("ABC")  # -> Exception: Expected integer (at char 0), (line:1, col:1)
    """
    message = "Expected " + name
    self.name = name
    self.errmsg = message
    if hasattr(self,"exception"):
        self.exception.msg = message
    return self
1357
def setResultsName( self, name, listAllMatches=False ):
    """
    Attach a results name under which the matched tokens can be retrieved
    from the returned parse results.

    NOTE: a *copy* of this :class:`ParserElement` is returned and the
    original is left untouched, so that a basic element such as an integer
    can be defined once and reused in several places under different names.

    A ``name`` ending in ``*`` has the asterisk removed and behaves as if
    ``listAllMatches=True`` had been passed.

    The abbreviated form ``expr("name")`` may be used in place of
    ``expr.setResultsName("name")`` - see :class:`__call__`.

    Example::

        date_str = (integer.setResultsName("year") + '/'
                    + integer.setResultsName("month") + '/'
                    + integer.setResultsName("day"))

        # equivalent form:
        date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
    """
    named = self.copy()
    starred = name.endswith("*")
    named.resultsName = name[:-1] if starred else name
    named.modalResults = not (starred or listAllMatches)
    return named
1386
def setBreak(self,breakFlag = True):
    """Enable or disable a pdb breakpoint on this element.

    With ``breakFlag`` true, ``_parse`` is replaced by a wrapper that calls
    ``pdb.set_trace()`` and then the previous parse method; with it false,
    a previously installed wrapper is removed again.  Returns ``self``.
    """
    if not breakFlag:
        # only unwrap if the current method is one of our breaker wrappers
        if hasattr(self._parse,"_originalParseMethod"):
            self._parse = self._parse._originalParseMethod
        return self

    wrapped = self._parse
    def breaker(instring, loc, doActions=True, callPreParse=True):
        import pdb
        pdb.set_trace()
        return wrapped( instring, loc, doActions, callPreParse )
    # remember what was wrapped so the breakpoint can be removed later
    breaker._originalParseMethod = wrapped
    self._parse = breaker
    return self
1404
def setParseAction( self, *fns, **kwargs ):
    """
    Replace this element's parse actions with ``fns``; they run when the
    element matches successfully.  Each action may take 0-3 arguments and
    is invoked as ``fn(s,loc,toks)`` , ``fn(loc,toks)`` , ``fn(toks)`` , or
    ``fn()`` , where:

    - s    = the original string being parsed (see note below)
    - loc  = the location of the matching substring
    - toks = a list of the matched tokens, packaged as a :class:`ParseResults` object

    An action that modifies the tokens can return the new tokens, which
    then replace the original ones; otherwise no return value is needed.

    Optional keyword arguments:
    - callDuringTry = (default= ``False`` ) whether the action should also run during lookaheads and alternate testing

    Note: the default parsing behavior is to expand tabs in the input string
    before starting the parsing process.  See :class:`parseString` for more
    information on parsing strings containing ``<TAB>`` s, and suggested
    methods to maintain a consistent view of the parsed string, the parse
    location, and line and column positions within the parsed string.

    Returns ``self``.

    Example::

        integer = Word(nums)
        date_str = integer + '/' + integer + '/' + integer

        date_str.parseString("1999/12/31")  # -> ['1999', '/', '12', '/', '31']

        # use parse action to convert to ints at parse time
        integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
        date_str = integer + '/' + integer + '/' + integer

        # note that integer fields are now ints, not strings
        date_str.parseString("1999/12/31")  # -> [1999, '/', 12, '/', 31]
    """
    # every action is wrapped so it can always be called with (s, loc, toks)
    self.parseAction = [_trim_arity(fn) for fn in fns]
    self.callDuringTry = kwargs.get("callDuringTry", False)
    return self
1445
def addParseAction( self, *fns, **kwargs ):
    """
    Append one or more parse actions to this expression's existing list.
    See :class:`setParseAction` for the accepted call signatures.
    ``callDuringTry`` stays enabled once it has been set.  Returns ``self``.

    See examples in :class:`copy`.
    """
    wrapped = [_trim_arity(fn) for fn in fns]
    self.parseAction += wrapped
    if not self.callDuringTry:
        self.callDuringTry = kwargs.get("callDuringTry", False)
    return self
1455
def addCondition(self, *fns, **kwargs):
    """Add boolean predicate functions to this expression's list of parse actions. See
    :class:`setParseAction` for function call signatures. Unlike ``setParseAction``,
    functions passed to ``addCondition`` need to return boolean success/fail of the condition.

    Each predicate is wrapped in its own parse action that raises when the
    predicate returns a false value.  Returns ``self``.

    Optional keyword arguments:
    - message = define a custom message to be used in the raised exception
    - fatal   = if True, will raise ParseFatalException to stop parsing immediately; otherwise will raise ParseException
    - callDuringTry = if True, the conditions are also evaluated during lookaheads and alternate testing

    Example::

        integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
        year_int = integer.copy()
        year_int.addCondition(lambda toks: toks[0] >= 2000, message="Only support years 2000 and later")
        date_str = year_int + '/' + integer + '/' + integer

        result = date_str.parseString("1999/12/31")  # -> Exception: Only support years 2000 and later (at char 0), (line:1, col:1)
    """
    msg = kwargs.get("message", "failed user-defined condition")
    exc_type = ParseFatalException if kwargs.get("fatal", False) else ParseException

    def make_condition_action(condition):
        # Bind ``condition`` in its own scope.  Defining ``pa`` directly in the
        # loop would make every action look up the loop variable at call time,
        # so all of them would evaluate only the last predicate passed in.
        def pa(s,l,t):
            if not bool(condition(s,l,t)):
                raise exc_type(s,l,msg)
        return pa

    for fn in fns:
        self.parseAction.append(make_condition_action(_trim_arity(fn)))
    self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False)
    return self
1484
def setFailAction( self, fn ):
    """Register the action to run when parsing fails at this expression.

    The fail action is a callable invoked as ``fn(s,loc,expr,err)`` where:
    - s = string being parsed
    - loc = location where expression match was attempted and failed
    - expr = the parse expression that failed
    - err = the exception thrown

    Its return value is not used.  It may throw
    :class:`ParseFatalException` if parsing should stop immediately.
    Returns ``self``."""
    self.failAction = fn
    return self
1497
def _skipIgnorables( self, instring, loc ):
    """Advance ``loc`` past every run of ignorable expressions.

    Each expression in ``ignoreExprs`` is matched repeatedly until it
    fails; the whole list is retried until a full pass matches nothing.
    Returns the resulting location.
    """
    while True:
        skipped_any = False
        for ignorable in self.ignoreExprs:
            try:
                while True:
                    loc, _ = ignorable._parse( instring, loc )
                    skipped_any = True
            except ParseException:
                # this ignorable no longer matches here; try the next one
                continue
        if not skipped_any:
            return loc
1510
def preParse( self, instring, loc ):
    """Return the location at which matching should start: ``loc`` advanced
    past any ignorable expressions and then, if ``skipWhitespace`` is set,
    past any characters contained in ``whiteChars``."""
    if self.ignoreExprs:
        loc = self._skipIgnorables( instring, loc )

    if not self.skipWhitespace:
        return loc

    blanks = self.whiteChars
    end = len(instring)
    while loc < end and instring[loc] in blanks:
        loc += 1
    return loc
1522
def parseImpl( self, instring, loc, doActions=True ):
    """Base implementation: match nothing, returning ``loc`` unchanged
    together with a new empty token list."""
    no_tokens = []
    return loc, no_tokens
1525
def postParse( self, instring, loc, tokenlist ):
    """Base implementation of the post-parse hook: hand back ``tokenlist``
    unchanged (the very same object)."""
    result = tokenlist
    return result
1528
1529 #~ @profile
def _parseNoCache( self, instring, loc, doActions=True, callPreParse=True ):
    """Core (uncached) parse step for one element at ``loc``.

    Runs the optional pre-parse step, ``parseImpl`` and ``postParse``,
    wraps the tokens in a :class:`ParseResults`, then applies the parse
    actions (when ``doActions`` is true or ``callDuringTry`` is set).
    ``debugActions`` and ``failAction`` are invoked along the way.
    Returns ``(loc, retTokens)``; an ``IndexError`` raised by ``parseImpl``
    or by a parse action is converted to :class:`ParseException`.
    """
    debugging = ( self.debug ) #and doActions )

    # slow path: something needs to observe the attempt or its failure
    if debugging or self.failAction:
        #~ print ("Match",self,"at loc",loc,"(%d,%d)" % ( lineno(loc,instring), col(loc,instring) ))
        # debugActions[0] is called before the match attempt
        if (self.debugActions[0] ):
            self.debugActions[0]( instring, loc, self )
        if callPreParse and self.callPreparse:
            preloc = self.preParse( instring, loc )
        else:
            preloc = loc
        tokensStart = preloc
        try:
            try:
                loc,tokens = self.parseImpl( instring, preloc, doActions )
            except IndexError:
                # report an IndexError as a parse failure located at end of input
                raise ParseException( instring, len(instring), self.errmsg, self )
        except ParseBaseException as err:
            #~ print ("Exception raised:", err)
            # debugActions[2] is called with the exception of a failed attempt
            if self.debugActions[2]:
                self.debugActions[2]( instring, tokensStart, self, err )
            if self.failAction:
                self.failAction( instring, tokensStart, self, err )
            raise
    else:
        # fast path: no debugging and no fail action
        if callPreParse and self.callPreparse:
            preloc = self.preParse( instring, loc )
        else:
            preloc = loc
        tokensStart = preloc
        # only guard against IndexError when the element may raise it, or
        # when the start position is already at/after the end of the input
        if self.mayIndexError or preloc >= len(instring):
            try:
                loc,tokens = self.parseImpl( instring, preloc, doActions )
            except IndexError:
                raise ParseException( instring, len(instring), self.errmsg, self )
        else:
            loc,tokens = self.parseImpl( instring, preloc, doActions )

    tokens = self.postParse( instring, loc, tokens )

    retTokens = ParseResults( tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults )
    if self.parseAction and (doActions or self.callDuringTry):
        if debugging:
            try:
                for fn in self.parseAction:
                    try:
                        tokens = fn( instring, tokensStart, retTokens )
                    except IndexError as parse_action_exc:
                        exc = ParseException("exception raised in parse action")
                        exc.__cause__ = parse_action_exc
                        raise exc

                    # an action returning new tokens replaces the current results
                    if tokens is not None and tokens is not retTokens:
                        retTokens = ParseResults( tokens,
                                                  self.resultsName,
                                                  asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                                  modal=self.modalResults )
            except ParseBaseException as err:
                #~ print "Exception raised in user parse action:", err
                if (self.debugActions[2] ):
                    self.debugActions[2]( instring, tokensStart, self, err )
                raise
        else:
            # same loop as above, without the debug-action reporting
            for fn in self.parseAction:
                try:
                    tokens = fn( instring, tokensStart, retTokens )
                except IndexError as parse_action_exc:
                    exc = ParseException("exception raised in parse action")
                    exc.__cause__ = parse_action_exc
                    raise exc

                if tokens is not None and tokens is not retTokens:
                    retTokens = ParseResults( tokens,
                                              self.resultsName,
                                              asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                              modal=self.modalResults )
    if debugging:
        #~ print ("Matched",self,"->",retTokens.asList())
        # debugActions[1] is called after a successful match
        if (self.debugActions[1] ):
            self.debugActions[1]( instring, tokensStart, loc, self, retTokens )

    return loc, retTokens
1612
def tryParse( self, instring, loc ):
    """Attempt a match at ``loc`` with ``doActions=False`` and return the
    end location.  A :class:`ParseFatalException` is converted into an
    ordinary :class:`ParseException` located at ``loc``."""
    try:
        end_loc = self._parse( instring, loc, doActions=False )[0]
    except ParseFatalException:
        raise ParseException( instring, loc, self.errmsg, self)
    return end_loc
1618
def canParseNext(self, instring, loc):
    """Return ``True`` if this element matches at ``loc`` (as determined by
    :class:`tryParse`), ``False`` if the attempt raises
    :class:`ParseException` or ``IndexError``."""
    try:
        self.tryParse(instring, loc)
    except (ParseException, IndexError):
        matched = False
    else:
        matched = True
    return matched
1626
class _UnboundedCache(object):
    """Dictionary-backed cache with no size limit.

    The storage dict lives in the constructor's closure; ``get``, ``set``,
    ``clear`` and ``__len__`` are attached to each instance as bound
    methods.  ``get`` returns the ``not_in_cache`` sentinel for a missing
    key.
    """
    def __init__(self):
        store = {}
        self.not_in_cache = missing = object()

        def get(self, key):
            return store.get(key, missing)

        def set(self, key, value):
            store[key] = value

        def clear(self):
            store.clear()

        def cache_len(self):
            return len(store)

        # bind the closures above to this instance under their public names
        for attr_name, func in (("get", get),
                                ("set", set),
                                ("clear", clear),
                                ("__len__", cache_len)):
            setattr(self, attr_name, types.MethodType(func, self))
1648
if _OrderedDict is not None:
    class _FifoCache(object):
        """Cache holding at most ``size`` entries; when the limit is
        exceeded the oldest inserted entries are discarded first.

        ``get`` returns the ``not_in_cache`` sentinel for a missing key.
        Storage lives in the constructor's closure and the accessors are
        attached to each instance as bound methods.
        """
        def __init__(self, size):
            self.not_in_cache = not_in_cache = object()

            cache = _OrderedDict()

            def get(self, key):
                return cache.get(key, not_in_cache)

            def set(self, key, value):
                cache[key] = value
                while len(cache) > size:
                    try:
                        # popitem(False) removes the oldest inserted entry
                        cache.popitem(False)
                    except KeyError:
                        # nothing left to evict (only possible with a
                        # negative size) - stop instead of looping forever
                        break

            def clear(self):
                cache.clear()

            def cache_len(self):
                return len(cache)

            self.get = types.MethodType(get, self)
            self.set = types.MethodType(set, self)
            self.clear = types.MethodType(clear, self)
            self.__len__ = types.MethodType(cache_len, self)

else:
    class _FifoCache(object):
        """Fallback used when no ordered dict is available: a plain dict
        plus a deque recording key insertion order.  Same contract as the
        ordered-dict variant: at most ``size`` entries, oldest evicted
        first, ``get`` returns ``not_in_cache`` for a missing key.
        """
        def __init__(self, size):
            self.not_in_cache = not_in_cache = object()

            cache = {}
            # Unbounded deque on purpose: a deque created with maxlen drops
            # its oldest key silently on append, so the matching dict entry
            # could never be evicted and the dict would grow without limit.
            key_fifo = collections.deque()

            def get(self, key):
                return cache.get(key, not_in_cache)

            def set(self, key, value):
                # record each key once, at first insertion, so that updating
                # an existing entry keeps its place in the eviction order
                if key not in cache:
                    key_fifo.append(key)
                cache[key] = value
                while key_fifo and len(key_fifo) > size:
                    cache.pop(key_fifo.popleft(), None)

            def clear(self):
                cache.clear()
                key_fifo.clear()

            def cache_len(self):
                return len(cache)

            self.get = types.MethodType(get, self)
            self.set = types.MethodType(set, self)
            self.clear = types.MethodType(clear, self)
            self.__len__ = types.MethodType(cache_len, self)
1706
# argument cache for optimizing repeated calls when backtracking through recursive expressions
packrat_cache = {} # placeholder, replaced later by enablePackrat(); defined here so that resetCache() doesn't fail
# re-entrant lock held by _parseCache() while it reads or updates the cache
packrat_cache_lock = RLock()
# [hit count, miss count], indexed by HIT/MISS in _parseCache(); zeroed by resetCache()
packrat_cache_stats = [0, 0]
1711
1712 # this method gets repeatedly called during backtracking with the same arguments -
1713 # we can cache these arguments and save ourselves the trouble of re-parsing the contained expression
def _parseCache( self, instring, loc, doActions=True, callPreParse=True ):
    """Memoizing front end for :class:`_parseNoCache`.

    Results are keyed on ``(self, instring, loc, callPreParse, doActions)``.
    Both successful results and :class:`ParseBaseException` failures are
    cached; a cached failure is re-raised and a cached success is returned
    with a copy of its tokens.  Hit and miss counts are kept in
    ``ParserElement.packrat_cache_stats``.
    """
    HIT, MISS = 0, 1
    key = (self, instring, loc, callPreParse, doActions)
    with ParserElement.packrat_cache_lock:
        memo = ParserElement.packrat_cache
        cached = memo.get(key)

        if cached is not memo.not_in_cache:
            ParserElement.packrat_cache_stats[HIT] += 1
            if isinstance(cached, Exception):
                raise cached
            return (cached[0], cached[1].copy())

        ParserElement.packrat_cache_stats[MISS] += 1
        try:
            result = self._parseNoCache(instring, loc, doActions, callPreParse)
        except ParseBaseException as pe:
            # cache a copy of the exception, without the traceback
            memo.set(key, pe.__class__(*pe.args))
            raise
        # store a copy so later changes to the returned tokens stay out of the cache
        memo.set(key, (result[0], result[1].copy()))
        return result
1736
1737 _parse = _parseNoCache
1738
@staticmethod
def resetCache():
    """Empty the packrat cache and reset its hit/miss counters to zero
    (in place, so existing references to the stats list stay valid)."""
    ParserElement.packrat_cache.clear()
    stats = ParserElement.packrat_cache_stats
    stats[:] = [0] * len(stats)
1743
1744 _packratEnabled = False
@staticmethod
def enablePackrat(cache_size_limit=128):
    """Turn on "packrat" parsing, i.e. memoization of parse attempts.

    Repeated attempts to parse at the same string location (frequent in
    many complex grammars) can then return a cached value at once instead
    of re-running the parsing/validating code.  Both successful results
    and parsing exceptions are memoized.

    Parameters:

    - cache_size_limit - (default= ``128``) - an integer limits the size of
      the packrat cache; ``None`` makes the cache unbounded; ``0``
      effectively disables the cache.

    Only the first call has any effect; once packrat parsing is enabled,
    later calls return without changing anything.

    This speedup may break existing programs whose parse actions have
    side-effects, which is why packrat parsing is off when pyparsing is
    first imported.  To activate it, a program must call
    :class:`ParserElement.enablePackrat`, ideally immediately after
    importing pyparsing.

    Example::

        from pip._vendor import pyparsing
        pyparsing.ParserElement.enablePackrat()
    """
    if ParserElement._packratEnabled:
        return

    ParserElement._packratEnabled = True
    if cache_size_limit is not None:
        ParserElement.packrat_cache = ParserElement._FifoCache(cache_size_limit)
    else:
        ParserElement.packrat_cache = ParserElement._UnboundedCache()
    # route all parsing through the memoizing implementation
    ParserElement._parse = ParserElement._parseCache
1779
def parseString( self, instring, parseAll=False ):
    """
    Parse ``instring`` with this expression, starting at its beginning,
    and return the resulting tokens.  This is the main entry point for
    client code once the complete expression has been built.

    Set ``parseAll`` to True to require that the entire input string be
    consumed (equivalent to ending the grammar with ``StringEnd()``).

    Note: unless ``keepTabs`` is set, ``parseString`` calls
    ``expandtabs()`` on the input string so that proper column numbers are
    reported in parse actions.  If the input contains tabs and the grammar
    uses parse actions that index into the parsed string with ``loc``, a
    consistent view of the input can be ensured by:

    - calling ``parseWithTabs`` on the grammar before calling ``parseString``
      (see :class:`parseWithTabs`)
    - defining the parse action with the full ``(s,loc,toks)`` signature and
      referencing the input through its ``s`` argument
    - explicitly expanding the tabs in the input string before calling
      ``parseString``

    Example::

        Word('a').parseString('aaaaabaaa')  # -> ['aaaaa']
        Word('a').parseString('aaaaabaaa', parseAll=True)  # -> Exception: Expected end of text
    """
    ParserElement.resetCache()
    if not self.streamlined:
        self.streamline()
        #~ self.saveAsList = True
    for ignorable in self.ignoreExprs:
        ignorable.streamline()
    if not self.keepTabs:
        instring = instring.expandtabs()

    try:
        loc, tokens = self._parse( instring, 0 )
        if parseAll:
            # after skipping trailing whitespace/ignorables, only end-of-input may remain
            loc = self.preParse( instring, loc )
            (Empty() + StringEnd())._parse( instring, loc )
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc

    return tokens
1831
def scanString( self, instring, maxMatches=_MAX_INT, overlap=False ):
    """
    Scan the input string for expression matches.  Each match will return the
    matching tokens, start location, and end location.  May be called with optional
    ``maxMatches`` argument, to clip scanning after 'n' matches are found.  If
    ``overlap`` is specified, then overlapping matches will be reported.

    This is a generator yielding ``(tokens, start, end)`` tuples.

    Note that the start and end locations are reported relative to the string
    being parsed.  See :class:`parseString` for more information on parsing
    strings with embedded tabs.

    Example::

        source = "sldjf123lsdjjkf345sldkjf879lkjsfd987"
        print(source)
        for tokens,start,end in Word(alphas).scanString(source):
            print(' '*start + '^'*(end-start))
            print(' '*start + tokens[0])

    prints::

        sldjf123lsdjjkf345sldkjf879lkjsfd987
        ^^^^^
        sldjf
                ^^^^^^^
                lsdjjkf
                          ^^^^^^
                          sldkjf
                                   ^^^^^^
                                   lkjsfd
    """
    if not self.streamlined:
        self.streamline()
    for e in self.ignoreExprs:
        e.streamline()

    if not self.keepTabs:
        instring = _ustr(instring).expandtabs()
    instrlen = len(instring)
    loc = 0
    # bind the methods to locals once, outside the scanning loop
    preparseFn = self.preParse
    parseFn = self._parse
    ParserElement.resetCache()
    matches = 0
    try:
        while loc <= instrlen and matches < maxMatches:
            try:
                preloc = preparseFn( instring, loc )
                nextLoc,tokens = parseFn( instring, preloc, callPreParse=False )
            except ParseException:
                # no match here: retry one character further on
                loc = preloc+1
            else:
                if nextLoc > loc:
                    matches += 1
                    yield tokens, preloc, nextLoc
                    if overlap:
                        # NOTE(review): ``nextloc`` (lower-case) is a separate
                        # variable from ``nextLoc``; it only decides between
                        # jumping to the match end and advancing by one - confirm intent
                        nextloc = preparseFn( instring, loc )
                        if nextloc > loc:
                            loc = nextLoc
                        else:
                            loc += 1
                    else:
                        loc = nextLoc
                else:
                    # the match did not advance past loc: step forward to avoid looping in place
                    loc = preloc+1
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        else:
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            raise exc
1903
def transformString(self, instring):
    """
    Build a new string from ``instring`` in which every match of this
    expression is replaced by the tokens its parse action returns. Built on
    top of :class:`scanString`: attach a parse action that rewrites the token
    list, then call ``transformString()`` on the text to be converted.

    Note that calling this method sets ``keepTabs`` on the expression.

    Example::

        wd = Word(alphas)
        wd.setParseAction(lambda toks: toks[0].title())

        print(wd.transformString("now is the winter of our discontent made glorious summer by this sun of york."))

    prints::

        Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York.
    """
    pieces = []
    prev_end = 0
    # Leave <TAB>s alone: this avoids altering text outside the matches and
    # keeps the locations from scanString valid as indexes into instring.
    self.keepTabs = True
    try:
        for toks, start, end in self.scanString(instring):
            # text between the previous match and this one is copied verbatim
            pieces.append(instring[prev_end:start])
            if toks:
                if isinstance(toks, ParseResults):
                    pieces.extend(toks.asList())
                elif isinstance(toks, list):
                    pieces.extend(toks)
                else:
                    pieces.append(toks)
            prev_end = end
        pieces.append(instring[prev_end:])
        non_empty = [piece for piece in pieces if piece]
        return "".join(map(_ustr, _flatten(non_empty)))
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        # re-raise from here so pyparsing's internal frames are dropped
        raise exc
1949
def searchString(self, instring, maxMatches=_MAX_INT):
    """
    Convenience wrapper around :class:`scanString` that discards the match
    locations and returns just the matched tokens, collected into a single
    :class:`ParseResults`. ``maxMatches`` limits how many matches are gathered.

    Example::

        # a capitalized word starts with an uppercase letter, followed by zero or more lowercase letters
        cap_word = Word(alphas.upper(), alphas.lower())

        print(cap_word.searchString("More than Iron, more than Lead, more than Gold I need Electricity"))

        # the sum() builtin can be used to merge results into a single ParseResults object
        print(sum(cap_word.searchString("More than Iron, more than Lead, more than Gold I need Electricity")))

    prints::

        [['More'], ['Iron'], ['Lead'], ['Gold'], ['I'], ['Electricity']]
        ['More', 'Iron', 'Lead', 'Gold', 'I', 'Electricity']
    """
    try:
        token_groups = [toks for toks, _start, _end in self.scanString(instring, maxMatches)]
        return ParseResults(token_groups)
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        # re-raise from here so pyparsing's internal frames are dropped
        raise exc
1979
def split(self, instring, maxsplit=_MAX_INT, includeSeparators=False):
    """
    Generator method to split a string using the given expression as a separator.

    Parameters:
     - instring - the string to split
     - maxsplit - maximum number of splits to perform (passed to
       :class:`scanString` as ``maxMatches``)
     - includeSeparators - (default= ``False``) if true, the first token of
       each separator match is yielded between the surrounding pieces

    Yields the pieces of ``instring`` found between separator matches; the
    text after the last separator is always yielded, even when empty.

    Example::

        punc = oneOf(list(".,;:/-!?"))
        print(list(punc.split("This, this?, this sentence, is badly punctuated!")))

    prints::

        ['This', ' this', '', ' this sentence', ' is badly punctuated', '']
    """
    last = 0
    for t, s, e in self.scanString(instring, maxMatches=maxsplit):
        yield instring[last:s]
        if includeSeparators:
            yield t[0]
        last = e
    # whatever follows the final separator
    yield instring[last:]
2004
def __add__(self, other):
    """
    Implementation of the ``+`` operator - returns an :class:`And` of the two
    operands. A plain string operand is first wrapped using
    ``ParserElement._literalStringClass`` (:class:`Literal` by default).
    Any other non-``ParserElement`` operand produces a ``SyntaxWarning`` and
    a ``None`` result.

    Example::

        greet = Word(alphas) + "," + Word(alphas) + "!"
        hello = "Hello, World!"
        print (hello, "->", greet.parseString(hello))

    prints::

        Hello, World! -> ['Hello', ',', 'World', '!']
    """
    if isinstance(other, basestring):
        other = ParserElement._literalStringClass(other)
    if isinstance(other, ParserElement):
        return And([self, other])
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
2027
def __radd__(self, other):
    """
    Reflected ``+`` operator, used when the left operand is not a
    :class:`ParserElement`. Strings are wrapped as literals; unsupported
    operand types produce a ``SyntaxWarning`` and a ``None`` result.
    """
    if isinstance(other, basestring):
        other = ParserElement._literalStringClass(other)
    if isinstance(other, ParserElement):
        return other + self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
2039
def __sub__(self, other):
    """
    Implementation of the ``-`` operator - returns an :class:`And` with an
    error stop (``And._ErrorStop``) inserted between the two operands.
    Strings are wrapped as literals; unsupported operand types produce a
    ``SyntaxWarning`` and a ``None`` result.
    """
    if isinstance(other, basestring):
        other = ParserElement._literalStringClass(other)
    if isinstance(other, ParserElement):
        return self + And._ErrorStop() + other
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
2051
def __rsub__(self, other):
    """
    Reflected ``-`` operator, used when the left operand is not a
    :class:`ParserElement`. Strings are wrapped as literals; unsupported
    operand types produce a ``SyntaxWarning`` and a ``None`` result.
    """
    if isinstance(other, basestring):
        other = ParserElement._literalStringClass(other)
    if isinstance(other, ParserElement):
        return other - self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
2063
def __mul__(self, other):
    """
    Implementation of * operator, allows use of ``expr * 3`` in place of
    ``expr + expr + expr``.  Expressions may also be multiplied by a 2-integer
    tuple, similar to ``{min,max}`` multipliers in regular expressions.  Tuples
    may also include ``None`` as in:

     - ``expr*(n,None)`` or ``expr*(n,)`` is equivalent
       to ``expr*n + ZeroOrMore(expr)``
       (read as "at least n instances of ``expr``")
     - ``expr*(None,n)`` is equivalent to ``expr*(0,n)``
       (read as "0 to n instances of ``expr``")
     - ``expr*(None,None)`` is equivalent to ``ZeroOrMore(expr)``
     - ``expr*(1,None)`` is equivalent to ``OneOrMore(expr)``

    Note that ``expr*(None,n)`` does not raise an exception if
    more than n exprs exist in the input stream; that is,
    ``expr*(None,n)`` does not enforce a maximum number of expr
    occurrences.  If this behavior is desired, then write
    ``expr*(None,n) + ~expr``

    Raises ``TypeError`` if ``other`` is neither an int nor a tuple of
    ints/``None``, and ``ValueError`` for a negative count, a maximum smaller
    than the minimum, or a multiplier of ``0`` / ``(0,0)``.
    """
    if isinstance(other, int):
        minElements, optElements = other, 0
    elif isinstance(other, tuple):
        # pad short tuples with None so (n,) behaves like (n, None)
        other = (other + (None, None))[:2]
        if other[0] is None:
            other = (0, other[1])
        if isinstance(other[0], int) and other[1] is None:
            if other[0] == 0:
                return ZeroOrMore(self)
            if other[0] == 1:
                return OneOrMore(self)
            else:
                return self * other[0] + ZeroOrMore(self)
        elif isinstance(other[0], int) and isinstance(other[1], int):
            minElements, optElements = other
            # from here on optElements is the count of *optional* repetitions
            optElements -= minElements
        else:
            # interpolate the operand types into the message (they were
            # previously passed as extra exception args and never formatted)
            raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects"
                            % (type(other[0]), type(other[1])))
    else:
        raise TypeError("cannot multiply 'ParserElement' and '%s' objects" % type(other))

    if minElements < 0:
        raise ValueError("cannot multiply ParserElement by negative value")
    if optElements < 0:
        raise ValueError("second tuple value must be greater or equal to first tuple value")
    if minElements == optElements == 0:
        raise ValueError("cannot multiply ParserElement by 0 or (0,0)")

    if optElements:
        def makeOptionalList(n):
            # nest Optionals so that up to n further repetitions may match
            if n > 1:
                return Optional(self + makeOptionalList(n - 1))
            else:
                return Optional(self)
        if minElements:
            if minElements == 1:
                ret = self + makeOptionalList(optElements)
            else:
                ret = And([self] * minElements) + makeOptionalList(optElements)
        else:
            ret = makeOptionalList(optElements)
    else:
        if minElements == 1:
            ret = self
        else:
            ret = And([self] * minElements)
    return ret
2131
def __rmul__(self, other):
    """
    Reflected ``*`` operator (``3 * expr``); delegates to :class:`__mul__`
    with the same operand.
    """
    return self.__mul__(other)
2134
def __or__(self, other):
    """
    Implementation of the ``|`` operator - returns a :class:`MatchFirst` of
    the two operands. Strings are wrapped as literals; unsupported operand
    types produce a ``SyntaxWarning`` and a ``None`` result.
    """
    if isinstance(other, basestring):
        other = ParserElement._literalStringClass(other)
    if isinstance(other, ParserElement):
        return MatchFirst([self, other])
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
2146
def __ror__(self, other):
    """
    Reflected ``|`` operator, used when the left operand is not a
    :class:`ParserElement`. Strings are wrapped as literals; unsupported
    operand types produce a ``SyntaxWarning`` and a ``None`` result.
    """
    if isinstance(other, basestring):
        other = ParserElement._literalStringClass(other)
    if isinstance(other, ParserElement):
        return other | self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
2158
def __xor__(self, other):
    """
    Implementation of the ``^`` operator - returns an :class:`Or` of the two
    operands. Strings are wrapped as literals; unsupported operand types
    produce a ``SyntaxWarning`` and a ``None`` result.
    """
    if isinstance(other, basestring):
        other = ParserElement._literalStringClass(other)
    if isinstance(other, ParserElement):
        return Or([self, other])
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
2170
def __rxor__(self, other):
    """
    Reflected ``^`` operator, used when the left operand is not a
    :class:`ParserElement`. Strings are wrapped as literals; unsupported
    operand types produce a ``SyntaxWarning`` and a ``None`` result.
    """
    if isinstance(other, basestring):
        other = ParserElement._literalStringClass(other)
    if isinstance(other, ParserElement):
        return other ^ self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
2182
def __and__(self, other):
    """
    Implementation of the ``&`` operator - returns an :class:`Each` of the
    two operands. Strings are wrapped as literals; unsupported operand types
    produce a ``SyntaxWarning`` and a ``None`` result.
    """
    if isinstance(other, basestring):
        other = ParserElement._literalStringClass(other)
    if isinstance(other, ParserElement):
        return Each([self, other])
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
2194
def __rand__(self, other):
    """
    Reflected ``&`` operator, used when the left operand is not a
    :class:`ParserElement`. Strings are wrapped as literals; unsupported
    operand types produce a ``SyntaxWarning`` and a ``None`` result.
    """
    if isinstance(other, basestring):
        other = ParserElement._literalStringClass(other)
    if isinstance(other, ParserElement):
        return other & self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
2206
def __invert__(self):
    """
    Implementation of the ``~`` operator - wraps this expression in a
    :class:`NotAny` and returns it.
    """
    negated = NotAny(self)
    return negated
2212
def __call__(self, name=None):
    """
    Shortcut for :class:`setResultsName`, with ``listAllMatches=False``.

    If ``name`` is given with a trailing ``'*'`` character, then ``listAllMatches`` will be
    passed as ``True``.

    If ``name`` is omitted, same as calling :class:`copy`.

    Example::

        # these are equivalent
        userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno")
        userdata = Word(alphas)("name") + Word(nums+"-")("socsecno")
    """
    if name is None:
        return self.copy()
    return self.setResultsName(name)
2232
def suppress(self):
    """
    Return this expression wrapped in :class:`Suppress`, so that what it
    matches is left out of the returned output; handy for keeping punctuation
    from cluttering up the results.
    """
    suppressed = Suppress(self)
    return suppressed
2239
def leaveWhitespace(self):
    """
    Turn off the skipping of whitespace before this :class:`ParserElement`'s
    pattern is matched, and return ``self``. Normally only used internally by
    the pyparsing module, but may be needed in whitespace-sensitive grammars.
    """
    setattr(self, "skipWhitespace", False)
    return self
2248
def setWhitespaceChars(self, chars):
    """
    Replace the default whitespace characters for this element with ``chars``.
    Whitespace skipping is switched on, ``copyDefaultWhiteChars`` is cleared,
    and ``self`` is returned.
    """
    self.whiteChars = chars
    self.skipWhitespace = True
    self.copyDefaultWhiteChars = False
    return self
2257
def parseWithTabs(self):
    """
    Turn off the default expansion of ``<TAB>`` characters to spaces that is
    otherwise applied to the input string before parsing, and return ``self``.
    Must be called before ``parseString`` when the grammar contains elements
    that match ``<TAB>`` characters.
    """
    setattr(self, "keepTabs", True)
    return self
2266
def ignore(self, other):
    """
    Register ``other`` as an expression to be skipped over (e.g., comments)
    during pattern matching, and return ``self``. May be called repeatedly to
    define multiple ignorable patterns. A string argument is wrapped in
    :class:`Suppress`; any other non-``Suppress`` expression is copied and
    then wrapped.

    Example::

        patt = OneOrMore(Word(alphas))
        patt.parseString('ablaj /* comment */ lskjd') # -> ['ablaj']

        patt.ignore(cStyleComment)
        patt.parseString('ablaj /* comment */ lskjd') # -> ['ablaj', 'lskjd']
    """
    if isinstance(other, basestring):
        other = Suppress(other)

    if not isinstance(other, Suppress):
        self.ignoreExprs.append(Suppress(other.copy()))
    elif other not in self.ignoreExprs:
        # an already-registered Suppress is not added a second time
        self.ignoreExprs.append(other)
    return self
2290
def setDebugActions(self, startAction, successAction, exceptionAction):
    """
    Switch on debugging for this expression, installing the three given
    callbacks as its debug actions. Any callback passed as a false value is
    replaced by the corresponding module default. Returns ``self``.
    """
    on_start = startAction or _defaultStartDebugAction
    on_success = successAction or _defaultSuccessDebugAction
    on_exception = exceptionAction or _defaultExceptionDebugAction
    self.debugActions = (on_start, on_success, on_exception)
    self.debug = True
    return self
2300
def setDebug(self, flag=True):
    """
    Turn the display of debugging messages during pattern matching on
    (``flag`` true, the default) or off (``flag`` false). Returns ``self``.

    Example::

        wd = Word(alphas).setName("alphaword")
        integer = Word(nums).setName("numword")
        term = wd | integer

        # turn on debugging for wd
        wd.setDebug()

        OneOrMore(term).parseString("abc 123 xyz 890")

    prints::

        Match alphaword at loc 0(1,1)
        Matched alphaword -> ['abc']
        Match alphaword at loc 3(1,4)
        Exception raised:Expected alphaword (at char 4), (line:1, col:5)
        Match alphaword at loc 7(1,8)
        Matched alphaword -> ['xyz']
        Match alphaword at loc 11(1,12)
        Exception raised:Expected alphaword (at char 12), (line:1, col:13)
        Match alphaword at loc 15(1,16)
        Exception raised:Expected alphaword (at char 15), (line:1, col:16)

    The output above comes from the default debug actions; custom ones can be
    installed with :class:`setDebugActions`. Before each attempt to match the
    ``wd`` expression, the message ``"Match <exprname> at loc <n>(<line>,<col>)"``
    is shown, followed by either a ``"Matched"`` or an ``"Exception raised"``
    message. Giving the expression a readable name with :class:`setName` makes
    these messages easier to follow - without it, the default name created for
    the :class:`Word` expression is ``"W:(ABCD...)"``.
    """
    if not flag:
        self.debug = False
    else:
        self.setDebugActions(_defaultStartDebugAction,
                             _defaultSuccessDebugAction,
                             _defaultExceptionDebugAction)
    return self
2343
def __str__(self):
    """Return this element's ``name`` attribute."""
    return self.name
2346
def __repr__(self):
    """Return the same text as the string conversion of this element."""
    return _ustr(self)
2349
def streamline(self):
    """
    Mark this element as streamlined, clear its cached ``strRepr``, and
    return ``self``.
    """
    self.strRepr = None
    self.streamlined = True
    return self
2354
def checkRecursion(self, parseElementList):
    """No-op in this implementation; always returns ``None``."""
    return None
2357
def validate(self, validateTrace=[]):
    """
    Check defined expressions for valid structure and for infinite recursive
    definitions, by running :class:`checkRecursion` with an empty element list.
    ``validateTrace`` is not used by this implementation.
    """
    seen = []
    self.checkRecursion(seen)
2363
def parseFile(self, file_or_filename, parseAll=False):
    """
    Run :class:`parseString` over the contents of a file.

    ``file_or_filename`` may be an object with a ``read()`` method or a
    filename; in the latter case the file is opened, read in full, and closed
    before parsing begins. ``parseAll`` is passed through to ``parseString``.
    """
    try:
        text = file_or_filename.read()
    except AttributeError:
        # no read() method - treat the argument as a filename
        with open(file_or_filename, "r") as handle:
            text = handle.read()
    try:
        return self.parseString(text, parseAll)
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        # re-raise from here so pyparsing's internal frames are dropped
        raise exc
2383
def __eq__(self, other):
    """
    Compare this element with ``other``.

     - another :class:`ParserElement` is equal if it is the same object or
       has the same instance attributes (``vars()``)
     - a string is "equal" if this expression matches it (see :class:`matches`)
     - any other type is never equal
    """
    if isinstance(other, ParserElement):
        return self is other or vars(self) == vars(other)
    elif isinstance(other, basestring):
        return self.matches(other)
    else:
        # The previous fallback compared a ``super`` proxy object with
        # ``other`` rather than delegating to the parent class; state the
        # result explicitly instead.
        return False
2391
def __ne__(self, other):
    """Return the negation of ``self == other``."""
    equal = (self == other)
    return not equal
2394
def __hash__(self):
    """Return a hash derived from this object's ``id()``."""
    identity = id(self)
    return hash(identity)
2397
def __req__(self, other):
    """Return the result of ``self == other``."""
    result = (self == other)
    return result
2400
def __rne__(self, other):
    """Return the negation of ``self == other``."""
    equal = (self == other)
    return not equal
2403
def matches(self, testString, parseAll=True):
    """
    Quick yes/no check of this parser against a test string. Handy for simple
    inline microtests of sub-expressions while building up a larger parser.

    Parameters:
     - testString - to test against this expression for a match
     - parseAll - (default= ``True``) - flag to pass to :class:`parseString` when running tests

    Returns ``True`` if parsing succeeds, ``False`` if it raises a
    ``ParseBaseException``.

    Example::

        expr = Word(nums)
        assert expr.matches("100")
    """
    try:
        self.parseString(_ustr(testString), parseAll=parseAll)
    except ParseBaseException:
        return False
    return True
2423
def runTests(self, tests, parseAll=True, comment='#',
             fullDump=True, printResults=True, failureTests=False, postParse=None):
    """
    Execute the parse expression on a series of test strings, showing each
    test, the parsed results or where the parse failed. Quick and easy way to
    run a parse expression against a list of sample strings.

    Parameters:
     - tests - a list of separate test strings, or a multiline string of test strings
     - parseAll - (default= ``True``) - flag to pass to :class:`parseString` when running tests
     - comment - (default= ``'#'``) - expression for indicating embedded comments in the test
          string; pass None to disable comment filtering
     - fullDump - (default= ``True``) - dump results as list followed by results names in nested outline;
          if False, only dump nested list
     - printResults - (default= ``True``) prints test output to stdout
     - failureTests - (default= ``False``) indicates if these tests are expected to fail parsing
     - postParse - (default= ``None``) optional callback for successful parse results; called as
          `fn(test_string, parse_results)` and returns a string to be added to the test output

    Returns: a (success, results) tuple, where success indicates that all tests succeeded
    (or failed if ``failureTests`` is True), and the results contain a list of
    ``(test string, parse results or exception)`` pairs, one per test

    Example::

        number_expr = pyparsing_common.number.copy()

        result = number_expr.runTests('''
            # unsigned integer
            100
            # negative integer
            -100
            # float with scientific notation
            6.02e23
            ''')
        print("Success" if result[0] else "Failed!")

        result = number_expr.runTests('''
            # stray character
            100Z
            # too many '.'
            3.14.159
            ''', failureTests=True)
        print("Success" if result[0] else "Failed!")

    prints::

        # unsigned integer
        100
        [100]

        # negative integer
        -100
        [-100]

        # float with scientific notation
        6.02e23
        [6.02e+23]

        Success

        # stray character
        100Z
           ^
        FAIL: Expected end of text (at char 3), (line:1, col:4)

        # too many '.'
        3.14.159
            ^
        FAIL: Expected end of text (at char 4), (line:1, col:5)

        Success

    Each test string must be on a single line. If you want to test a string that spans multiple
    lines, create a test like this::

        expr.runTest(r"this is a test\\n of strings that spans \\n 3 lines")

    (Note that this is a raw string literal, you must include the leading 'r'.)
    """
    if isinstance(tests, basestring):
        tests = list(map(str.strip, tests.rstrip().splitlines()))
    if isinstance(comment, basestring):
        comment = Literal(comment)

    # Loop-invariant helpers, built once rather than once per test string:
    # NL turns literal backslash-n marks (outside quoted strings) into real
    # newlines, and BOM is stripped from the front of each test.
    NL = Literal(r'\n').addParseAction(replaceWith('\n')).ignore(quotedString)
    BOM = '\ufeff'

    allResults = []
    comments = []
    success = True
    for t in tests:
        # comment lines, and blank lines that follow a comment, are collected
        # and shown ahead of the next real test
        if (comment is not None and comment.matches(t, False)) or (comments and not t):
            comments.append(t)
            continue
        if not t:
            continue
        out = ['\n'.join(comments), t]
        comments = []
        try:
            # convert newline marks to actual newlines, and strip leading BOM if present
            t = NL.transformString(t.lstrip(BOM))
            result = self.parseString(t, parseAll=parseAll)
            out.append(result.dump(full=fullDump))
            # a successful parse counts as a failure when failures are expected
            success = success and not failureTests
            if postParse is not None:
                try:
                    pp_value = postParse(t, result)
                    if pp_value is not None:
                        out.append(str(pp_value))
                except Exception as e:
                    out.append("{0} failed: {1}: {2}".format(postParse.__name__, type(e).__name__, e))
        except ParseBaseException as pe:
            fatal = "(FATAL)" if isinstance(pe, ParseFatalException) else ""
            if '\n' in t:
                # multi-line test: show only the offending line, caret under its column
                out.append(line(pe.loc, t))
                out.append(' '*(col(pe.loc, t)-1) + '^' + fatal)
            else:
                out.append(' '*pe.loc + '^' + fatal)
            out.append("FAIL: " + str(pe))
            success = success and failureTests
            result = pe
        except Exception as exc:
            out.append("FAIL-EXCEPTION: " + str(exc))
            success = success and failureTests
            result = exc

        if printResults:
            if fullDump:
                out.append('')
            print('\n'.join(out))

        allResults.append((t, result))

    return success, allResults
2570
2571
class Token(ParserElement):
    """Abstract :class:`ParserElement` subclass that serves as the base for
    atomic matching patterns.
    """
    def __init__(self):
        # tokens are initialized with savelist turned off
        super(Token, self).__init__(savelist=False)
2578
2579
class Empty(Token):
    """A token that matches nothing at all, and therefore always succeeds.
    """
    def __init__(self):
        super(Empty, self).__init__()
        self.mayIndexError = False
        self.mayReturnEmpty = True
        self.name = "Empty"
2588
2589
class NoMatch(Token):
    """A token that can never match; parsing it always raises
    :class:`ParseException`.
    """
    def __init__(self):
        super(NoMatch, self).__init__()
        self.errmsg = "Unmatchable token"
        self.mayIndexError = False
        self.mayReturnEmpty = True
        self.name = "NoMatch"

    def parseImpl(self, instring, loc, doActions=True):
        # unconditional failure at the current location
        failure = ParseException(instring, loc, self.errmsg, self)
        raise failure
2602
2603
class Literal(Token):
    """Token that matches one specific string, exactly.

    Example::

        Literal('blah').parseString('blah')  # -> ['blah']
        Literal('blah').parseString('blahfooblah')  # -> ['blah']
        Literal('blah').parseString('bla')  # -> Exception: Expected "blah"

    For case-insensitive matching, use :class:`CaselessLiteral`.

    For keyword matching (force word break before and after the matched string),
    use :class:`Keyword` or :class:`CaselessKeyword`.
    """
    def __init__(self, matchString):
        super(Literal, self).__init__()
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            # empty match string: warn and turn this instance into an Empty
            warnings.warn("null string passed to Literal; use Empty() instead",
                          SyntaxWarning, stacklevel=2)
            self.__class__ = Empty
        self.name = '"%s"' % _ustr(self.match)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = False

    # Performance tuning: this routine gets called a *lot*.
    # Compare the first character before anything else, and for a
    # one-character literal skip the startswith call altogether.
    def parseImpl(self, instring, loc, doActions=True):
        if instring[loc] == self.firstMatchChar:
            if self.matchLen == 1 or instring.startswith(self.match, loc):
                return loc + self.matchLen, self.match
        raise ParseException(instring, loc, self.errmsg, self)
# Short module-level alias for the Literal class.
_L = Literal
# Register Literal as the class the ParserElement operator methods use to wrap
# plain-string operands (they call ``ParserElement._literalStringClass(other)``).
ParserElement._literalStringClass = Literal
2644
class Keyword(Token):
    """Token to exactly match a specified string as a keyword, that is,
    it must not be immediately preceded or followed by a keyword
    character.  Compare with :class:`Literal`:

     - ``Literal("if")`` will match the leading ``'if'`` in
       ``'ifAndOnlyIf'``.
     - ``Keyword("if")`` will not; it will only match the leading
       ``'if'`` in ``'if x=1'``, or ``'if(y==2)'``

    Accepts two optional constructor arguments in addition to the
    keyword string:

     - ``identChars`` is a string of characters that would be valid
       identifier characters, defaulting to all alphanumerics + "_" and
       "$"
     - ``caseless`` allows case-insensitive matching, default is ``False``.

    Example::

        Keyword("start").parseString("start")  # -> ['start']
        Keyword("start").parseString("starting")  # -> Exception

    For case-insensitive matching, use :class:`CaselessKeyword`.
    """
    DEFAULT_KEYWORD_CHARS = alphanums+"_$"

    def __init__(self, matchString, identChars=None, caseless=False):
        super(Keyword, self).__init__()
        if identChars is None:
            identChars = Keyword.DEFAULT_KEYWORD_CHARS
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            warnings.warn("null string passed to Keyword; use Empty() instead",
                          SyntaxWarning, stacklevel=2)
        self.name = '"%s"' % self.match
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = False
        self.mayIndexError = False
        self.caseless = caseless
        if caseless:
            # caseless matching compares upper-cased text on both sides
            self.caselessmatch = matchString.upper()
            identChars = identChars.upper()
        self.identChars = set(identChars)

    def parseImpl(self, instring, loc, doActions=True):
        # A match needs the keyword text at loc AND no identifier character
        # directly after it (or end of input) AND none directly before it
        # (or start of input).
        if self.caseless:
            if ((instring[loc:loc+self.matchLen].upper() == self.caselessmatch) and
                    (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) and
                    (loc == 0 or instring[loc-1].upper() not in self.identChars)):
                return loc+self.matchLen, self.match
        else:
            if (instring[loc] == self.firstMatchChar and
                    (self.matchLen == 1 or instring.startswith(self.match, loc)) and
                    (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen] not in self.identChars) and
                    (loc == 0 or instring[loc-1] not in self.identChars)):
                return loc+self.matchLen, self.match
        raise ParseException(instring, loc, self.errmsg, self)

    def copy(self):
        """Return a copy of this keyword that keeps its identifier characters."""
        c = super(Keyword, self).copy()
        # Carry over this instance's own identifier characters. Resetting them
        # to DEFAULT_KEYWORD_CHARS (as was done before) discarded any custom
        # ``identChars`` passed to the constructor.
        c.identChars = set(self.identChars)
        return c

    @staticmethod
    def setDefaultKeywordChars(chars):
        """Overrides the default Keyword chars
        """
        Keyword.DEFAULT_KEYWORD_CHARS = chars
2717
class CaselessLiteral(Literal):
    """Token that matches a given string without regard to letter case.
    Note: the matched results will always be in the case of the given
    match string, NOT the case of the input text.

    Example::

        OneOrMore(CaselessLiteral("CMD")).parseString("cmd CMD Cmd10") # -> ['CMD', 'CMD', 'CMD']

    (Contrast with example for :class:`CaselessKeyword`.)
    """
    def __init__(self, matchString):
        # the parent stores the upper-cased form, which is what gets compared
        super(CaselessLiteral, self).__init__(matchString.upper())
        # keep the literal exactly as defined, to hand back on a match
        self.returnString = matchString
        self.name = "'%s'" % self.returnString
        self.errmsg = "Expected " + self.name

    def parseImpl(self, instring, loc, doActions=True):
        end = loc + self.matchLen
        if instring[loc:end].upper() != self.match:
            raise ParseException(instring, loc, self.errmsg, self)
        return end, self.returnString
2740
class CaselessKeyword(Keyword):
    """
    Case-insensitive variant of :class:`Keyword`.

    Example::

        OneOrMore(CaselessKeyword("CMD")).parseString("cmd CMD Cmd10") # -> ['CMD', 'CMD']

    (Contrast with example for :class:`CaselessLiteral`.)
    """
    def __init__(self, matchString, identChars=None):
        # same as Keyword, with caseless matching always switched on
        super(CaselessKeyword, self).__init__(matchString, identChars, caseless=True)
2753
class CloseMatch(Token):
    """A variation on :class:`Literal` which matches "close" matches,
    that is, strings with at most 'n' mismatching characters.
    :class:`CloseMatch` takes parameters:

     - ``match_string`` - string to be matched
     - ``maxMismatches`` - (``default=1``) maximum number of
       mismatches allowed to count as a match

    The results from a successful parse will contain the matched text
    from the input string and the following named results:

     - ``mismatches`` - a list of the positions within the
       match_string where mismatches were found
     - ``original`` - the original match_string used to compare
       against the input string

    If ``mismatches`` is an empty list, then the match was an exact
    match.

    Example::

        patt = CloseMatch("ATCATCGAATGGA")
        patt.parseString("ATCATCGAAXGGA") # -> (['ATCATCGAAXGGA'], {'mismatches': [[9]], 'original': ['ATCATCGAATGGA']})
        patt.parseString("ATCAXCGAAXGGA") # -> Exception: Expected 'ATCATCGAATGGA' (with up to 1 mismatches) (at char 0), (line:1, col:1)

        # exact match
        patt.parseString("ATCATCGAATGGA") # -> (['ATCATCGAATGGA'], {'mismatches': [[]], 'original': ['ATCATCGAATGGA']})

        # close match allowing up to 2 mismatches
        patt = CloseMatch("ATCATCGAATGGA", maxMismatches=2)
        patt.parseString("ATCAXCGAAXGGA") # -> (['ATCAXCGAAXGGA'], {'mismatches': [[4, 9]], 'original': ['ATCATCGAATGGA']})
    """
    def __init__(self, match_string, maxMismatches=1):
        super(CloseMatch, self).__init__()
        self.name = match_string
        self.match_string = match_string
        self.maxMismatches = maxMismatches
        self.errmsg = "Expected %r (with up to %d mismatches)" % (self.match_string, self.maxMismatches)
        self.mayIndexError = False
        self.mayReturnEmpty = False

    def parseImpl(self, instring, loc, doActions=True):
        start = loc
        match_string = self.match_string
        maxloc = start + len(match_string)

        # only attempt a match if the input has enough characters left
        if maxloc <= len(instring):
            mismatches = []
            maxMismatches = self.maxMismatches

            for match_stringloc, (src, mat) in enumerate(zip(instring[start:maxloc], match_string)):
                if src != mat:
                    mismatches.append(match_stringloc)
                    if len(mismatches) > maxMismatches:
                        break
            else:
                # Every character was compared without exceeding the limit.
                # The match ends at maxloc, an absolute position in instring
                # (the old ``match_stringloc + 1`` was relative to the match
                # start, which is wrong whenever start != 0).
                loc = maxloc
                results = ParseResults([instring[start:loc]])
                results['original'] = match_string
                results['mismatches'] = mismatches
                return loc, results

        raise ParseException(instring, loc, self.errmsg, self)
2821
2822
class Word(Token):
    """Token that matches a run of characters drawn from allowed sets.

    ``initChars`` lists the characters permitted in the first position;
    ``bodyChars`` lists those permitted afterwards and falls back to
    ``initChars`` when omitted.  Optional ``min``, ``max`` and ``exact``
    arguments constrain the length: ``min`` defaults to 1 (values < 1
    are rejected), while ``max`` and ``exact`` default to 0, meaning no
    maximum or exact length restriction.  ``excludeChars`` names
    characters to remove from the given sets; useful to define a word of
    all printables except for one or two characters, for instance.

    :class:`srange` is useful for defining custom character set strings
    for defining ``Word`` expressions, using range notation from
    regular expression character sets.

    A common mistake is to use :class:`Word` to match a specific literal
    string, as in ``Word("Address")``. Remember that :class:`Word`
    uses the string argument to define *sets* of matchable characters.
    This expression would match "Add", "AAA", "dAred", or any other word
    made up of the characters 'A', 'd', 'r', 'e', and 's'. To match an
    exact literal string, use :class:`Literal` or :class:`Keyword`.

    pyparsing includes helper strings for building Words:

     - :class:`alphas`
     - :class:`nums`
     - :class:`alphanums`
     - :class:`hexnums`
     - :class:`alphas8bit` (alphabetic characters in ASCII range 128-255
       - accented, tilded, umlauted, etc.)
     - :class:`punc8bit` (non-alphabetic characters in ASCII range
       128-255 - currency, symbols, superscripts, diacriticals, etc.)
     - :class:`printables` (any non-whitespace character)

    Example::

        # a word composed of digits
        integer = Word(nums) # equivalent to Word("0123456789") or Word(srange("0-9"))

        # a word with a leading capital, and zero or more lowercase
        capital_word = Word(alphas.upper(), alphas.lower())

        # hostnames are alphanumeric, with leading alpha, and '-'
        hostname = Word(alphas, alphanums+'-')

        # roman numeral (not a strict parser, accepts invalid mix of characters)
        roman = Word("IVXLCDM")

        # any string of non-whitespace characters, except for ','
        csv_value = Word(printables, excludeChars=",")
    """
    def __init__(self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False, excludeChars=None):
        super(Word, self).__init__()
        if excludeChars:
            excludeChars = set(excludeChars)
            initChars = ''.join(ch for ch in initChars if ch not in excludeChars)
            if bodyChars:
                bodyChars = ''.join(ch for ch in bodyChars if ch not in excludeChars)
        # An absent (or emptied) body set means "same as the initial set".
        if not bodyChars:
            bodyChars = initChars

        self.initCharsOrig = initChars
        self.initChars = set(initChars)
        self.bodyCharsOrig = bodyChars
        self.bodyChars = set(bodyChars)

        self.maxSpecified = max > 0

        if min < 1:
            raise ValueError("cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted")

        # ``exact`` overrides both bounds when given.
        if exact > 0:
            self.minLen = self.maxLen = exact
        else:
            self.minLen = min
            self.maxLen = max if max > 0 else _MAX_INT

        # _ustr(self) goes through __str__, which needs the *Orig attributes
        # assigned above.
        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.asKeyword = asKeyword

        # With default length limits and no space in either set, the word can
        # be matched by one compiled regular expression instead of the
        # character loop in parseImpl.
        default_lengths = (min == 1 and max == 0 and exact == 0)
        if ' ' not in self.initCharsOrig + self.bodyCharsOrig and default_lengths:
            if self.bodyCharsOrig == self.initCharsOrig:
                pattern = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig)
            elif len(self.initCharsOrig) == 1:
                pattern = "%s[%s]*" % (
                    re.escape(self.initCharsOrig),
                    _escapeRegexRangeChars(self.bodyCharsOrig),
                )
            else:
                pattern = "[%s][%s]*" % (
                    _escapeRegexRangeChars(self.initCharsOrig),
                    _escapeRegexRangeChars(self.bodyCharsOrig),
                )
            if self.asKeyword:
                pattern = r"\b" + pattern + r"\b"
            self.reString = pattern
            try:
                self.re = re.compile(pattern)
            except Exception:
                # Fall back to the character-by-character matcher.
                self.re = None

    def parseImpl(self, instring, loc, doActions=True):
        """Match a word at ``loc`` and return ``(end, text)``; raise
        :class:`ParseException` when no acceptable word starts there.
        """
        # Regex path: used whenever __init__ managed to compile a pattern.
        if self.re:
            found = self.re.match(instring, loc)
            if not found:
                raise ParseException(instring, loc, self.errmsg, self)
            return found.end(), found.group()

        if instring[loc] not in self.initChars:
            raise ParseException(instring, loc, self.errmsg, self)

        start = loc
        instrlen = len(instring)
        bodychars = self.bodyChars
        maxloc = min(start + self.maxLen, instrlen)
        loc += 1
        while loc < maxloc and instring[loc] in bodychars:
            loc += 1

        # True when the character right after the match could still be part
        # of the word.
        more_body_follows = loc < instrlen and instring[loc] in bodychars
        rejected = (
            loc - start < self.minLen
            or (self.maxSpecified and more_body_follows)
            or (self.asKeyword
                and ((start > 0 and instring[start - 1] in bodychars)
                     or more_body_follows))
        )
        if rejected:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]

    def __str__(self):
        # Prefer the base-class string; any failure there falls through to
        # the generated representation below.
        try:
            return super(Word, self).__str__()
        except Exception:
            pass

        if self.strRepr is None:

            def abbreviate(chars):
                # Show at most four characters of a set.
                return (chars[:4] + "...") if len(chars) > 4 else chars

            lead, body = self.initCharsOrig, self.bodyCharsOrig
            if lead == body:
                self.strRepr = "W:(%s)" % abbreviate(lead)
            else:
                self.strRepr = "W:(%s,%s)" % (abbreviate(lead), abbreviate(body))

        return self.strRepr
2987
2988
class Char(Word):
    """A short-cut class for defining ``Word(characters, exact=1)``,
    when defining a match of any single character in a string of
    characters.

    Parameters:

    - ``charset`` - string of characters, any one of which may match
    - ``asKeyword`` - (``default=False``) when true, the single-character
      pattern is wrapped in ``\\b`` word-boundary anchors, mirroring what
      :class:`Word` does for its own regular expression
    - ``excludeChars`` - (``default=None``) characters to remove from
      ``charset``
    """
    def __init__(self, charset, asKeyword=False, excludeChars=None):
        super(Char, self).__init__(charset, exact=1, asKeyword=asKeyword, excludeChars=excludeChars)
        # Word builds no regex when exact=1, so install a one-character class
        # pattern here; Word.parseImpl then always takes its regex path.
        self.reString = "[%s]" % _escapeRegexRangeChars(self.initCharsOrig)
        # The regex path never consults self.asKeyword, so the keyword
        # restriction has to be expressed in the pattern itself; previously
        # asKeyword=True had no effect.
        if asKeyword:
            self.reString = r"\b" + self.reString + r"\b"
        self.re = re.compile(self.reString)
2998
2999
class Regex(Token):
    r"""Token for matching strings that match a given regular
    expression. Defined with string specifying the regular expression in
    a form recognized by the stdlib Python  `re module <https://docs.python.org/3/library/re.html>`_.
    If the given regex contains named groups (defined using ``(?P<name>...)``),
    these will be preserved as named parse results.

    An already-compiled pattern object may be passed instead of a string.

    Example::

        realnum = Regex(r"[+-]?\d+\.\d*")
        date = Regex(r'(?P<year>\d{4})-(?P<month>\d\d?)-(?P<day>\d\d?)')
        # ref: https://stackoverflow.com/questions/267399/how-do-you-match-only-valid-roman-numerals-with-a-regular-expression
        roman = Regex(r"M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})")
    """
    compiledREtype = type(re.compile("[A-Z]"))
    def __init__(self, pattern, flags=0, asGroupList=False, asMatch=False):
        """The parameters ``pattern`` and ``flags`` are passed
        to the ``re.compile()`` function as-is. See the Python
        `re module <https://docs.python.org/3/library/re.html>`_ module for an
        explanation of the acceptable patterns and flags.

        - ``pattern`` - regular expression string, or a compiled pattern
          object (in which case ``flags`` is stored but not applied)
        - ``asGroupList`` - return the tuple from ``match.groups()`` as the
          parse result
        - ``asMatch`` - return the match object itself as the parse result;
          takes precedence over ``asGroupList`` when both are set

        Raises ``ValueError`` if ``pattern`` is neither a string nor a
        compiled pattern, and re-raises the ``re`` compile error for an
        invalid pattern string.
        """
        super(Regex, self).__init__()

        if isinstance(pattern, basestring):
            if not pattern:
                warnings.warn("null string passed to Regex; use Empty() instead",
                              SyntaxWarning, stacklevel=2)

            self.pattern = pattern
            self.flags = flags

            try:
                self.re = re.compile(self.pattern, self.flags)
                self.reString = self.pattern
            except sre_constants.error:
                warnings.warn("invalid pattern (%s) passed to Regex" % pattern,
                              SyntaxWarning, stacklevel=2)
                raise

        elif isinstance(pattern, Regex.compiledREtype):
            self.re = pattern
            # Record the pattern's source text.  str(pattern) would give the
            # "re.compile(...)" repr, which is not a usable pattern string and
            # garbled this expression's name and error messages.
            self.pattern = self.reString = pattern.pattern
            self.flags = flags

        else:
            raise ValueError("Regex may only be constructed with a string or a compiled RE object")

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = True
        self.asGroupList = asGroupList
        self.asMatch = asMatch
        # Select the result format by rebinding parseImpl on this instance.
        if self.asGroupList:
            self.parseImpl = self.parseImplAsGroupList
        if self.asMatch:
            self.parseImpl = self.parseImplAsMatch

    def parseImpl(self, instring, loc, doActions=True):
        """Default form: the whole match as a token, with any named groups
        added as named results."""
        result = self.re.match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = ParseResults(result.group())
        for k, v in result.groupdict().items():
            ret[k] = v
        return loc, ret

    def parseImplAsGroupList(self, instring, loc, doActions=True):
        """``asGroupList`` form: the result is the ``groups()`` tuple."""
        result = self.re.match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        return result.end(), result.groups()

    def parseImplAsMatch(self, instring, loc, doActions=True):
        """``asMatch`` form: the result is the match object itself."""
        result = self.re.match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        return result.end(), result

    def __str__(self):
        # Prefer the base-class string; any failure there falls through to
        # the generated representation below.
        try:
            return super(Regex, self).__str__()
        except Exception:
            pass

        if self.strRepr is None:
            self.strRepr = "Re:(%s)" % repr(self.pattern)

        return self.strRepr

    def sub(self, repl):
        r"""
        Return Regex with an attached parse action to transform the parsed
        result as if called using `re.sub(expr, repl, string) <https://docs.python.org/3/library/re.html#re.sub>`_.

        Raises ``SyntaxError`` (after issuing a ``SyntaxWarning``) when this
        expression was built with ``asGroupList=True``, or with
        ``asMatch=True`` and a callable ``repl``.

        Example::

            make_html = Regex(r"(\w+):(.*?):").sub(r"<\1>\2</\1>")
            print(make_html.transformString("h1:main title:"))
            # prints "<h1>main title</h1>"
        """
        if self.asGroupList:
            warnings.warn("cannot use sub() with Regex(asGroupList=True)",
                          SyntaxWarning, stacklevel=2)
            raise SyntaxError()

        if self.asMatch and callable(repl):
            warnings.warn("cannot use sub() with a callable with Regex(asMatch=True)",
                          SyntaxWarning, stacklevel=2)
            raise SyntaxError()

        if self.asMatch:
            # The token is a match object: expand the template against it.
            def pa(tokens):
                return tokens[0].expand(repl)
        else:
            # The token is the matched text: substitute within it.
            def pa(tokens):
                return self.re.sub(repl, tokens[0])
        return self.addParseAction(pa)
3129
class QuotedString(Token):
    r"""
    Token for matching strings that are delimited by quoting characters.

    Constructor parameters:

        - quoteChar - string of one or more characters defining the
          quote delimiting string
        - escChar - character to escape quotes, typically backslash
          (default= ``None`` )
        - escQuote - special quote sequence to escape an embedded quote
          string (such as SQL's ``""`` to escape an embedded ``"``)
          (default= ``None`` )
        - multiline - boolean indicating whether quotes can span
          multiple lines (default= ``False`` )
        - unquoteResults - boolean indicating whether the matched text
          should be unquoted (default= ``True`` )
        - endQuoteChar - string of one or more characters defining the
          end of the quote delimited string (default= ``None``  => same as
          quoteChar)
        - convertWhitespaceEscapes - convert escaped whitespace
          (``'\t'``, ``'\n'``, etc.) to actual whitespace
          (default= ``True`` )

    Example::

        qs = QuotedString('"')
        print(qs.searchString('lsjdf "This is the quote" sldjf'))
        complex_qs = QuotedString('{{', endQuoteChar='}}')
        print(complex_qs.searchString('lsjdf {{This is the "quote"}} sldjf'))
        sql_qs = QuotedString('"', escQuote='""')
        print(sql_qs.searchString('lsjdf "This is the quote with ""embedded"" quotes" sldjf'))

    prints::

        [['This is the quote']]
        [['This is the "quote"']]
        [['This is the quote with "embedded" quotes']]
    """
    def __init__(self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None, convertWhitespaceEscapes=True):
        super(QuotedString, self).__init__()

        # Surrounding whitespace in a delimiter is discarded before use.
        quoteChar = quoteChar.strip()
        if not quoteChar:
            warnings.warn("quoteChar cannot be the empty string", SyntaxWarning, stacklevel=2)
            raise SyntaxError()

        if endQuoteChar is None:
            endQuoteChar = quoteChar
        else:
            endQuoteChar = endQuoteChar.strip()
            if not endQuoteChar:
                warnings.warn("endQuoteChar cannot be the empty string", SyntaxWarning, stacklevel=2)
                raise SyntaxError()

        self.quoteChar = quoteChar
        self.quoteCharLen = len(quoteChar)
        self.firstQuoteChar = quoteChar[0]
        self.endQuoteChar = endQuoteChar
        self.endQuoteCharLen = len(endQuoteChar)
        self.escChar = escChar
        self.escQuote = escQuote
        self.unquoteResults = unquoteResults
        self.convertWhitespaceEscapes = convertWhitespaceEscapes

        # The pattern is assembled piecewise:
        #   <open>(?: <ordinary char> | <partial end quote> | <escQuote> | <esc + any> )*<close>
        if multiline:
            self.flags = re.MULTILINE | re.DOTALL
            newline_exclusion = ''
        else:
            self.flags = 0
            # Single-line mode also keeps CR/LF out of the "ordinary" class.
            newline_exclusion = r'\n\r'
        escaped_esc = _escapeRegexRangeChars(escChar) if escChar is not None else ''
        pieces = [
            r'%s(?:[^%s%s%s]' % (
                re.escape(quoteChar),
                _escapeRegexRangeChars(endQuoteChar[0]),
                newline_exclusion,
                escaped_esc,
            )
        ]

        if len(endQuoteChar) > 1:
            # Allow proper prefixes of a multi-character end quote inside the
            # body, as long as the following character breaks the end quote.
            partial_ends = [
                "%s[^%s]" % (re.escape(endQuoteChar[:i]),
                             _escapeRegexRangeChars(endQuoteChar[i]))
                for i in range(len(endQuoteChar) - 1, 0, -1)
            ]
            pieces.append('|(?:' + ')|(?:'.join(partial_ends) + ')')
        if escQuote:
            pieces.append(r'|(?:%s)' % re.escape(escQuote))
        if escChar:
            pieces.append(r'|(?:%s.)' % re.escape(escChar))
            self.escCharReplacePattern = re.escape(escChar) + "(.)"
        pieces.append(r')*%s' % re.escape(endQuoteChar))
        self.pattern = ''.join(pieces)

        try:
            self.re = re.compile(self.pattern, self.flags)
            self.reString = self.pattern
        except sre_constants.error:
            warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern,
                          SyntaxWarning, stacklevel=2)
            raise

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Match a quoted string at ``loc`` and return ``(end, text)``.

        With ``unquoteResults`` the delimiters are removed and whitespace
        escapes, escape characters and escaped quotes are resolved, in that
        order.  Raises :class:`ParseException` when nothing matches.
        """
        # Cheap first-character test before running the regex.
        if instring[loc] == self.firstQuoteChar:
            found = self.re.match(instring, loc)
        else:
            found = None
        if not found:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = found.end()
        ret = found.group()

        if not self.unquoteResults:
            return loc, ret

        # Drop the opening and closing delimiters.
        ret = ret[self.quoteCharLen:-self.endQuoteCharLen]

        if isinstance(ret, basestring):
            # Turn two-character whitespace escapes into the real characters.
            if '\\' in ret and self.convertWhitespaceEscapes:
                whitespace_escapes = (
                    (r'\t', '\t'),
                    (r'\n', '\n'),
                    (r'\f', '\f'),
                    (r'\r', '\r'),
                )
                for literal, actual in whitespace_escapes:
                    ret = ret.replace(literal, actual)

            # Remove the escape character, keeping the character it escaped.
            if self.escChar:
                ret = re.sub(self.escCharReplacePattern, r"\g<1>", ret)

            # Collapse each escaped-quote sequence to the end quote.
            if self.escQuote:
                ret = ret.replace(self.escQuote, self.endQuoteChar)

        return loc, ret

    def __str__(self):
        # Prefer the base-class string; any failure there falls through to
        # the generated representation below.
        try:
            return super(QuotedString, self).__str__()
        except Exception:
            pass

        if self.strRepr is None:
            self.strRepr = "quoted string, starting with %s ending with %s" % (self.quoteChar, self.endQuoteChar)

        return self.strRepr
3279
3280
class CharsNotIn(Token):
    """Token that matches a run of characters *not* contained in a given
    set.  Whitespace is matched too unless it is listed in the exclusion
    set (see example), because this token does not skip leading
    whitespace.  Defined with a string of all disallowed characters and
    an optional minimum, maximum, and/or exact length.  ``min`` defaults
    to 1 (values < 1 are rejected); ``max`` and ``exact`` default to 0,
    meaning no maximum or exact length restriction.

    Example::

        # define a comma-separated-value as anything that is not a ','
        csv_value = CharsNotIn(',')
        print(delimitedList(csv_value).parseString("dkls,lsdkjf,s12 34,@!#,213"))

    prints::

        ['dkls', 'lsdkjf', 's12 34', '@!#', '213']
    """
    def __init__(self, notChars, min=1, max=0, exact=0):
        super(CharsNotIn, self).__init__()
        self.skipWhitespace = False
        self.notChars = notChars

        if min < 1:
            raise ValueError(
                "cannot specify a minimum length < 1; use " +
                "Optional(CharsNotIn()) if zero-length char group is permitted")

        # ``exact`` overrides both bounds when given.
        if exact > 0:
            self.minLen = self.maxLen = exact
        else:
            self.minLen = min
            self.maxLen = max if max > 0 else _MAX_INT

        # _ustr(self) goes through __str__, which needs notChars set above.
        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = (self.minLen == 0)
        self.mayIndexError = False

    def parseImpl(self, instring, loc, doActions=True):
        """Consume characters from ``loc`` until an excluded character, the
        maximum length, or the end of input; return ``(end, text)``.  Raises
        :class:`ParseException` if the first character is excluded or the run
        is shorter than the minimum length.
        """
        excluded = self.notChars
        if instring[loc] in excluded:
            raise ParseException(instring, loc, self.errmsg, self)

        start = loc
        limit = min(start + self.maxLen, len(instring))
        loc += 1
        while loc < limit and instring[loc] not in excluded:
            loc += 1

        if loc - start < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]

    def __str__(self):
        # Prefer the base-class string; any failure there falls through to
        # the generated representation below.
        try:
            return super(CharsNotIn, self).__str__()
        except Exception:
            pass

        if self.strRepr is None:
            shown = self.notChars
            # Show at most four of the excluded characters.
            if len(shown) > 4:
                self.strRepr = "!W:(%s...)" % shown[:4]
            else:
                self.strRepr = "!W:(%s)" % shown

        return self.strRepr
3357
class White(Token):
    """Special matching class for matching whitespace.  Normally,
    whitespace is ignored by pyparsing grammars.  This class is included
    when some whitespace structures are significant.  Define with
    a string containing the whitespace characters to be matched; default
    is ``" \\t\\r\\n"``.  Also takes optional ``min``,
    ``max``, and ``exact`` arguments, as defined for the
    :class:`Word` class.

    Every character in ``ws`` must have an entry in :attr:`whiteStrs`,
    since the expression's name is built from that table.
    """
    # Display names used to build the expression name.  The Unicode entries
    # are written as u'\uXXXX' escapes so each key is the whitespace character
    # itself; the previous spelling 'u\XXXX' produced the letter "u" followed
    # by an octal escape, so those characters could never be looked up.
    whiteStrs = {
        ' ' : '<SP>',
        '\t': '<TAB>',
        '\n': '<LF>',
        '\r': '<CR>',
        '\f': '<FF>',
        u'\u00A0': '<NBSP>',
        u'\u1680': '<OGHAM_SPACE_MARK>',
        u'\u180E': '<MONGOLIAN_VOWEL_SEPARATOR>',
        u'\u2000': '<EN_QUAD>',
        u'\u2001': '<EM_QUAD>',
        u'\u2002': '<EN_SPACE>',
        u'\u2003': '<EM_SPACE>',
        u'\u2004': '<THREE-PER-EM_SPACE>',
        u'\u2005': '<FOUR-PER-EM_SPACE>',
        u'\u2006': '<SIX-PER-EM_SPACE>',
        u'\u2007': '<FIGURE_SPACE>',
        u'\u2008': '<PUNCTUATION_SPACE>',
        u'\u2009': '<THIN_SPACE>',
        u'\u200A': '<HAIR_SPACE>',
        u'\u200B': '<ZERO_WIDTH_SPACE>',
        u'\u202F': '<NNBSP>',
        u'\u205F': '<MMSP>',
        u'\u3000': '<IDEOGRAPHIC_SPACE>',
        }
    def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0):
        super(White, self).__init__()
        self.matchWhite = ws
        # Characters this token must match are removed from the set of
        # whitespace that is skipped before matching.
        self.setWhitespaceChars("".join(c for c in self.whiteChars if c not in self.matchWhite))
        #~ self.leaveWhitespace()
        self.name = ("".join(White.whiteStrs[c] for c in self.matchWhite))
        self.mayReturnEmpty = True
        self.errmsg = "Expected " + self.name

        self.minLen = min

        if max > 0:
            self.maxLen = max
        else:
            self.maxLen = _MAX_INT

        # ``exact`` overrides both bounds when given.
        if exact > 0:
            self.maxLen = exact
            self.minLen = exact

    def parseImpl(self, instring, loc, doActions=True):
        """Match a run of the configured whitespace characters at ``loc`` and
        return ``(end, text)``; raise :class:`ParseException` if the first
        character does not qualify or the run is shorter than ``minLen``.
        """
        if instring[loc] not in self.matchWhite:
            raise ParseException(instring, loc, self.errmsg, self)
        start = loc
        loc += 1
        maxloc = start + self.maxLen
        maxloc = min(maxloc, len(instring))
        while loc < maxloc and instring[loc] in self.matchWhite:
            loc += 1

        if loc - start < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]
3426
3427
class _PositionToken(Token):
    """Common base for the position-testing tokens defined below
    (:class:`GoToColumn`, :class:`LineStart`, :class:`StringEnd`, ...).
    """
    def __init__(self):
        super(_PositionToken, self).__init__()
        # Instances are named after their concrete class.
        self.name = type(self).__name__
        self.mayIndexError = False
        self.mayReturnEmpty = True
3434
class GoToColumn(_PositionToken):
    """Token that advances to a given column of the input text; useful
    for tabular report scraping.
    """
    def __init__(self, colno):
        super(GoToColumn, self).__init__()
        self.col = colno

    def preParse(self, instring, loc):
        """Skip ignorable expressions and whitespace until the target column
        (or non-whitespace, or end of input) is reached; return the new
        location.
        """
        if col(loc, instring) == self.col:
            return loc

        instrlen = len(instring)
        if self.ignoreExprs:
            loc = self._skipIgnorables(instring, loc)
        while (loc < instrlen
               and instring[loc].isspace()
               and col(loc, instring) != self.col):
            loc += 1
        return loc

    def parseImpl(self, instring, loc, doActions=True):
        """Return ``(newloc, text)`` where ``text`` is the input between
        ``loc`` and the target column; raise :class:`ParseException` if
        ``loc`` is already past that column.
        """
        thiscol = col(loc, instring)
        if thiscol > self.col:
            raise ParseException(instring, loc, "Text not in expected column", self)
        newloc = loc + self.col - thiscol
        return newloc, instring[loc:newloc]
3459
3460
class LineStart(_PositionToken):
    r"""Succeeds only when the current position is the first column of a
    line within the parse string.

    Example::

        test = '''\
        AAA this line
        AAA and this line
          AAA but not this one
        B AAA and definitely not this one
        '''

        for t in (LineStart() + 'AAA' + restOfLine).searchString(test):
            print(t)

    prints::

        ['AAA', ' this line']
        ['AAA', ' and this line']

    """
    def __init__(self):
        super(LineStart, self).__init__()
        self.errmsg = "Expected start of line"

    def parseImpl(self, instring, loc, doActions=True):
        # Column numbers reported by col() start at 1.
        if col(loc, instring) != 1:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
3491
class LineEnd(_PositionToken):
    """Succeeds when the current position is at the end of a line within
    the parse string: either a newline character or the end of the input.
    """
    def __init__(self):
        super(LineEnd, self).__init__()
        # A newline must not be skipped as whitespace, or it could never be
        # matched here.
        self.setWhitespaceChars(ParserElement.DEFAULT_WHITE_CHARS.replace("\n", ""))
        self.errmsg = "Expected end of line"

    def parseImpl(self, instring, loc, doActions=True):
        """Return ``(loc+1, "\\n")`` on a newline, ``(loc+1, [])`` exactly at
        the end of input, and raise :class:`ParseException` otherwise.
        """
        instrlen = len(instring)
        if loc == instrlen:
            return loc + 1, []
        if loc < instrlen and instring[loc] == "\n":
            return loc + 1, "\n"
        raise ParseException(instring, loc, self.errmsg, self)
3511
class StringStart(_PositionToken):
    """Succeeds when the current position is the beginning of the parse
    string.
    """
    def __init__(self):
        super(StringStart, self).__init__()
        self.errmsg = "Expected start of text"

    def parseImpl(self, instring, loc, doActions=True):
        # A non-zero location still counts as the start when everything
        # before it is consumed by pre-parsing from position 0 (whitespace
        # and ignorables).
        if loc != 0 and loc != self.preParse(instring, 0):
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
3526
class StringEnd(_PositionToken):
    """Succeeds when the current position is at the end of the parse string.
    """
    def __init__(self):
        super(StringEnd, self).__init__()
        self.errmsg = "Expected end of text"

    def parseImpl(self, instring, loc, doActions=True):
        """Raise :class:`ParseException` while input remains; otherwise
        return an empty result, stepping one past the end when ``loc`` is
        exactly at the end and leaving ``loc`` alone when already beyond it.
        """
        instrlen = len(instring)
        if loc < instrlen:
            raise ParseException(instring, loc, self.errmsg, self)
        if loc == instrlen:
            return loc + 1, []
        return loc, []
3543
class WordStart(_PositionToken):
    r"""Succeeds when the current position is the beginning of a word:
    the character here belongs to ``wordChars`` (default=
    ``printables``) and the preceding character does not.  To emulate
    the ``\b`` behavior of regular expressions, use
    ``WordStart(alphanums)``. ``WordStart`` will also match at
    the beginning of the string being parsed, or at the beginning of
    a line.
    """
    def __init__(self, wordChars = printables):
        super(WordStart, self).__init__()
        self.wordChars = set(wordChars)
        self.errmsg = "Not at the start of a word"

    def parseImpl(self, instring, loc, doActions=True):
        # Position 0 always qualifies; no characters are inspected there.
        if loc != 0:
            word_chars = self.wordChars
            preceded_by_word = instring[loc - 1] in word_chars
            if preceded_by_word or instring[loc] not in word_chars:
                raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
3564
class WordEnd(_PositionToken):
    r"""Succeeds when the current position is the end of a word: the
    preceding character belongs to ``wordChars`` (default=
    ``printables``) and the character here does not.  To emulate the
    ``\b`` behavior of regular expressions, use
    ``WordEnd(alphanums)``. ``WordEnd`` will also match at the end of
    the string being parsed, or at the end of a line.
    """
    def __init__(self, wordChars = printables):
        super(WordEnd, self).__init__()
        self.wordChars = set(wordChars)
        # Whitespace is not skipped before this test.
        self.skipWhitespace = False
        self.errmsg = "Not at the end of a word"

    def parseImpl(self, instring, loc, doActions=True):
        instrlen = len(instring)
        # At or beyond the end of input (or on empty input) always qualifies.
        if 0 < instrlen and loc < instrlen:
            word_chars = self.wordChars
            followed_by_word = instring[loc] in word_chars
            if followed_by_word or instring[loc - 1] not in word_chars:
                raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
3586
3587
class ParseExpression(ParserElement):
    """Abstract :class:`ParserElement` subclass that holds a list of
    sub-expressions (``self.exprs``), for combining and post-processing
    parsed tokens.
    """
    def __init__(self, exprs, savelist = False):
        super(ParseExpression, self).__init__(savelist)
        if isinstance(exprs, _generatorType):
            exprs = list(exprs)

        if isinstance(exprs, basestring):
            # A bare string stands for a single literal expression.
            members = [ParserElement._literalStringClass(exprs)]
        elif isinstance(exprs, Iterable):
            members = list(exprs)
            # A sequence made up solely of strings is wrapped item by item.
            if all(isinstance(item, basestring) for item in members):
                members = [ParserElement._literalStringClass(item) for item in members]
        else:
            try:
                members = list(exprs)
            except TypeError:
                # Not listable: treat the argument as one expression.
                members = [exprs]
        self.exprs = members
        self.callPreparse = False

    def __getitem__(self, i):
        return self.exprs[i]

    def append(self, other):
        """Add ``other`` to the contained expressions and return ``self``."""
        self.exprs.append(other)
        # Cached string form is stale once the contents change.
        self.strRepr = None
        return self

    def leaveWhitespace(self):
        """Extends ``leaveWhitespace`` defined in base class, and also invokes ``leaveWhitespace`` on
           all contained expressions."""
        self.skipWhitespace = False
        # Work on copies so the original sub-expressions stay untouched.
        self.exprs = [expr.copy() for expr in self.exprs]
        for expr in self.exprs:
            expr.leaveWhitespace()
        return self

    def ignore(self, other):
        """Register ``other`` as ignorable here and on every contained
        expression; a :class:`Suppress` that is already registered is not
        added again.  Returns ``self``.
        """
        already_registered = isinstance(other, Suppress) and other in self.ignoreExprs
        if not already_registered:
            super(ParseExpression, self).ignore(other)
            for expr in self.exprs:
                # Propagate the entry the base class just stored.
                expr.ignore(self.ignoreExprs[-1])
        return self

    def __str__(self):
        # Prefer the base-class string; any failure there falls through to
        # the generated representation below.
        try:
            return super(ParseExpression, self).__str__()
        except Exception:
            pass

        if self.strRepr is None:
            self.strRepr = "%s:(%s)" % (self.__class__.__name__, _ustr(self.exprs))
        return self.strRepr

    def streamline(self):
        """Streamline all contained expressions, then flatten a directly
        nested expression of the same class into this one.  Returns ``self``.
        """
        super(ParseExpression, self).streamline()

        for expr in self.exprs:
            expr.streamline()

        # collapse nested And's of the form And( And( And( a,b), c), d) to And( a,b,c,d )
        # but only if there are no parse actions or resultsNames on the nested And's
        # (likewise for Or's and MatchFirst's)
        if len(self.exprs) == 2:

            def mergeable(candidate):
                return (isinstance(candidate, self.__class__)
                        and not candidate.parseAction
                        and candidate.resultsName is None
                        and not candidate.debug)

            first = self.exprs[0]
            if mergeable(first):
                self.exprs = first.exprs[:] + [self.exprs[1]]
                self.strRepr = None
                self.mayReturnEmpty |= first.mayReturnEmpty
                self.mayIndexError |= first.mayIndexError

            # Re-read the last element: the list may just have been rebuilt.
            last = self.exprs[-1]
            if mergeable(last):
                self.exprs = self.exprs[:-1] + last.exprs[:]
                self.strRepr = None
                self.mayReturnEmpty |= last.mayReturnEmpty
                self.mayIndexError |= last.mayIndexError

        self.errmsg = "Expected " + _ustr(self)

        return self

    def validate(self, validateTrace=[]):
        """Validate every contained expression, passing along the trace
        extended with ``self``, then run the recursion check on ``self``.
        """
        # The default list is never mutated: a new list is built each call.
        trace = validateTrace[:] + [self]
        for expr in self.exprs:
            expr.validate(trace)
        self.checkRecursion([])

    def copy(self):
        """Return a copy whose contained expressions are themselves copies."""
        duplicate = super(ParseExpression, self).copy()
        duplicate.exprs = [expr.copy() for expr in self.exprs]
        return duplicate
3695
class And(ParseExpression):
    """
    Requires all given :class:`ParseExpression` s to be found in the given order.
    Expressions may be separated by whitespace.
    May be constructed using the ``'+'`` operator.
    May also be constructed using the ``'-'`` operator, which will
    suppress backtracking.

    An ``And`` built from an empty list is allowed (so that it can be
    extended later with ``+=``); while empty it matches without consuming
    any input.

    Example::

        integer = Word(nums)
        name_expr = OneOrMore(Word(alphas))

        expr = And([integer("id"),name_expr("name"),integer("age")])
        # more easily written as:
        expr = integer("id") + name_expr("name") + integer("age")
    """

    class _ErrorStop(Empty):
        """Marker element: once :meth:`And.parseImpl` passes one of these,
        failures of the following expressions are raised as
        ``ParseSyntaxException``.
        """
        def __init__(self, *args, **kwargs):
            super(And._ErrorStop,self).__init__(*args, **kwargs)
            self.name = '-'
            self.leaveWhitespace()

    def __init__( self, exprs, savelist = True ):
        """Build the sequence from ``exprs``.

        Whitespace handling is taken from the first expression when there
        is one; an empty ``exprs`` keeps the inherited settings instead of
        failing with ``IndexError``.
        """
        super(And,self).__init__(exprs, savelist)
        # all() of an empty sequence is True: an empty And can match nothing
        self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
        if self.exprs:
            self.setWhitespaceChars( self.exprs[0].whiteChars )
            self.skipWhitespace = self.exprs[0].skipWhitespace
        self.callPreparse = True

    def streamline(self):
        """Streamline via the base class, then recompute ``mayReturnEmpty``
        from the (possibly changed) list of expressions.
        """
        super(And, self).streamline()
        self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
        return self

    def parseImpl( self, instring, loc, doActions=True ):
        """Parse each expression in order, accumulating their tokens.

        Returns ``(loc, tokens)``. After an ``_ErrorStop`` marker has been
        seen, any ``ParseBaseException`` or ``IndexError`` from a later
        expression is re-raised as ``ParseSyntaxException``.
        """
        if not self.exprs:
            # nothing is required, so match trivially without consuming input
            return loc, []

        # pass False as last arg to _parse for first element, since we already
        # pre-parsed the string as part of our And pre-parsing
        loc, resultlist = self.exprs[0]._parse( instring, loc, doActions, callPreParse=False )
        errorStop = False
        for e in self.exprs[1:]:
            if isinstance(e, And._ErrorStop):
                errorStop = True
                continue
            if errorStop:
                try:
                    loc, exprtokens = e._parse( instring, loc, doActions )
                except ParseSyntaxException:
                    raise
                except ParseBaseException as pe:
                    # drop the traceback before converting the exception
                    pe.__traceback__ = None
                    raise ParseSyntaxException._from_exception(pe)
                except IndexError:
                    raise ParseSyntaxException(instring, len(instring), self.errmsg, self)
            else:
                loc, exprtokens = e._parse( instring, loc, doActions )
            # keep results that have tokens or carry named results
            if exprtokens or exprtokens.haskeys():
                resultlist += exprtokens
        return loc, resultlist

    def __iadd__(self, other ):
        """Support ``expr += other``; a plain string is wrapped with the
        configured literal string class before being appended.
        """
        if isinstance( other, basestring ):
            other = ParserElement._literalStringClass( other )
        return self.append( other ) #And( [ self, other ] )

    def checkRecursion( self, parseElementList ):
        """Check sub-expressions for recursion, stopping after the first one
        that cannot return empty.
        """
        subRecCheckList = parseElementList[:] + [ self ]
        for e in self.exprs:
            e.checkRecursion( subRecCheckList )
            if not e.mayReturnEmpty:
                break

    def __str__( self ):
        """Return ``self.name`` when set, else a cached ``{a b c}`` form."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + " ".join(_ustr(e) for e in self.exprs) + "}"

        return self.strRepr
3777
3778
class Or(ParseExpression):
    """Requires that at least one :class:`ParseExpression` is found. If
    two expressions match, the expression that matches the longest
    string will be used. May be constructed using the ``'^'``
    operator.

    Example::

        # construct Or using '^' operator

        number = Word(nums) ^ Combine(Word(nums) + '.' + Word(nums))
        print(number.searchString("123 3.1416 789"))

    prints::

        [['123'], ['3.1416'], ['789']]
    """
    def __init__(self, exprs, savelist=False):
        super(Or, self).__init__(exprs, savelist)
        # with no alternatives at all the expression is flagged as possibly empty
        if not self.exprs:
            self.mayReturnEmpty = True
        else:
            self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)

    def streamline(self):
        """Streamline via the base class; in the ``collect_all_And_tokens``
        compatibility mode also refresh ``saveAsList`` from the alternatives.
        """
        super(Or, self).streamline()
        if __compat__.collect_all_And_tokens:
            self.saveAsList = any(e.saveAsList for e in self.exprs)
        return self

    def parseImpl(self, instring, loc, doActions=True):
        """Try every alternative, then parse with the longest successful one.

        If a candidate fails when parsed for real, the next-longest is tried.
        When nothing succeeds, the exception that got furthest into the input
        is raised with this expression's error message.
        """
        furthest_loc = -1
        furthest_exc = None
        candidates = []
        for alt in self.exprs:
            try:
                end_loc = alt.tryParse(instring, loc)
            except ParseException as err:
                err.__traceback__ = None
                if err.loc > furthest_loc:
                    furthest_exc = err
                    furthest_loc = err.loc
            except IndexError:
                if len(instring) > furthest_loc:
                    furthest_exc = ParseException(instring, len(instring), alt.errmsg, self)
                    furthest_loc = len(instring)
            else:
                # remember every match, to retry longest to shortest
                candidates.append((end_loc, alt))

        if candidates:
            # stable sort: equally long matches keep their original order
            candidates.sort(key=lambda pair: pair[0], reverse=True)
            for _, alt in candidates:
                try:
                    return alt._parse(instring, loc, doActions)
                except ParseException as err:
                    err.__traceback__ = None
                    if err.loc > furthest_loc:
                        furthest_exc = err
                        furthest_loc = err.loc

        if furthest_exc is None:
            raise ParseException(instring, loc, "no defined alternatives to match", self)
        furthest_exc.msg = self.errmsg
        raise furthest_exc

    def __ixor__(self, other):
        """Support ``expr ^= other``; strings are wrapped as literals first."""
        if isinstance(other, basestring):
            other = ParserElement._literalStringClass(other)
        return self.append(other)

    def __str__(self):
        """Return ``self.name`` when set, else a cached ``{a ^ b}`` form."""
        if hasattr(self, "name"):
            return self.name

        text = self.strRepr
        if text is None:
            text = self.strRepr = "{" + " ^ ".join(_ustr(e) for e in self.exprs) + "}"
        return text

    def checkRecursion(self, parseElementList):
        """Run the recursion check on every alternative."""
        trail = parseElementList[:] + [self]
        for alt in self.exprs:
            alt.checkRecursion(trail)
3865
3866
class MatchFirst(ParseExpression):
    """Requires that at least one :class:`ParseExpression` is found. If
    two expressions match, the first one listed is the one that will
    match. May be constructed using the ``'|'`` operator.

    Example::

        # construct MatchFirst using '|' operator

        # watch the order of expressions to match
        number = Word(nums) | Combine(Word(nums) + '.' + Word(nums))
        print(number.searchString("123 3.1416 789")) #  Fail! -> [['123'], ['3'], ['1416'], ['789']]

        # put more selective expression first
        number = Combine(Word(nums) + '.' + Word(nums)) | Word(nums)
        print(number.searchString("123 3.1416 789")) #  Better -> [['123'], ['3.1416'], ['789']]
    """
    def __init__(self, exprs, savelist=False):
        super(MatchFirst, self).__init__(exprs, savelist)
        # with no alternatives at all the expression is flagged as possibly empty
        if not self.exprs:
            self.mayReturnEmpty = True
        else:
            self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)

    def streamline(self):
        """Streamline via the base class; in the ``collect_all_And_tokens``
        compatibility mode also refresh ``saveAsList`` from the alternatives.
        """
        super(MatchFirst, self).streamline()
        if __compat__.collect_all_And_tokens:
            self.saveAsList = any(e.saveAsList for e in self.exprs)
        return self

    def parseImpl(self, instring, loc, doActions=True):
        """Return the result of the first alternative that parses.

        When every alternative fails, the exception that reached furthest
        into the input is raised with this expression's error message.
        """
        furthest_loc = -1
        furthest_exc = None
        for alt in self.exprs:
            try:
                return alt._parse(instring, loc, doActions)
            except ParseException as err:
                if err.loc > furthest_loc:
                    furthest_exc = err
                    furthest_loc = err.loc
            except IndexError:
                if len(instring) > furthest_loc:
                    furthest_exc = ParseException(instring, len(instring), alt.errmsg, self)
                    furthest_loc = len(instring)

        # only got here if no expression matched, raise exception for match that made it the furthest
        if furthest_exc is None:
            raise ParseException(instring, loc, "no defined alternatives to match", self)
        furthest_exc.msg = self.errmsg
        raise furthest_exc

    def __ior__(self, other):
        """Support ``expr |= other``; strings are wrapped as literals first."""
        if isinstance(other, basestring):
            other = ParserElement._literalStringClass(other)
        return self.append(other)

    def __str__(self):
        """Return ``self.name`` when set, else a cached ``{a | b}`` form."""
        if hasattr(self, "name"):
            return self.name

        text = self.strRepr
        if text is None:
            text = self.strRepr = "{" + " | ".join(_ustr(e) for e in self.exprs) + "}"
        return text

    def checkRecursion(self, parseElementList):
        """Run the recursion check on every alternative."""
        trail = parseElementList[:] + [self]
        for alt in self.exprs:
            alt.checkRecursion(trail)
3939
3940
class Each(ParseExpression):
    """Requires all given :class:`ParseExpression` s to be found, but in
    any order. Expressions may be separated by whitespace.

    May be constructed using the ``'&'`` operator.

    Example::

        color = oneOf("RED ORANGE YELLOW GREEN BLUE PURPLE BLACK WHITE BROWN")
        shape_type = oneOf("SQUARE CIRCLE TRIANGLE STAR HEXAGON OCTAGON")
        integer = Word(nums)
        shape_attr = "shape:" + shape_type("shape")
        posn_attr = "posn:" + Group(integer("x") + ',' + integer("y"))("posn")
        color_attr = "color:" + color("color")
        size_attr = "size:" + integer("size")

        # use Each (using operator '&') to accept attributes in any order
        # (shape and posn are required, color and size are optional)
        shape_spec = shape_attr & posn_attr & Optional(color_attr) & Optional(size_attr)

        shape_spec.runTests('''
            shape: SQUARE color: BLACK posn: 100, 120
            shape: CIRCLE size: 50 color: BLUE posn: 50,80
            color:GREEN size:20 shape:TRIANGLE posn:20,40
            '''
            )

    prints::

        shape: SQUARE color: BLACK posn: 100, 120
        ['shape:', 'SQUARE', 'color:', 'BLACK', 'posn:', ['100', ',', '120']]
        - color: BLACK
        - posn: ['100', ',', '120']
          - x: 100
          - y: 120
        - shape: SQUARE


        shape: CIRCLE size: 50 color: BLUE posn: 50,80
        ['shape:', 'CIRCLE', 'size:', '50', 'color:', 'BLUE', 'posn:', ['50', ',', '80']]
        - color: BLUE
        - posn: ['50', ',', '80']
          - x: 50
          - y: 80
        - shape: CIRCLE
        - size: 50


        color: GREEN size: 20 shape: TRIANGLE posn: 20,40
        ['color:', 'GREEN', 'size:', '20', 'shape:', 'TRIANGLE', 'posn:', ['20', ',', '40']]
        - color: GREEN
        - posn: ['20', ',', '40']
          - x: 20
          - y: 40
        - shape: TRIANGLE
        - size: 20
    """
    def __init__( self, exprs, savelist = True ):
        super(Each,self).__init__(exprs, savelist)
        # the whole group may match empty only if every member may
        self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
        self.skipWhitespace = True
        # classification of exprs is deferred until the first parseImpl call
        self.initExprGroups = True
        self.saveAsList = True

    def streamline(self):
        """Streamline via the base class, then recompute ``mayReturnEmpty``."""
        super(Each, self).streamline()
        self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
        return self

    def parseImpl( self, instring, loc, doActions=True ):
        """Match the member expressions in whatever order they appear.

        Works in two phases: first ``tryParse`` is used repeatedly to find
        an order in which the members match; then the members are parsed
        again with ``_parse`` in that order, starting from the original
        ``loc``, and their results are summed into one ``ParseResults``.

        Raises ``ParseException`` if any required member was never matched.
        """
        if self.initExprGroups:
            # one-time classification of the member expressions
            # maps id() of an Optional's wrapped expr back to the Optional itself
            self.opt1map = dict((id(e.expr),e) for e in self.exprs if isinstance(e,Optional))
            opt1 = [ e.expr for e in self.exprs if isinstance(e,Optional) ]
            opt2 = [ e for e in self.exprs if e.mayReturnEmpty and not isinstance(e,Optional)]
            self.optionals = opt1 + opt2
            self.multioptionals = [ e.expr for e in self.exprs if isinstance(e,ZeroOrMore) ]
            self.multirequired = [ e.expr for e in self.exprs if isinstance(e,OneOrMore) ]
            self.required = [ e for e in self.exprs if not isinstance(e,(Optional,ZeroOrMore,OneOrMore)) ]
            self.required += self.multirequired
            self.initExprGroups = False
        tmpLoc = loc
        # working copies: entries are removed as they are matched
        tmpReqd = self.required[:]
        tmpOpt = self.optionals[:]
        matchOrder = []

        keepMatching = True
        while keepMatching:
            tmpExprs = tmpReqd + tmpOpt + self.multioptionals + self.multirequired
            failed = []
            for e in tmpExprs:
                try:
                    tmpLoc = e.tryParse( instring, tmpLoc )
                except ParseException:
                    failed.append(e)
                else:
                    # record the Optional wrapper when e is an Optional's inner expr
                    matchOrder.append(self.opt1map.get(id(e),e))
                    if e in tmpReqd:
                        tmpReqd.remove(e)
                    elif e in tmpOpt:
                        tmpOpt.remove(e)
            # stop once a complete pass matched nothing
            if len(failed) == len(tmpExprs):
                keepMatching = False

        if tmpReqd:
            missing = ", ".join(_ustr(e) for e in tmpReqd)
            raise ParseException(instring,loc,"Missing one or more required elements (%s)" % missing )

        # add any unmatched Optionals, in case they have default values defined
        matchOrder += [e for e in self.exprs if isinstance(e,Optional) and e.expr in tmpOpt]

        # second phase: parse for real, in the discovered order
        resultlist = []
        for e in matchOrder:
            loc,results = e._parse(instring,loc,doActions)
            resultlist.append(results)

        finalResults = sum(resultlist, ParseResults([]))
        return loc, finalResults

    def __str__( self ):
        """Return ``self.name`` when set, else a cached ``{a & b}`` form."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + " & ".join(_ustr(e) for e in self.exprs) + "}"

        return self.strRepr

    def checkRecursion( self, parseElementList ):
        """Run the recursion check on every member expression."""
        subRecCheckList = parseElementList[:] + [ self ]
        for e in self.exprs:
            e.checkRecursion( subRecCheckList )
4072
4073
class ParseElementEnhance(ParserElement):
    """Abstract subclass of :class:`ParserElement`, for combining and
    post-processing parsed tokens.

    Wraps a single expression, stored in ``self.expr``, which may be
    ``None``; every method here tolerates a missing expression.
    """
    def __init__( self, expr, savelist=False ):
        """Wrap ``expr``; a plain string is first converted to a literal
        expression. Parsing-related flags are copied from the wrapped
        expression when there is one.
        """
        super(ParseElementEnhance,self).__init__(savelist)
        if isinstance( expr, basestring ):
            if issubclass(ParserElement._literalStringClass, Token):
                expr = ParserElement._literalStringClass(expr)
            else:
                expr = ParserElement._literalStringClass(Literal(expr))
        self.expr = expr
        self.strRepr = None
        if expr is not None:
            self.mayIndexError = expr.mayIndexError
            self.mayReturnEmpty = expr.mayReturnEmpty
            self.setWhitespaceChars( expr.whiteChars )
            self.skipWhitespace = expr.skipWhitespace
            self.saveAsList = expr.saveAsList
            self.callPreparse = expr.callPreparse
            self.ignoreExprs.extend(expr.ignoreExprs)

    def parseImpl( self, instring, loc, doActions=True ):
        """Delegate parsing to the wrapped expression.

        Raises ``ParseException`` when no expression has been set.
        """
        if self.expr is None:
            # report against the real input so the exception has usable context
            raise ParseException(instring, loc, self.errmsg, self)
        return self.expr._parse( instring, loc, doActions, callPreParse=False )

    def leaveWhitespace( self ):
        """Turn off whitespace skipping here and on a private copy of the
        wrapped expression. Returns ``self``.
        """
        self.skipWhitespace = False
        if self.expr is not None:
            # copy first so the caller's original expression is not altered
            self.expr = self.expr.copy()
            self.expr.leaveWhitespace()
        return self

    def ignore( self, other ):
        """Register ``other`` as ignorable here and on the wrapped expression.

        A ``Suppress`` that is already registered is not added again.
        Returns ``self``.
        """
        if isinstance( other, Suppress ) and other in self.ignoreExprs:
            return self
        super( ParseElementEnhance, self).ignore( other )
        if self.expr is not None:
            # propagate the element the base class just appended
            self.expr.ignore( self.ignoreExprs[-1] )
        return self

    def streamline( self ):
        """Streamline this element and the wrapped expression."""
        super(ParseElementEnhance,self).streamline()
        if self.expr is not None:
            self.expr.streamline()
        return self

    def checkRecursion( self, parseElementList ):
        """Raise ``RecursiveGrammarException`` if this element is already in
        ``parseElementList``; otherwise continue into the wrapped expression.
        """
        if self in parseElementList:
            raise RecursiveGrammarException( parseElementList+[self] )
        subRecCheckList = parseElementList[:] + [ self ]
        if self.expr is not None:
            self.expr.checkRecursion( subRecCheckList )

    def validate( self, validateTrace=None ):
        """Validate the wrapped expression, then check this element for
        recursion. ``validateTrace`` is the chain of elements already being
        validated; it is not modified.
        """
        if validateTrace is None:
            validateTrace = []
        tmp = validateTrace[:]+[self]
        if self.expr is not None:
            self.expr.validate(tmp)
        self.checkRecursion( [] )

    def __str__( self ):
        """Prefer the base-class string; if that raises, fall back to a
        cached ``ClassName:(expr)`` form.
        """
        try:
            return super(ParseElementEnhance,self).__str__()
        except Exception:
            pass

        if self.strRepr is None and self.expr is not None:
            self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.expr) )
        return self.strRepr
4149
4150
class FollowedBy(ParseElementEnhance):
    """Lookahead matching of the given parse expression.
    ``FollowedBy`` does *not* advance the parsing position within
    the input string, it only verifies that the specified parse
    expression matches at the current position.  ``FollowedBy``
    always returns a null token list. If any results names are defined
    in the lookahead expression, those *will* be returned for access by
    name.

    Example::

        # use FollowedBy to match a label only if it is followed by a ':'
        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))

        OneOrMore(attr_expr).parseString("shape: SQUARE color: BLACK posn: upper left").pprint()

    prints::

        [['shape', 'SQUARE'], ['color', 'BLACK'], ['posn', 'upper left']]
    """
    def __init__(self, expr):
        super(FollowedBy, self).__init__(expr)
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Parse the lookahead at ``loc`` and return the *original* ``loc``
        together with the result object emptied of its list tokens.
        """
        lookahead = self.expr._parse(instring, loc, doActions=doActions)[1]
        # clear the token list in place; the result object itself is kept
        del lookahead[:]
        return loc, lookahead
4181
4182
class PrecededBy(ParseElementEnhance):
    """Lookbehind matching of the given parse expression.
    ``PrecededBy`` does not advance the parsing position within the
    input string, it only verifies that the specified parse expression
    matches prior to the current position.  ``PrecededBy`` always
    returns a null token list, but if a results name is defined on the
    given expression, it is returned.

    Parameters:

     - expr - expression that must match prior to the current parse
       location
     - retreat - (default= ``None``) - (int) maximum number of characters
       to lookbehind prior to the current parse location

    If the lookbehind expression is a string, Literal, Keyword, or
    a Word or CharsNotIn with a specified exact or maximum length, then
    the retreat parameter is not required. Otherwise, retreat must be
    specified to give a maximum number of characters to look back from
    the current parse position for a lookbehind match.

    Example::

        # VB-style variable names with type prefixes
        int_var = PrecededBy("#") + pyparsing_common.identifier
        str_var = PrecededBy("$") + pyparsing_common.identifier

    """
    def __init__(self, expr, retreat=None):
        super(PrecededBy, self).__init__(expr)
        # work on a copy that does not skip leading whitespace
        self.expr = self.expr().leaveWhitespace()
        self.mayReturnEmpty = True
        self.mayIndexError = False
        self.exact = False
        # basestring (not str) so unicode literals are recognised on Python 2,
        # matching the string checks used by the other classes in this module
        if isinstance(expr, basestring):
            retreat = len(expr)
            self.exact = True
        elif isinstance(expr, (Literal, Keyword)):
            retreat = expr.matchLen
            self.exact = True
        elif isinstance(expr, (Word, CharsNotIn)) and expr.maxLen != _MAX_INT:
            retreat = expr.maxLen
            self.exact = True
        elif isinstance(expr, _PositionToken):
            retreat = 0
            self.exact = True
        self.retreat = retreat
        self.errmsg = "not preceded by " + str(expr)
        self.skipWhitespace = False

    def parseImpl(self, instring, loc=0, doActions=True):
        """Verify that the expression matches immediately before ``loc``.

        Returns the unchanged ``loc`` and the match result with its list
        tokens removed. Raises the last parse exception seen (or a
        ``ParseException`` with this element's message) when no lookbehind
        position matches.
        """
        if self.exact:
            # a single known lookbehind distance
            if loc < self.retreat:
                raise ParseException(instring, loc, self.errmsg)
            start = loc - self.retreat
            _, ret = self.expr._parse(instring, start)
        else:
            # retreat specified a maximum lookbehind window, iterate
            test_expr = self.expr + StringEnd()
            instring_slice = instring[:loc]
            last_expr = ParseException(instring, loc, self.errmsg)
            # try offsets 1..min(loc, retreat) inclusive; the upper bound must
            # include offset == loc, otherwise a match that starts at the very
            # beginning of the input could never be found
            for offset in range(1, min(loc, self.retreat) + 1):
                try:
                    _, ret = test_expr._parse(instring_slice, loc-offset)
                except ParseBaseException as pbe:
                    last_expr = pbe
                else:
                    break
            else:
                raise last_expr
        # return empty list of tokens, but preserve any defined results names
        del ret[:]
        return loc, ret
4256
4257
class NotAny(ParseElementEnhance):
    """Lookahead to disallow matching with the given parse expression.
    ``NotAny`` does *not* advance the parsing position within the
    input string, it only verifies that the specified parse expression
    does *not* match at the current position.  Also, ``NotAny`` does
    *not* skip over leading whitespace. ``NotAny`` always returns
    a null token list.  May be constructed using the '~' operator.

    Example::

        AND, OR, NOT = map(CaselessKeyword, "AND OR NOT".split())

        # take care not to mistake keywords for identifiers
        ident = ~(AND | OR | NOT) + Word(alphas)
        boolean_term = Optional(NOT) + ident

        # very crude boolean expression - to support parenthesis groups and
        # operation hierarchy, use infixNotation
        boolean_expr = boolean_term + ZeroOrMore((AND | OR) + boolean_term)

        # integers that are followed by "." are actually floats
        integer = Word(nums) + ~Char(".")
    """
    def __init__(self, expr):
        super(NotAny, self).__init__(expr)
        # set the flag directly rather than calling self.leaveWhitespace(),
        # which would propagate the setting to the wrapped expression
        self.skipWhitespace = False
        self.mayReturnEmpty = True
        self.errmsg = "Found unwanted token, " + _ustr(self.expr)

    def parseImpl(self, instring, loc, doActions=True):
        """Fail if the wrapped expression can parse at ``loc``; otherwise
        return ``loc`` unchanged with an empty token list.
        """
        unwanted_present = self.expr.canParseNext(instring, loc)
        if unwanted_present:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []

    def __str__(self):
        """Return ``self.name`` when set, else a cached ``~{expr}`` form."""
        if hasattr(self, "name"):
            return self.name

        text = self.strRepr
        if text is None:
            text = self.strRepr = "~{" + _ustr(self.expr) + "}"
        return text
4301
class _MultipleMatch(ParseElementEnhance):
    """Shared implementation of repeated matching with an optional
    ``stopOn`` sentinel expression.
    """
    def __init__(self, expr, stopOn=None):
        super(_MultipleMatch, self).__init__(expr)
        self.saveAsList = True
        sentinel = stopOn
        if isinstance(sentinel, basestring):
            sentinel = ParserElement._literalStringClass(sentinel)
        # stored negated: trying to parse it fails when the sentinel is present
        if sentinel is None:
            self.not_ender = None
        else:
            self.not_ender = ~sentinel

    def parseImpl(self, instring, loc, doActions=True):
        """Match the expression once (required), then as many more times as
        possible, returning the final location and the accumulated tokens.
        """
        parse_one = self.expr._parse
        skip_ignorables = self._skipIgnorables
        if self.not_ender is None:
            reject_sentinel = None
        else:
            reject_sentinel = self.not_ender.tryParse

        # must be at least one (but first see if we are the stopOn sentinel;
        # if so, fail)
        if reject_sentinel is not None:
            reject_sentinel(instring, loc)
        loc, tokens = parse_one(instring, loc, doActions, callPreParse=False)
        try:
            has_ignorables = bool(self.ignoreExprs)
            while True:
                if reject_sentinel is not None:
                    reject_sentinel(instring, loc)
                preloc = skip_ignorables(instring, loc) if has_ignorables else loc
                loc, more_tokens = parse_one(instring, preloc, doActions)
                if more_tokens or more_tokens.haskeys():
                    tokens += more_tokens
        except (ParseException, IndexError):
            # the first failed repetition simply ends the loop
            pass

        return loc, tokens
4339
class OneOrMore(_MultipleMatch):
    """Repetition of one or more of the given expression.

    Parameters:
     - expr - expression that must match one or more times
     - stopOn - (default= ``None``) - expression for a terminating sentinel
          (only required if the sentinel would ordinarily match the repetition
          expression)

    Example::

        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word).setParseAction(' '.join))

        text = "shape: SQUARE posn: upper left color: BLACK"
        OneOrMore(attr_expr).parseString(text).pprint()  # Fail! read 'color' as data instead of next label -> [['shape', 'SQUARE color']]

        # use stopOn attribute for OneOrMore to avoid reading label string as part of the data
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))
        OneOrMore(attr_expr).parseString(text).pprint() # Better -> [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'BLACK']]

        # could also be written as
        (attr_expr * (1,)).parseString(text).pprint()
    """

    def __str__(self):
        """Return ``self.name`` when set, else a cached ``{expr}...`` form."""
        if hasattr(self, "name"):
            return self.name

        text = self.strRepr
        if text is None:
            text = self.strRepr = "{" + _ustr(self.expr) + "}..."
        return text
4374
class ZeroOrMore(_MultipleMatch):
    """Optional repetition of zero or more of the given expression.

    Parameters:
     - expr - expression that must match zero or more times
     - stopOn - (default= ``None``) - expression for a terminating sentinel
          (only required if the sentinel would ordinarily match the repetition
          expression)

    Example: similar to :class:`OneOrMore`
    """
    def __init__(self, expr, stopOn=None):
        super(ZeroOrMore, self).__init__(expr, stopOn=stopOn)
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Parse as the base class does, but turn a failure to match even
        once into an empty match at the original location.
        """
        try:
            outcome = super(ZeroOrMore, self).parseImpl(instring, loc, doActions)
        except (ParseException, IndexError):
            outcome = (loc, [])
        return outcome

    def __str__(self):
        """Return ``self.name`` when set, else a cached ``[expr]...`` form."""
        if hasattr(self, "name"):
            return self.name

        text = self.strRepr
        if text is None:
            text = self.strRepr = "[" + _ustr(self.expr) + "]..."
        return text
4404
class _NullToken(object):
    """Placeholder object that is always falsy and renders as an empty string."""
    def __nonzero__(self):
        return False
    # same truth test under the Python 3 method name
    __bool__ = __nonzero__

    def __str__(self):
        return ""


# module-wide sentinel instance; compared by identity
_optionalNotMatched = _NullToken()
class Optional(ParseElementEnhance):
    """Optional matching of the given expression.

    Parameters:
     - expr - expression that may be matched at most once
     - default (optional) - value to be returned if the optional expression is not found.

    Example::

        # US postal code can be a 5-digit zip, plus optional 4-digit qualifier
        zip = Combine(Word(nums, exact=5) + Optional('-' + Word(nums, exact=4)))
        zip.runTests('''
            # traditional ZIP code
            12345

            # ZIP+4 form
            12101-0001

            # invalid ZIP
            98765-
            ''')

    prints::

        # traditional ZIP code
        12345
        ['12345']

        # ZIP+4 form
        12101-0001
        ['12101-0001']

        # invalid ZIP
        98765-
             ^
        FAIL: Expected end of text (at char 5), (line:1, col:6)
    """
    def __init__(self, expr, default=_optionalNotMatched):
        super(Optional, self).__init__(expr, savelist=False)
        self.saveAsList = self.expr.saveAsList
        self.defaultValue = default
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Parse the wrapped expression; if it does not match, return the
        original ``loc`` with the default value (when one was given) or an
        empty token list.
        """
        try:
            loc, tokens = self.expr._parse(instring, loc, doActions, callPreParse=False)
        except (ParseException, IndexError):
            fallback = self.defaultValue
            if fallback is _optionalNotMatched:
                # no default was supplied
                tokens = []
            elif self.expr.resultsName:
                # expose the default under the wrapped expression's results name too
                tokens = ParseResults([fallback])
                tokens[self.expr.resultsName] = fallback
            else:
                tokens = [fallback]
        return loc, tokens

    def __str__(self):
        """Return ``self.name`` when set, else a cached ``[expr]`` form."""
        if hasattr(self, "name"):
            return self.name

        text = self.strRepr
        if text is None:
            text = self.strRepr = "[" + _ustr(self.expr) + "]"
        return text
4478
class SkipTo(ParseElementEnhance):
    """Token for skipping over all undefined text until the matched
    expression is found.

    Parameters:
     - expr - target expression marking the end of the data to be skipped
     - include - (default= ``False``) if True, the target expression is also parsed
          (the skipped text and target expression are returned as a 2-element list).
     - ignore - (default= ``None``) used to define grammars (typically quoted strings and
          comments) that might contain false matches to the target expression
     - failOn - (default= ``None``) define expressions that are not allowed to be
          included in the skipped text; if found before the target expression is found,
          the SkipTo is not a match

    Example::

        report = '''
            Outstanding Issues Report - 1 Jan 2000

               # | Severity | Description                               |  Days Open
            -----+----------+-------------------------------------------+-----------
             101 | Critical | Intermittent system crash                 |          6
              94 | Cosmetic | Spelling error on Login ('log|n')         |         14
              79 | Minor    | System slow when running too many reports |         47
            '''
        integer = Word(nums)
        SEP = Suppress('|')
        # use SkipTo to simply match everything up until the next SEP
        # - ignore quoted strings, so that a '|' character inside a quoted string does not match
        # - parse action will call token.strip() for each matched token, i.e., the description body
        string_data = SkipTo(SEP, ignore=quotedString)
        string_data.setParseAction(tokenMap(str.strip))
        ticket_expr = (integer("issue_num") + SEP
                      + string_data("sev") + SEP
                      + string_data("desc") + SEP
                      + integer("days_open"))

        for tkt in ticket_expr.searchString(report):
            print(tkt.dump())

    prints::

        ['101', 'Critical', 'Intermittent system crash', '6']
        - days_open: 6
        - desc: Intermittent system crash
        - issue_num: 101
        - sev: Critical
        ['94', 'Cosmetic', "Spelling error on Login ('log|n')", '14']
        - days_open: 14
        - desc: Spelling error on Login ('log|n')
        - issue_num: 94
        - sev: Cosmetic
        ['79', 'Minor', 'System slow when running too many reports', '47']
        - days_open: 47
        - desc: System slow when running too many reports
        - issue_num: 79
        - sev: Minor
    """
    def __init__( self, other, include=False, ignore=None, failOn=None ):
        super( SkipTo, self ).__init__( other )
        self.ignoreExpr = ignore
        self.mayReturnEmpty = True
        self.mayIndexError = False
        self.includeMatch = include
        self.saveAsList = False
        # a string failOn is converted to a literal expression
        if isinstance(failOn, basestring):
            self.failOn = ParserElement._literalStringClass(failOn)
        else:
            self.failOn = failOn
        self.errmsg = "No match found for "+_ustr(self.expr)

    def parseImpl( self, instring, loc, doActions=True ):
        """Scan forward from ``loc`` one character at a time until the
        target expression parses.

        Returns the location where the scan stopped and a ``ParseResults``
        holding the skipped text; with ``include`` set, the target is then
        parsed as well and its tokens are appended. Raises
        ``ParseException`` when the scan runs past the end of ``instring``.
        """
        startloc = loc
        instrlen = len(instring)
        expr = self.expr
        expr_parse = self.expr._parse
        # bind the optional helpers once, outside the scanning loop
        self_failOn_canParseNext = self.failOn.canParseNext if self.failOn is not None else None
        self_ignoreExpr_tryParse = self.ignoreExpr.tryParse if self.ignoreExpr is not None else None

        tmploc = loc
        while tmploc <= instrlen:
            if self_failOn_canParseNext is not None:
                # break if failOn expression matches
                # NOTE(review): this break skips the while-else below, so the
                # code after the loop runs as if the scan had stopped
                # successfully at tmploc - confirm this is intended, given the
                # class docstring says SkipTo is then "not a match"
                if self_failOn_canParseNext(instring, tmploc):
                    break

            if self_ignoreExpr_tryParse is not None:
                # advance past ignore expressions
                while 1:
                    try:
                        tmploc = self_ignoreExpr_tryParse(instring, tmploc)
                    except ParseBaseException:
                        break

            try:
                # probe only: parse actions are disabled for this attempt
                expr_parse(instring, tmploc, doActions=False, callPreParse=False)
            except (ParseException, IndexError):
                # no match, advance loc in string
                tmploc += 1
            else:
                # matched skipto expr, done
                break

        else:
            # ran off the end of the input string without matching skipto expr, fail
            raise ParseException(instring, loc, self.errmsg, self)

        # build up return values
        loc = tmploc
        skiptext = instring[startloc:loc]
        skipresult = ParseResults(skiptext)

        if self.includeMatch:
            # parse the target for real this time, honouring doActions
            loc, mat = expr_parse(instring,loc,doActions,callPreParse=False)
            skipresult += mat

        return loc, skipresult
4596
class Forward(ParseElementEnhance):
    """Forward declaration of an expression to be defined later -
    used for recursive grammars, such as algebraic infix notation.
    When the expression is known, it is assigned to the ``Forward``
    variable using the '<<' operator.

    Note: take care when assigning to ``Forward`` not to overlook
    precedence of operators.

    Specifically, '|' has a lower precedence than '<<', so that::

        fwdExpr << a | b | c

    will actually be evaluated as::

        (fwdExpr << a) | b | c

    thereby leaving b and c out as parseable alternatives.  It is recommended that you
    explicitly group the values inserted into the ``Forward``::

        fwdExpr << (a | b | c)

    Converting to use the '<<=' operator instead will avoid this problem.

    See :class:`ParseResults.pprint` for an example of a recursive
    parser created using ``Forward``.
    """
    def __init__(self, other=None):
        super(Forward, self).__init__(other, savelist=False)

    def __lshift__(self, other):
        """Install ``other`` as the wrapped expression (strings become
        literals) and copy its parsing flags onto this element.
        Returns ``self``.
        """
        if isinstance(other, basestring):
            other = ParserElement._literalStringClass(other)
        self.expr = other
        self.strRepr = None
        target = self.expr
        self.mayIndexError = target.mayIndexError
        self.mayReturnEmpty = target.mayReturnEmpty
        self.setWhitespaceChars(target.whiteChars)
        self.skipWhitespace = target.skipWhitespace
        self.saveAsList = target.saveAsList
        self.ignoreExprs.extend(target.ignoreExprs)
        return self

    def __ilshift__(self, other):
        """``fwd <<= expr`` behaves exactly like ``fwd << expr``."""
        return self << other

    def leaveWhitespace(self):
        """Disable whitespace skipping on this element only."""
        self.skipWhitespace = False
        return self

    def streamline(self):
        """Streamline the wrapped expression once; the flag is set before
        descending so that a recursive grammar does not loop forever.
        """
        if self.streamlined:
            return self
        self.streamlined = True
        if self.expr is not None:
            self.expr.streamline()
        return self

    def validate(self, validateTrace=[]):
        """Validate the wrapped expression unless this element is already
        in ``validateTrace``, then check for recursion.
        """
        if self not in validateTrace:
            extended_trace = validateTrace[:] + [self]
            if self.expr is not None:
                self.expr.validate(extended_trace)
        self.checkRecursion([])

    def __str__(self):
        """Return ``self.name`` when set, else ``ClassName: <expr>``."""
        if hasattr(self, "name"):
            return self.name

        # Avoid infinite recursion by setting a temporary name
        self.name = self.__class__.__name__ + ": ..."

        # Use the string representation of main expression.
        try:
            inner_text = "None" if self.expr is None else _ustr(self.expr)
        finally:
            del self.name
        return self.__class__.__name__ + ": " + inner_text

    def copy(self):
        """Copy this element; an undefined ``Forward`` yields a new
        ``Forward`` that wraps the original rather than a plain copy.
        """
        if self.expr is None:
            wrapper = Forward()
            wrapper <<= self
            return wrapper
        return super(Forward, self).copy()
4685
class TokenConverter(ParseElementEnhance):
    """
    Abstract subclass of :class:`ParseElementEnhance`, for converting parsed results.
    """
    def __init__(self, expr, savelist=False):
        # ``savelist`` is accepted for signature compatibility but is not
        # forwarded; saveAsList is always reset to False here.
        super(TokenConverter, self).__init__(expr)
        self.saveAsList = False
4693
class Combine(TokenConverter):
    """Converter that joins all matched tokens into a single string.

    By default the matched pieces must also be contiguous in the input
    string; pass ``adjacent=False`` to the constructor to relax this.
    The pieces are joined using ``joinString`` (default ``""``).

    Example::

        real = Word(nums) + '.' + Word(nums)
        print(real.parseString('3.1416')) # -> ['3', '.', '1416']
        # will also erroneously match the following
        print(real.parseString('3. 1416')) # -> ['3', '.', '1416']

        real = Combine(Word(nums) + '.' + Word(nums))
        print(real.parseString('3.1416')) # -> ['3.1416']
        # no match when there are internal spaces
        print(real.parseString('3. 1416')) # -> Exception: Expected W:(0123...)
    """
    def __init__(self, expr, joinString="", adjacent=True):
        super(Combine, self).__init__(expr)
        # suppress whitespace-stripping in contained parse expressions,
        # but re-enable it on the Combine itself
        if adjacent:
            self.leaveWhitespace()
        self.adjacent = adjacent
        self.skipWhitespace = True
        self.joinString = joinString
        self.callPreparse = True

    def ignore(self, other):
        """Register ``other`` as an ignorable expression; return ``self``.

        An adjacent ``Combine`` calls ``ParserElement.ignore`` directly;
        otherwise the inherited ``ignore`` is used.
        """
        if self.adjacent:
            ParserElement.ignore(self, other)
        else:
            super(Combine, self).ignore(other)
        return self

    def postParse(self, instring, loc, tokenlist):
        """Replace the matched tokens with their joined string form."""
        combined = "".join(tokenlist._asStringList(self.joinString))

        # start from a copy of the results, emptied of its list items
        retToks = tokenlist.copy()
        del retToks[:]
        retToks += ParseResults([combined], modal=self.modalResults)

        if self.resultsName and retToks.haskeys():
            return [retToks]
        return retToks
4738
class Group(TokenConverter):
    """Converter that returns the matched tokens wrapped in a list -
    handy for keeping the tokens of :class:`ZeroOrMore` and
    :class:`OneOrMore` expressions together.

    Example::

        ident = Word(alphas)
        num = Word(nums)
        term = ident | num
        func = ident + Optional(delimitedList(term))
        print(func.parseString("fn a,b,100"))  # -> ['fn', 'a', 'b', '100']

        func = ident + Group(Optional(delimitedList(term)))
        print(func.parseString("fn a,b,100"))  # -> ['fn', ['a', 'b', '100']]
    """
    def __init__(self, expr):
        super(Group, self).__init__(expr)
        self.saveAsList = True

    def postParse(self, instring, loc, tokenlist):
        """Nest the matched tokens one level deeper."""
        grouped = [tokenlist]
        return grouped
4760
class Dict(TokenConverter):
    """Converter that returns a repetitive expression as a list, and
    additionally as a dictionary: each element can also be looked up
    using its first token as the key.  Useful for tabular report
    scraping when the first column can serve as an item key.

    Example::

        data_word = Word(alphas)
        label = data_word + FollowedBy(':')

        text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
        attr_expr = (label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))

        # print attributes as plain groups
        print(OneOrMore(attr_expr).parseString(text).dump())

        # instead of OneOrMore(expr), parse using Dict(OneOrMore(Group(expr))) - Dict will auto-assign names
        result = Dict(OneOrMore(Group(attr_expr))).parseString(text)
        print(result.dump())

        # access named fields as dict entries, or output as dict
        print(result['shape'])
        print(result.asDict())

    prints::

        ['shape', 'SQUARE', 'posn', 'upper left', 'color', 'light blue', 'texture', 'burlap']
        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
        - color: light blue
        - posn: upper left
        - shape: SQUARE
        - texture: burlap
        SQUARE
        {'color': 'light blue', 'posn': 'upper left', 'texture': 'burlap', 'shape': 'SQUARE'}

    See more examples at :class:`ParseResults` of accessing fields by results name.
    """
    def __init__(self, expr):
        super(Dict, self).__init__(expr)
        self.saveAsList = True

    def postParse(self, instring, loc, tokenlist):
        """Add a named entry to ``tokenlist`` for every non-empty element.

        The key is the element's first token (an ``int`` key is
        converted to its stripped string form).  The stored value is
        ``""`` for a one-token element, the second token for a
        two-token element whose second token is not a
        :class:`ParseResults`, and otherwise the remaining tokens.
        """
        for i, tok in enumerate(tokenlist):
            if len(tok) == 0:
                continue

            ikey = tok[0]
            if isinstance(ikey, int):
                ikey = _ustr(tok[0]).strip()

            if len(tok) == 1:
                entry = ""
            elif len(tok) == 2 and not isinstance(tok[1], ParseResults):
                entry = tok[1]
            else:
                # everything after the key token
                dictvalue = tok.copy()
                del dictvalue[0]
                keepWhole = len(dictvalue) != 1 or (isinstance(dictvalue, ParseResults) and dictvalue.haskeys())
                entry = dictvalue if keepWhole else dictvalue[0]

            tokenlist[ikey] = _ParseResultsWithOffset(entry, i)

        if self.resultsName:
            return [tokenlist]
        return tokenlist
4827
4828
class Suppress(TokenConverter):
    """Converter that discards the results of a parsed expression.

    Example::

        source = "a, b, c,d"
        wd = Word(alphas)
        wd_list1 = wd + ZeroOrMore(',' + wd)
        print(wd_list1.parseString(source))

        # often, delimiters that are useful during parsing are just in the
        # way afterward - use Suppress to keep them out of the parsed output
        wd_list2 = wd + ZeroOrMore(Suppress(',') + wd)
        print(wd_list2.parseString(source))

    prints::

        ['a', ',', 'b', ',', 'c', ',', 'd']
        ['a', 'b', 'c', 'd']

    (See also :class:`delimitedList`.)
    """
    def postParse(self, instring, loc, tokenlist):
        """Drop whatever was matched by returning an empty list."""
        return list()

    def suppress(self):
        """Already suppressing, so simply return ``self``."""
        return self
4856
4857
class OnlyOnce(object):
    """Wrapper for parse actions, to ensure they are only called once.

    After one successful call, further calls raise
    :class:`ParseException` until :meth:`reset` is invoked.
    """
    def __init__(self, methodCall):
        self.callable = _trim_arity(methodCall)
        self.called = False

    def __call__(self, s, l, t):
        if self.called:
            raise ParseException(s, l, "")
        results = self.callable(s, l, t)
        # marked only after the wrapped action returned without raising
        self.called = True
        return results

    def reset(self):
        """Allow the wrapped parse action to be called once more."""
        self.called = False
4872
def traceParseAction(f):
    """Decorator for debugging parse actions.

    Before the parse action runs, the decorator writes
    ``">>entering method-name(line: <current_source_line>, <parse_location>, <matched_tokens>)"``
    to ``sys.stderr``.  When the parse action completes, it writes
    ``"<<leaving"`` followed by the returned value, or by the exception
    that the parse action raised (the exception is then re-raised).

    Example::

        wd = Word(alphas)

        @traceParseAction
        def remove_duplicate_chars(tokens):
            return ''.join(sorted(set(''.join(tokens))))

        wds = OneOrMore(wd).setParseAction(remove_duplicate_chars)
        print(wds.parseString("slkdjs sld sldd sdlf sdljf"))

    prints::

        >>entering remove_duplicate_chars(line: 'slkdjs sld sldd sdlf sdljf', 0, (['slkdjs', 'sld', 'sldd', 'sdlf', 'sdljf'], {}))
        <<leaving remove_duplicate_chars (ret: 'dfjkls')
        ['dfjkls']
    """
    f = _trim_arity(f)

    def z(*paArgs):
        # the last three positional arguments are (string, location, tokens)
        s, l, t = paArgs[-3:]
        thisFunc = f.__name__
        if len(paArgs) > 3:
            # an extra leading argument is reported as "<its class>.<name>"
            thisFunc = paArgs[0].__class__.__name__ + '.' + thisFunc
        report = sys.stderr.write
        report(">>entering %s(line: '%s', %d, %r)\n" % (thisFunc, line(l, s), l, t))
        try:
            ret = f(*paArgs)
        except Exception as exc:
            report("<<leaving %s (exception: %s)\n" % (thisFunc, exc))
            raise
        report("<<leaving %s (ret: %r)\n" % (thisFunc, ret))
        return ret

    try:
        z.__name__ = f.__name__
    except AttributeError:
        pass
    return z
4917
4918 #
4919 # global helpers
4920 #
def delimitedList(expr, delim=",", combine=False):
    """Helper to define a delimited list of expressions - the delimiter
    defaults to ','.  By default, list elements and delimiters may have
    intervening whitespace and comments; pass ``combine=True`` to
    override this.  With ``combine=True`` the matched text is returned
    as a single token string with the delimiters included; otherwise
    the result is a list of tokens with the delimiters suppressed.

    Example::

        delimitedList(Word(alphas)).parseString("aa,bb,cc") # -> ['aa', 'bb', 'cc']
        delimitedList(Word(hexnums), delim=':', combine=True).parseString("AA:BB:CC:DD:EE") # -> ['AA:BB:CC:DD:EE']
    """
    exprText = _ustr(expr)
    dlName = exprText + " [" + _ustr(delim) + " " + exprText + "]..."

    if not combine:
        listExpr = expr + ZeroOrMore(Suppress(delim) + expr)
    else:
        listExpr = Combine(expr + ZeroOrMore(delim + expr))
    return listExpr.setName(dlName)
4941
def countedArray(expr, intExpr=None):
    """Helper to define a counted list of expressions.

    The pattern matched has the form::

        integer expr expr expr...

    where the leading integer states how many ``expr`` items follow.
    The matched tokens are returned as a list of the ``expr`` tokens -
    the leading count token is suppressed.

    If ``intExpr`` is specified, it should be a pyparsing expression
    that produces an integer value.

    Example::

        countedArray(Word(alphas)).parseString('2 ab cd ef')  # -> ['ab', 'cd']

        # in this parser, the leading integer value is given in binary,
        # '10' indicating that 2 values are in the array
        binaryConstant = Word('01').setParseAction(lambda t: int(t[0], 2))
        countedArray(Word(alphas), intExpr=binaryConstant).parseString('10 ab cd ef')  # -> ['ab', 'cd']
    """
    arrayExpr = Forward()

    def countFieldParseAction(s, l, t):
        # (re)define the array part once the count is known
        n = t[0]
        arrayExpr << (Group(And([expr] * n)) if n else Group(empty))
        return []

    if intExpr is not None:
        # work on a copy so the caller's expression is left alone
        intExpr = intExpr.copy()
    else:
        intExpr = Word(nums).setParseAction(lambda t: int(t[0]))
    intExpr.setName("arrayLen")
    intExpr.addParseAction(countFieldParseAction, callDuringTry=True)

    counted = intExpr + arrayExpr
    return counted.setName('(len) ' + _ustr(expr) + '...')
4977
4978 def _flatten(L):
4979 ret = []
4980 for i in L:
4981 if isinstance(i,list):
4982 ret.extend(_flatten(i))
4983 else:
4984 ret.append(i)
4985 return ret
4986
def matchPreviousLiteral(expr):
    """Helper to define an expression that is indirectly defined from
    the tokens matched in a previous expression, that is, it looks for
    a 'repeat' of a previous expression.  For example::

        first = Word(nums)
        second = matchPreviousLiteral(first)
        matchExpr = first + ":" + second

    will match ``"1:1"``, but not ``"1:2"``.  Because this
    matches a previous literal, it will also match the leading
    ``"1:1"`` in ``"1:10"``.  If this is not desired, use
    :class:`matchPreviousExpr`.  Do *not* use with packrat parsing
    enabled.
    """
    rep = Forward()

    def copyTokenToRepeater(s, l, t):
        # redefine the repeater from whatever ``expr`` just matched
        if not t:
            rep << Empty()
        elif len(t) == 1:
            rep << t[0]
        else:
            # flatten t tokens
            tflat = _flatten(t.asList())
            rep << And(Literal(tt) for tt in tflat)

    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    rep.setName('(prev) ' + _ustr(expr))
    return rep
5016
def matchPreviousExpr(expr):
    """Helper to define an expression that is indirectly defined from
    the tokens matched in a previous expression, that is, it looks for
    a 'repeat' of a previous expression.  For example::

        first = Word(nums)
        second = matchPreviousExpr(first)
        matchExpr = first + ":" + second

    will match ``"1:1"``, but not ``"1:2"``.  Because this
    matches by expressions, it will *not* match the leading ``"1:1"``
    in ``"1:10"``; the expressions are evaluated first, and then
    compared, so ``"1"`` is compared with ``"10"``.  Do *not* use
    with packrat parsing enabled.
    """
    rep = Forward()
    rep <<= expr.copy()

    def copyTokenToRepeater(s, l, t):
        # remember what ``expr`` matched ...
        matchTokens = _flatten(t.asList())

        def mustMatchTheseTokens(s, l, t):
            # ... and reject any later match of the repeater that differs
            if _flatten(t.asList()) != matchTokens:
                raise ParseException("", 0, "")

        rep.setParseAction(mustMatchTheseTokens, callDuringTry=True)

    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    rep.setName('(prev) ' + _ustr(expr))
    return rep
5045
def _escapeRegexRangeChars(s):
    """Backslash-escape the characters ``\\ ^ - ]`` in ``s`` and spell
    newline and tab as ``\\n`` and ``\\t``."""
    # the backslash must be handled first, so that the backslashes
    # added for the other characters are not escaped again
    replacements = [(c, _bslash + c) for c in r"\^-]"]
    replacements += [("\n", r"\n"), ("\t", r"\t")]
    for old, new in replacements:
        s = s.replace(old, new)
    return _ustr(s)
5053
def oneOf(strs, caseless=False, useRegex=True):
    """Helper to quickly define a set of alternative Literals, and makes
    sure to do longest-first testing when there is a conflict,
    regardless of the input order, but returns
    a :class:`MatchFirst` for best performance.

    Parameters:

     - strs - a string of space-delimited literals, or a collection of
       string literals
     - caseless - (default= ``False``) - treat all literals as
       caseless
     - useRegex - (default= ``True``) - as an optimization, will
       generate a Regex object; otherwise, will generate
       a :class:`MatchFirst` object (if ``caseless=True``, or if
       creating a :class:`Regex` raises an exception)

    Returns :class:`NoMatch` when no symbols are given.  An argument
    that is neither a string nor an iterable triggers a
    ``SyntaxWarning`` and is treated as an empty symbol list.

    Example::

        comp_oper = oneOf("< = > <= >= !=")
        var = Word(alphas)
        number = Word(nums)
        term = var | number
        comparison_expr = term + comp_oper + term
        print(comparison_expr.searchString("B = 12   AA=23 B<=AA AA>12"))

    prints::

        [['B', '=', '12'], ['AA', '=', '23'], ['B', '<=', 'AA'], ['AA', '>', '12']]
    """
    if caseless:
        isequal = (lambda a, b: a.upper() == b.upper())
        masks = (lambda a, b: b.upper().startswith(a.upper()))
        parseElementClass = CaselessLiteral
    else:
        isequal = (lambda a, b: a == b)
        masks = (lambda a, b: b.startswith(a))
        parseElementClass = Literal

    symbols = []
    if isinstance(strs, basestring):
        symbols = strs.split()
    elif isinstance(strs, Iterable):
        symbols = list(strs)
    else:
        warnings.warn("Invalid argument to oneOf, expected string or iterable",
                      SyntaxWarning, stacklevel=2)
    if not symbols:
        return NoMatch()

    # Remove duplicates, and move any symbol that is masked by an
    # earlier, shorter prefix in front of that prefix.  ``i`` only
    # advances once position i needed no change (the for/else branch).
    i = 0
    while i < len(symbols) - 1:
        cur = symbols[i]
        for j, other in enumerate(symbols[i + 1:]):
            if isequal(other, cur):
                del symbols[i + j + 1]
                break
            elif masks(cur, other):
                del symbols[i + j + 1]
                symbols.insert(i, other)
                cur = other
                break
        else:
            i += 1

    if not caseless and useRegex:
        try:
            # A character class is only valid when EVERY symbol is exactly
            # one character long.  Comparing the symbol count with the
            # total length is not enough: ["", "ab"] has 2 symbols and
            # 2 characters, yet "[ab]" would match 'a' or 'b'.
            if all(len(sym) == 1 for sym in symbols):
                return Regex("[%s]" % "".join(_escapeRegexRangeChars(sym) for sym in symbols)).setName(' | '.join(symbols))
            else:
                return Regex("|".join(re.escape(sym) for sym in symbols)).setName(' | '.join(symbols))
        except Exception:
            warnings.warn("Exception creating Regex for oneOf, building MatchFirst",
                          SyntaxWarning, stacklevel=2)

    # last resort, just use MatchFirst
    return MatchFirst(parseElementClass(sym) for sym in symbols).setName(' | '.join(symbols))
5133
def dictOf(key, value):
    """Helper to easily and clearly define a dictionary by specifying
    the respective patterns for the key and value.  Takes care of
    nesting the :class:`Dict`, :class:`OneOrMore`, and :class:`Group`
    expressions in the proper order.  The key pattern can include
    delimiting markers or punctuation, as long as they are suppressed,
    thereby leaving the significant key text.  The value pattern can
    include named results, so that the :class:`Dict` results can
    include named token fields.

    Example::

        text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
        attr_expr = (label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))
        print(OneOrMore(attr_expr).parseString(text).dump())

        attr_label = label
        attr_value = Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join)

        # similar to Dict, but simpler call format
        result = dictOf(attr_label, attr_value).parseString(text)
        print(result.dump())
        print(result['shape'])
        print(result.shape)  # object attribute access works too
        print(result.asDict())

    prints::

        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
        - color: light blue
        - posn: upper left
        - shape: SQUARE
        - texture: burlap
        SQUARE
        SQUARE
        {'color': 'light blue', 'shape': 'SQUARE', 'posn': 'upper left', 'texture': 'burlap'}
    """
    entry = Group(key + value)
    entries = OneOrMore(entry)
    return Dict(entries)
5172
def originalTextFor(expr, asString=True):
    """Helper to return the original, untokenized text for a given
    expression.  Useful to restore the parsed fields of an HTML start
    tag into the raw tag text itself, or to revert separate tokens with
    intervening whitespace back to the original matching input text.  By
    default, returns a string containing the original parsed text.

    If the optional ``asString`` argument is passed as
    ``False``, then the return value is
    a :class:`ParseResults` containing any results names that
    were originally matched, and a single token containing the original
    matched text from the input string.  So if the expression passed to
    :class:`originalTextFor` contains expressions with defined
    results names, you must set ``asString`` to ``False`` if you
    want to preserve those results name values.

    Example::

        src = "this is test <b> bold <i>text</i> </b> normal text "
        for tag in ("b","i"):
            opener,closer = makeHTMLTags(tag)
            patt = originalTextFor(opener + SkipTo(closer) + closer)
            print(patt.searchString(src)[0])

    prints::

        ['<b> bold <i>text</i> </b>']
        ['<i>text</i>']
    """
    # zero-width markers whose parse action yields the current location
    locMarker = Empty().setParseAction(lambda s, loc, t: loc)
    endlocMarker = locMarker.copy()
    endlocMarker.callPreparse = False
    matchExpr = locMarker("_original_start") + expr + endlocMarker("_original_end")

    if not asString:
        def extractText(s, l, t):
            # replace the tokens in place with the slice of the input;
            # the two marker names are popped from the results
            t[:] = [s[t.pop('_original_start'):t.pop('_original_end')]]
    else:
        extractText = lambda s, l, t: s[t._original_start:t._original_end]

    matchExpr.setParseAction(extractText)
    # same list object as expr's ignoreExprs (not a copy)
    matchExpr.ignoreExprs = expr.ignoreExprs
    return matchExpr
5214
def ungroup(expr):
    """Helper to undo pyparsing's default grouping of And expressions,
    even if all but one are non-empty.

    Wraps ``expr`` in a :class:`TokenConverter` whose parse action
    returns the first token.
    """
    converter = TokenConverter(expr)
    return converter.addParseAction(lambda t: t[0])
5220
def locatedExpr(expr):
    """Helper to decorate a returned token with its starting and ending
    locations in the input string.

    This helper adds the following results names:

     - locn_start = location where matched expression begins
     - locn_end = location where matched expression ends
     - value = the actual parsed results

    Be careful if the input text contains ``<TAB>`` characters, you
    may want to call :class:`ParserElement.parseWithTabs`

    Example::

        wd = Word(alphas)
        for match in locatedExpr(wd).searchString("ljsdf123lksdjjf123lkkjj1222"):
            print(match)

    prints::

        [[0, 'ljsdf', 5]]
        [[8, 'lksdjjf', 15]]
        [[18, 'lkkjj', 23]]
    """
    # zero-width marker whose parse action yields the current location
    locator = Empty().setParseAction(lambda s, l, t: l)
    startMarker = locator("locn_start")
    valueExpr = expr("value")
    # the end marker is a separate copy with whitespace skipping disabled
    endMarker = locator.copy().leaveWhitespace()("locn_end")
    return Group(startMarker + valueExpr + endMarker)
5248
5249
# convenience constants for positional expressions
empty       = Empty().setName("empty")
lineStart   = LineStart().setName("lineStart")
lineEnd     = LineEnd().setName("lineEnd")
stringStart = StringStart().setName("stringStart")
stringEnd   = StringEnd().setName("stringEnd")

# Grammar for a regex-style bracket expression such as "[a-z0-9_]".
# backslash + one punctuation character -> that character
_escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t:t[0][1])
# \x##, \X## or \0x## -> character with that hex code.  Only the digits after
# the 'x' marker are converted.  (lstrip(r'\0x') strips a *set* of characters,
# so it also ate leading zero digits - turning "\x00" into int('') - and left
# an upper-case 'X' in place; both failed with ValueError.)
_escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").setParseAction(lambda s,l,t:unichr(int(t[0].lower().partition('x')[2],16)))
# \0## -> character with that octal code
_escapedOctChar = Regex(r"\\0[0-7]+").setParseAction(lambda s,l,t:unichr(int(t[0][1:],8)))
_singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | CharsNotIn(r'\]', exact=1)
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
_reBracketExpr = Literal("[") + Optional("^").setResultsName("negate") + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body") + "]"
5263
def srange(s):
    r"""Helper to easily define string ranges for use in Word
    construction.  Borrows syntax from regexp '[]' string range
    definitions::

        srange("[0-9]")   -> "0123456789"
        srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
        srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"

    The input string must be enclosed in []'s, and the returned string
    is the expanded character set joined into a single string.  The
    values enclosed in the []'s may be:

     - a single character
     - an escaped character with a leading backslash (such as ``\-``
       or ``\]``)
     - an escaped hex character with a leading ``'\x'``
       (``\x21``, which is a ``'!'`` character) (``\0x##``
       is also supported for backwards compatibility)
     - an escaped octal character with a leading ``'\0'``
       (``\041``, which is a ``'!'`` character)
     - a range of any of the above, separated by a dash (``'a-z'``,
       etc.)
     - any combination of the above (``'aeiouy'``,
       ``'a-zA-Z0-9_$'``, etc.)

    Any exception raised while parsing or expanding ``s`` is swallowed
    and an empty string is returned instead.
    """
    def _expanded(p):
        # a ParseResults part is a (first, last) range; anything else is
        # already a single character
        if not isinstance(p, ParseResults):
            return p
        return ''.join(unichr(c) for c in range(ord(p[0]), ord(p[1]) + 1))

    try:
        return "".join(map(_expanded, _reBracketExpr.parseString(s).body))
    except Exception:
        return ""
5295
def matchOnlyAtCol(n):
    """Helper method for defining parse actions that require matching at
    a specific column in the input text.

    The returned parse action raises :class:`ParseException` when the
    match location is not at column ``n``.
    """
    def verifyCol(strg, locn, toks):
        atRequiredColumn = col(locn, strg) == n
        if not atRequiredColumn:
            raise ParseException(strg, locn, "matched token not at column %d" % n)
    return verifyCol
5304
def replaceWith(replStr):
    """Helper method for common parse actions that simply return
    a literal value.  Especially useful when used with
    :class:`transformString<ParserElement.transformString>` ().

    The returned parse action ignores its arguments and returns
    ``replStr`` wrapped in a new one-element list on every call.

    Example::

        num = Word(nums).setParseAction(lambda toks: int(toks[0]))
        na = oneOf("N/A NA").setParseAction(replaceWith(math.nan))
        term = na | num

        OneOrMore(term).parseString("324 234 N/A 234") # -> [324, 234, nan, 234]
    """
    return (lambda s, l, t:
            [replStr])
5319
def removeQuotes(s, l, t):
    """Helper parse action for removing quotation marks from parsed
    quoted strings: returns the first token without its first and
    last characters.

    Example::

        # by default, quotation marks are included in parsed results
        quotedString.parseString("'Now is the Winter of our Discontent'") # -> ["'Now is the Winter of our Discontent'"]

        # use removeQuotes to strip quotation marks from parsed results
        quotedString.setParseAction(removeQuotes)
        quotedString.parseString("'Now is the Winter of our Discontent'") # -> ["Now is the Winter of our Discontent"]
    """
    quoted = t[0]
    return quoted[1:-1]
5334
def tokenMap(func, *args):
    """Helper to define a parse action by mapping a function to all
    elements of a ParseResults list.  If any additional args are passed,
    they are forwarded to the given function as additional arguments
    after the token, as in
    ``hex_integer = Word(hexnums).setParseAction(tokenMap(int, 16))``,
    which will convert the parsed data to an integer using base 16.

    The returned parse action is named after ``func`` (its
    ``__name__``, else its class name, else ``str(func)``).

    Example (compare the last to example in :class:`ParserElement.transformString`::

        hex_ints = OneOrMore(Word(hexnums)).setParseAction(tokenMap(int, 16))
        hex_ints.runTests('''
            00 11 22 aa FF 0a 0d 1a
            ''')

        upperword = Word(alphas).setParseAction(tokenMap(str.upper))
        OneOrMore(upperword).runTests('''
            my kingdom for a horse
            ''')

        wd = Word(alphas).setParseAction(tokenMap(str.title))
        OneOrMore(wd).setParseAction(' '.join).runTests('''
            now is the winter of our discontent made glorious summer by this sun of york
            ''')

    prints::

        00 11 22 aa FF 0a 0d 1a
        [0, 17, 34, 170, 255, 10, 13, 26]

        my kingdom for a horse
        ['MY', 'KINGDOM', 'FOR', 'A', 'HORSE']

        now is the winter of our discontent made glorious summer by this sun of york
        ['Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York']
    """
    def pa(s, l, t):
        converted = []
        for tokn in t:
            converted.append(func(tokn, *args))
        return converted

    try:
        classNameFallback = func.__class__.__name__
        func_name = getattr(func, '__name__', classNameFallback)
    except Exception:
        func_name = str(func)
    pa.__name__ = func_name

    return pa
5382
# Parse action: every token converted with _ustr() and upper-cased.
upcaseTokens = tokenMap(lambda tok: _ustr(tok).upper())
"""(Deprecated) Helper parse action to convert tokens to upper case.
Deprecated in favor of :class:`pyparsing_common.upcaseTokens`"""

# Parse action: every token converted with _ustr() and lower-cased.
downcaseTokens = tokenMap(lambda tok: _ustr(tok).lower())
"""(Deprecated) Helper parse action to convert tokens to lower case.
Deprecated in favor of :class:`pyparsing_common.downcaseTokens`"""
5390
def _makeTags(tagStr, xml,
              suppress_LT=Suppress("<"),
              suppress_GT=Suppress(">")):
    """Internal helper to construct opening and closing tag expressions, given a tag name

    ``tagStr`` is either a tag name string or an expression with a
    ``name`` attribute.  ``xml`` selects the XML flavour (tag name
    matched with the given case, double-quoted attribute values that
    are always present) over the HTML flavour (caseless tag name,
    attribute names lower-cased, values optional and quoted or
    unquoted).  Returns the ``(openTag, closeTag)`` pair.
    """
    if isinstance(tagStr, basestring):
        resname = tagStr
        tagStr = Keyword(tagStr, caseless=not xml)
    else:
        resname = tagStr.name

    tagAttrName = Word(alphas, alphanums + "_-:")
    if xml:
        tagAttrValue = dblQuotedString.copy().setParseAction(removeQuotes)
        tagAttr = tagAttrName + Suppress("=") + tagAttrValue
    else:
        tagAttrValue = quotedString.copy().setParseAction(removeQuotes) | Word(printables, excludeChars=">")
        tagAttr = tagAttrName.setParseAction(downcaseTokens) + Optional(Suppress("=") + tagAttrValue)

    # optional trailing "/" reported as a boolean under the name "empty"
    emptyFlag = Optional("/", default=[False])("empty").setParseAction(lambda s, l, t: t[0] == '/')
    openTag = (suppress_LT
               + tagStr("tag")
               + Dict(ZeroOrMore(Group(tagAttr)))
               + emptyFlag
               + suppress_GT)
    closeTag = Combine(_L("</") + tagStr + ">", adjacent=False)

    # "ns:tag" -> "NsTag"; used in the start.../end... results names
    nameSuffix = "".join(resname.replace(":", " ").title().split())

    openTag.setName("<%s>" % resname)
    # add start<tagname> results name in parse action now that ungrouped names are not reported at two levels
    openTag.addParseAction(lambda t: t.__setitem__("start" + nameSuffix, t.copy()))
    closeTag = closeTag("end" + nameSuffix).setName("</%s>" % resname)
    openTag.tag = resname
    closeTag.tag = resname
    openTag.tag_body = SkipTo(closeTag())
    return openTag, closeTag
5427
def makeHTMLTags(tagStr):
    """Helper to construct opening and closing tag expressions for HTML,
    given a tag name.  Matches tags in either upper or lower case,
    attributes with namespaces and with quoted or unquoted values.

    Example::

        text = '<td>More info at the <a href="https://github.com/pyparsing/pyparsing/wiki">pyparsing</a> wiki page</td>'
        # makeHTMLTags returns pyparsing expressions for the opening and
        # closing tags as a 2-tuple
        a,a_end = makeHTMLTags("A")
        link_expr = a + SkipTo(a_end)("link_text") + a_end

        for link in link_expr.searchString(text):
            # attributes in the <A> tag (like "href" shown here) are
            # also accessible as named results
            print(link.link_text, '->', link.href)

    prints::

        pyparsing -> https://github.com/pyparsing/pyparsing/wiki
    """
    return _makeTags(tagStr, xml=False)
5451
def makeXMLTags(tagStr):
    """Helper to construct opening and closing tag expressions for XML,
    given a tag name.  Matches tags only in the given upper/lower case.

    Example: similar to :class:`makeHTMLTags`
    """
    return _makeTags(tagStr, xml=True)
5459
5460 def withAttribute(*args,**attrDict):
5461 """Helper to create a validating parse action to be used with start
5462 tags created with :class:`makeXMLTags` or
5463 :class:`makeHTMLTags`. Use ``withAttribute`` to qualify
5464 a starting tag with a required attribute value, to avoid false
5465 matches on common tags such as ``<TD>`` or ``<DIV>``.
5466
5467 Call ``withAttribute`` with a series of attribute names and
5468 values. Specify the list of filter attributes names and values as:
5469
5470 - keyword arguments, as in ``(align="right")``, or
5471 - as an explicit dict with ``**`` operator, when an attribute
5472 name is also a Python reserved word, as in ``**{"class":"Customer", "align":"right"}``
5473 - a list of name-value tuples, as in ``(("ns1:class", "Customer"), ("ns2:align","right"))``
5474
5475 For attribute names with a namespace prefix, you must use the second
5476 form. Attribute names are matched insensitive to upper/lower case.
5477
5478 If just testing for ``class`` (with or without a namespace), use
5479 :class:`withClass`.
5480
5481 To verify that the attribute exists, but without specifying a value,
5482 pass ``withAttribute.ANY_VALUE`` as the value.
5483
5484 Example::
5485
5486 html = '''
5487 <div>
5488 Some text
5489 <div type="grid">1 4 0 1 0</div>
5490 <div type="graph">1,3 2,3 1,1</div>
5491 <div>this has no type</div>
5492 </div>
5493
5494 '''
5495 div,div_end = makeHTMLTags("div")
5496
5497 # only match div tag having a type attribute with value "grid"
5498 div_grid = div().setParseAction(withAttribute(type="grid"))
5499 grid_expr = div_grid + SkipTo(div | div_end)("body")
5500 for grid_header in grid_expr.searchString(html):
5501 print(grid_header.body)
5502
5503 # construct a match with any div tag having a type attribute, regardless of the value
5504 div_any_type = div().setParseAction(withAttribute(type=withAttribute.ANY_VALUE))
5505 div_expr = div_any_type + SkipTo(div | div_end)("body")
5506 for div_header in div_expr.searchString(html):
5507 print(div_header.body)
5508
5509 prints::
5510
5511 1 4 0 1 0
5512
5513 1 4 0 1 0
5514 1,3 2,3 1,1
5515 """
5516 if args:
5517 attrs = args[:]
5518 else:
5519 attrs = attrDict.items()
5520 attrs = [(k,v) for k,v in attrs]
5521 def pa(s,l,tokens):
5522 for attrName,attrValue in attrs:
5523 if attrName not in tokens:
5524 raise ParseException(s,l,"no matching attribute " + attrName)
5525 if attrValue != withAttribute.ANY_VALUE and tokens[attrName] != attrValue:
5526 raise ParseException(s,l,"attribute '%s' has value '%s', must be '%s'" %
5527 (attrName, tokens[attrName], attrValue))
5528 return pa
# Sentinel value for withAttribute: the generated parse action skips the value
# comparison for any attribute whose requested value is this object, so only
# the presence of the attribute is checked.
withAttribute.ANY_VALUE = object()
5530
def withClass(classname, namespace=''):
    """Build a parse action that filters tags on their ``class`` attribute.

    Convenience wrapper around :class:`withAttribute`; it exists because
    ``class`` is a reserved word in Python and so cannot be written as a
    plain keyword argument.

    Parameters:
     - classname - required value of the ``class`` attribute, or
       ``withAttribute.ANY_VALUE`` to accept any value
     - namespace - optional namespace prefix; when non-empty the attribute
       tested is ``<namespace>:class`` instead of ``class``
       (default= ``''``)

    Example::

        html = '''
            <div>
            Some text
            <div class="grid">1 4 0 1 0</div>
            <div class="graph">1,3 2,3 1,1</div>
            <div>this &lt;div&gt; has no class</div>
            </div>

        '''
        div,div_end = makeHTMLTags("div")
        div_grid = div().setParseAction(withClass("grid"))

        grid_expr = div_grid + SkipTo(div | div_end)("body")
        for grid_header in grid_expr.searchString(html):
            print(grid_header.body)

        div_any_type = div().setParseAction(withClass(withAttribute.ANY_VALUE))
        div_expr = div_any_type + SkipTo(div | div_end)("body")
        for div_header in div_expr.searchString(html):
            print(div_header.body)

    prints::

        1 4 0 1 0

        1 4 0 1 0
        1,3 2,3 1,1
    """
    if namespace:
        attr_key = "%s:class" % namespace
    else:
        attr_key = "class"
    # the key is not a valid identifier, so it must be passed via ** expansion
    attr_filter = {attr_key: classname}
    return withAttribute(**attr_filter)
5568
# Namespace holding the two associativity markers accepted by infixNotation
# in the ``rightLeftAssoc`` slot of each operator definition.
opAssoc = SimpleNamespace()
# Each marker is a distinct bare object(); infixNotation tells them apart
# by comparing against opAssoc.LEFT / opAssoc.RIGHT.
opAssoc.LEFT = object()
opAssoc.RIGHT = object()
5572
def infixNotation( baseExpr, opList, lpar=Suppress('('), rpar=Suppress(')') ):
    """Helper method for constructing grammars of expressions made up of
    operators working in a precedence hierarchy.  Operators may be unary
    or binary, left- or right-associative.  Parse actions can also be
    attached to operator expressions. The generated parser will also
    recognize the use of parentheses to override operator precedences
    (see example below).

    Note: if you define a deep operator list, you may see performance
    issues when using infixNotation. See
    :class:`ParserElement.enablePackrat` for a mechanism to potentially
    improve your parser performance.

    Parameters:
     - baseExpr - expression representing the most basic operand of the
       nested expression grammar
     - opList - list of tuples, one for each operator precedence level
       in the expression grammar; each tuple is of the form ``(opExpr,
       numTerms, rightLeftAssoc, parseAction)``, where:

       - opExpr is the pyparsing expression for the operator; may also
         be a string, which will be converted to a Literal; if numTerms
         is 3, opExpr is a tuple (or list) of two expressions, for the
         two operators separating the 3 terms
       - numTerms is the number of terms for this operator (must be 1,
         2, or 3)
       - rightLeftAssoc is the indicator whether the operator is right
         or left associative, using the pyparsing-defined constants
         ``opAssoc.RIGHT`` and ``opAssoc.LEFT``.
       - parseAction is the parse action to be associated with
         expressions matching this operator expression (the parse action
         tuple member may be omitted); if the parse action is passed
         a tuple or list of functions, this is equivalent to calling
         ``setParseAction(*fn)``
         (:class:`ParserElement.setParseAction`)
     - lpar - expression for matching left-parentheses
       (default= ``Suppress('(')``)
     - rpar - expression for matching right-parentheses
       (default= ``Suppress(')')``)

    Returns a :class:`Forward` expression matching the full operator
    grammar.

    Raises ``ValueError`` if an operator definition has a ``numTerms``
    other than 1, 2 or 3, a ternary ``opExpr`` that is not a pair of
    expressions, or an associativity that is neither ``opAssoc.LEFT``
    nor ``opAssoc.RIGHT``.

    Example::

        # simple example of four-function arithmetic with ints and
        # variable names
        integer = pyparsing_common.signed_integer
        varname = pyparsing_common.identifier

        arith_expr = infixNotation(integer | varname,
            [
            ('-', 1, opAssoc.RIGHT),
            (oneOf('* /'), 2, opAssoc.LEFT),
            (oneOf('+ -'), 2, opAssoc.LEFT),
            ])

        arith_expr.runTests('''
            5+3*6
            (5+3)*6
            -2--11
            ''', fullDump=False)

    prints::

        5+3*6
        [[5, '+', [3, '*', 6]]]

        (5+3)*6
        [[[5, '+', 3], '*', 6]]

        -2--11
        [[['-', 2], '-', ['-', 11]]]
    """
    # captive version of FollowedBy that does not do parse actions or capture results names
    class _FB(FollowedBy):
        def parseImpl(self, instring, loc, doActions=True):
            self.expr.tryParse(instring, loc)
            return loc, []

    ret = Forward()
    lastExpr = baseExpr | ( lpar + ret + rpar )
    for operDef in opList:
        # pad with None so the optional parse-action member may be omitted;
        # tuple() lets a list-typed definition work as well as a tuple
        opExpr,arity,rightLeftAssoc,pa = (tuple(operDef) + (None,))[:4]
        if arity == 3:
            # validate BEFORE formatting the name, so a malformed ternary
            # definition raises this ValueError rather than a TypeError
            # from the '%' operator
            if opExpr is None or len(opExpr) != 2:
                raise ValueError(
                    "if numterms=3, opExpr must be a tuple or list of two expressions")
            opExpr1, opExpr2 = opExpr
            termName = "%s%s term" % (opExpr1, opExpr2)
        else:
            # explicit 1-tuple so that opExpr is always formatted as a single value
            termName = "%s term" % (opExpr,)
        thisExpr = Forward().setName(termName)
        if rightLeftAssoc == opAssoc.LEFT:
            if arity == 1:
                matchExpr = _FB(lastExpr + opExpr) + Group( lastExpr + OneOrMore( opExpr ) )
            elif arity == 2:
                if opExpr is not None:
                    matchExpr = _FB(lastExpr + opExpr + lastExpr) + Group( lastExpr + OneOrMore( opExpr + lastExpr ) )
                else:
                    # opExpr of None means the two operands are simply adjacent
                    matchExpr = _FB(lastExpr+lastExpr) + Group( lastExpr + OneOrMore(lastExpr) )
            elif arity == 3:
                matchExpr = _FB(lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr) + \
                            Group( lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr )
            else:
                raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
        elif rightLeftAssoc == opAssoc.RIGHT:
            if arity == 1:
                # try to avoid LR with this extra test
                if not isinstance(opExpr, Optional):
                    opExpr = Optional(opExpr)
                matchExpr = _FB(opExpr.expr + thisExpr) + Group( opExpr + thisExpr )
            elif arity == 2:
                if opExpr is not None:
                    matchExpr = _FB(lastExpr + opExpr + thisExpr) + Group( lastExpr + OneOrMore( opExpr + thisExpr ) )
                else:
                    matchExpr = _FB(lastExpr + thisExpr) + Group( lastExpr + OneOrMore( thisExpr ) )
            elif arity == 3:
                matchExpr = _FB(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr) + \
                            Group( lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr )
            else:
                raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
        else:
            raise ValueError("operator must indicate right or left associativity")
        if pa:
            if isinstance(pa, (tuple, list)):
                matchExpr.setParseAction(*pa)
            else:
                matchExpr.setParseAction(pa)
        # this precedence level matches either the operator form or falls
        # through to the next-tighter level built on the previous iteration
        thisExpr <<= ( matchExpr.setName(termName) | lastExpr )
        lastExpr = thisExpr
    ret <<= lastExpr
    return ret
5701
operatorPrecedence = infixNotation
"""(Deprecated) Former name of :class:`infixNotation`, will be
dropped in a future release."""

# Quoted-string expressions.  In each one the Regex matches the opening quote
# and the body, where a body item is one of: a character other than the quote,
# newline, carriage return or backslash; a doubled quote; or a backslash
# followed by either a non-'x' character or 'x' plus hex digits.  The closing
# quote is matched as a separate literal and Combine joins the pieces into a
# single token.
dblQuotedString = Combine(Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*')+'"').setName("string enclosed in double quotes")
sglQuotedString = Combine(Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*")+"'").setName("string enclosed in single quotes")
# either of the two forms above; the double-quoted alternative is tried first
quotedString = Combine(Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*')+'"'|
                       Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*")+"'").setName("quotedString using single or double quotes")
# a quotedString immediately preceded by a 'u' prefix
unicodeString = Combine(_L('u') + quotedString.copy()).setName("unicode string literal")
5711
def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString.copy()):
    """Build an expression for nested lists enclosed in opening and
    closing delimiters (``"("`` and ``")"`` by default).

    Parameters:
     - opener - opening character for a nested list
       (default= ``"("``); can also be a pyparsing expression
     - closer - closing character for a nested list
       (default= ``")"``); can also be a pyparsing expression
     - content - expression for items within the nested lists
       (default= ``None``)
     - ignoreExpr - expression for ignoring opening and closing
       delimiters (default= :class:`quotedString`)

    When no ``content`` expression is supplied, the nested expression
    captures all whitespace-delimited content between the delimiters as
    a list of separate values; in that case ``opener`` and ``closer``
    must both be strings.

    ``ignoreExpr`` names expressions that may themselves contain opening
    or closing characters which must not be treated as nesting
    delimiters, such as quotedString or a comment expression.  Combine
    several with an :class:`Or` or :class:`MatchFirst`.  The default is
    :class:`quotedString`; pass ``None`` if nothing is to be ignored.

    Raises ``ValueError`` if ``opener`` equals ``closer``, or if
    ``content`` is omitted and the delimiters are not both strings.

    Example::

        data_type = oneOf("void int short long char float double")
        decl_data_type = Combine(data_type + Optional(Word('*')))
        ident = Word(alphas+'_', alphanums+'_')
        number = pyparsing_common.number
        arg = Group(decl_data_type + ident)
        LPAR,RPAR = map(Suppress, "()")

        code_body = nestedExpr('{', '}', ignoreExpr=(quotedString | cStyleComment))

        c_function = (decl_data_type("type")
                      + ident("name")
                      + LPAR + Optional(delimitedList(arg), [])("args") + RPAR
                      + code_body("body"))
        c_function.ignore(cStyleComment)

        source_code = '''
            int is_odd(int x) {
                return (x%2);
            }

            int dec_to_hex(char hchar) {
                if (hchar >= '0' && hchar <= '9') {
                    return (ord(hchar)-ord('0'));
                } else {
                    return (10+ord(hchar)-ord('A'));
                }
            }
        '''
        for func in c_function.searchString(source_code):
            print("%(name)s (%(type)s) args: %(args)s" % func)


    prints::

        is_odd (int) args: [['int', 'x']]
        dec_to_hex (int) args: [['char', 'hchar']]
    """
    if opener == closer:
        raise ValueError("opening and closing strings cannot be the same")

    if content is None:
        # a default content expression can only be derived from string delimiters
        if not (isinstance(opener, basestring) and isinstance(closer, basestring)):
            raise ValueError("opening and closing arguments must be strings if no content expression is given")

        strip_token = lambda t: t[0].strip()
        white = ParserElement.DEFAULT_WHITE_CHARS
        single_char_delims = len(opener) == 1 and len(closer) == 1

        if single_char_delims and ignoreExpr is not None:
            content = Combine(
                OneOrMore(~ignoreExpr + CharsNotIn(opener + closer + white, exact=1))
            ).setParseAction(strip_token)
        elif single_char_delims:
            # note: the parse action is attached to the CharsNotIn term only
            content = empty.copy() + CharsNotIn(opener + closer + white).setParseAction(strip_token)
        elif ignoreExpr is not None:
            content = Combine(
                OneOrMore(~ignoreExpr
                          + ~Literal(opener) + ~Literal(closer)
                          + CharsNotIn(white, exact=1))
            ).setParseAction(strip_token)
        else:
            content = Combine(
                OneOrMore(~Literal(opener) + ~Literal(closer)
                          + CharsNotIn(white, exact=1))
            ).setParseAction(strip_token)

    ret = Forward()
    if ignoreExpr is None:
        item = ret | content
    else:
        item = ignoreExpr | ret | content
    # recursive definition: a nested list contains ignorable text, further
    # nested lists, or content items
    ret <<= Group( Suppress(opener) + ZeroOrMore( item ) + Suppress(closer) )
    ret.setName('nested %s%s expression' % (opener,closer))
    return ret
5808
def indentedBlock(blockStatementExpr, indentStack, indent=True):
    """Helper method for defining space-delimited indentation blocks,
    such as those used to define block statements in Python source code.

    Parameters:

     - blockStatementExpr - expression defining syntax of statement that
       is repeated within the indented block
     - indentStack - list created by caller to manage indentation stack
       (multiple statementWithIndentedBlock expressions within a single
       grammar should share a common indentStack)
     - indent - boolean indicating whether block must be indented beyond
       the current level; set to False for block of left-most
       statements (default= ``True``)

    A valid block must contain at least one ``blockStatement``.

    Returns the grouped block expression, named ``'indented block'``.
    Note that ``indentStack`` is mutated while parsing, and that
    ``blockStatementExpr`` is modified in place to ignore
    backslash-newline line continuations.

    Example::

        data = '''
        def A(z):
          A1
          B = 100
          G = A2
          A2
          A3
        B
        def BB(a,b,c):
          BB1
          def BBA():
            bba1
            bba2
            bba3
        C
        D
        def spam(x,y):
             def eggs(z):
                 pass
        '''


        indentStack = [1]
        stmt = Forward()

        identifier = Word(alphas, alphanums)
        funcDecl = ("def" + identifier + Group( "(" + Optional( delimitedList(identifier) ) + ")" ) + ":")
        func_body = indentedBlock(stmt, indentStack)
        funcDef = Group( funcDecl + func_body )

        rvalue = Forward()
        funcCall = Group(identifier + "(" + Optional(delimitedList(rvalue)) + ")")
        rvalue << (funcCall | identifier | Word(nums))
        assignment = Group(identifier + "=" + rvalue)
        stmt << ( funcDef | assignment | identifier )

        module_body = OneOrMore(stmt)

        parseTree = module_body.parseString(data)
        parseTree.pprint()

    prints::

        [['def',
          'A',
          ['(', 'z', ')'],
          ':',
          [['A1'], [['B', '=', '100']], [['G', '=', 'A2']], ['A2'], ['A3']]],
         'B',
         ['def',
          'BB',
          ['(', 'a', 'b', 'c', ')'],
          ':',
          [['BB1'], [['def', 'BBA', ['(', ')'], ':', [['bba1'], ['bba2'], ['bba3']]]]]],
         'C',
         'D',
         ['def',
          'spam',
          ['(', 'x', 'y', ')'],
          ':',
          [[['def', 'eggs', ['(', 'z', ')'], ':', [['pass']]]]]]]
    """
    # snapshot of the caller's stack as it is when this expression is built
    backup_stack = indentStack[:]

    def reset_stack():
        # slice assignment restores the contents in place, so every holder
        # of the shared list sees the reset
        indentStack[:] = backup_stack

    # NOTE(review): col() is defined elsewhere in this module; presumably it
    # returns the column of location l within s - confirm.

    def checkPeerIndent(s,l,t):
        # parse action: statement must start in the column on top of the stack
        if l >= len(s): return  # nothing to check at end of input
        curCol = col(l,s)
        if curCol != indentStack[-1]:
            if curCol > indentStack[-1]:
                raise ParseException(s,l,"illegal nesting")
            raise ParseException(s,l,"not a peer entry")

    def checkSubIndent(s,l,t):
        # parse action: block must start deeper than the current level;
        # on success the new column is pushed as the current level
        curCol = col(l,s)
        if curCol > indentStack[-1]:
            indentStack.append( curCol )
        else:
            raise ParseException(s,l,"not a subentry")

    def checkUnindent(s,l,t):
        # parse action: block ends when the column drops below the current
        # level and no deeper than the enclosing one; pops the current level
        if l >= len(s): return  # end of input: leave the stack untouched
        curCol = col(l,s)
        if not(indentStack and curCol < indentStack[-1] and curCol <= indentStack[-2]):
            raise ParseException(s,l,"not an unindent")
        indentStack.pop()

    # one or more suppressed line ends; only tab and space are skipped as
    # whitespace so that newlines themselves remain visible to LineEnd
    NL = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress())
    INDENT = (Empty() + Empty().setParseAction(checkSubIndent)).setName('INDENT')
    PEER   = Empty().setParseAction(checkPeerIndent).setName('')
    UNDENT = Empty().setParseAction(checkUnindent).setName('UNINDENT')
    if indent:
        smExpr = Group( Optional(NL) +
            #~ FollowedBy(blockStatementExpr) +
            INDENT + (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) + UNDENT)
    else:
        # left-most block: only peer alignment is checked, no INDENT/UNDENT
        smExpr = Group( Optional(NL) +
            (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) )
    # if the block fails to match, undo any levels pushed along the way
    smExpr.setFailAction(lambda a, b, c, d: reset_stack())
    # treat backslash + end-of-line as ignorable inside statements
    blockStatementExpr.ignore(_bslash + LineEnd())
    return smExpr.setName('indented block')
5931
# character sets built with srange from code points in the 0xa1-0xff range:
# alphas8bit covers 0xc0-0xff except 0xd7 and 0xf7, which go to punc8bit
# together with 0xa1-0xbf
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")

# opening/closing tag expressions for a tag name made of a letter followed by
# letters, digits, '_' or ':'
anyOpenTag,anyCloseTag = makeHTMLTags(Word(alphas,alphanums+"_:").setName('any tag'))
# entity name -> replacement character; zip pairs the names with the characters
# of the second string in order (note that nbsp maps to an ordinary space)
_htmlEntityMap = dict(zip("gt lt amp nbsp quot apos".split(),'><& "\''))
# matches '&<name>;' for any name in the map, exposing the name as the
# 'entity' named group
commonHTMLEntity = Regex('&(?P<entity>' + '|'.join(_htmlEntityMap.keys()) +");").setName("common HTML entity")
def replaceHTMLEntity(t):
    """Parse action that swaps a common HTML entity for its special character.

    Looks up the ``entity`` results name of the parsed tokens in the
    module's entity map; returns ``None`` when the name is not in the map.
    """
    entity_name = t.entity
    return _htmlEntityMap.get(entity_name)
5941
# it's easy to get these comment structures wrong - they're very common, so may as well make them available
# The regex consumes '/*' and the body (any non-'*' character, or a '*' that is
# not followed by '/'); the closing '*/' is matched as a separate literal.
cStyleComment = Combine(Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/').setName("C style comment")
"Comment of the form ``/* ... */``"

# non-greedy, so the match stops at the first '-->'
htmlComment = Regex(r"<!--[\s\S]*?-->").setName("HTML comment")
"Comment of the form ``<!-- ... -->``"

# everything up to (not including) the next newline, with leading whitespace kept
restOfLine = Regex(r".*").leaveWhitespace().setName("rest of line")
# a backslash immediately before the newline continues the comment onto the next line
dblSlashComment = Regex(r"//(?:\\\n|[^\n])*").setName("// comment")
"Comment of the form ``// ... (to end of line)``"

cppStyleComment = Combine(Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/'| dblSlashComment).setName("C++ style comment")
"Comment of either form :class:`cStyleComment` or :class:`dblSlashComment`"

# alias: the very same expression object as cppStyleComment
javaStyleComment = cppStyleComment
"Same as :class:`cppStyleComment`"

pythonStyleComment = Regex(r"#.*").setName("Python style comment")
"Comment of the form ``# ... (to end of line)``"

# one list item: words of printable characters other than ',', joined across
# runs of spaces/tabs as long as the run is not followed by ',' or end of line
_commasepitem = Combine(OneOrMore(Word(printables, excludeChars=',') +
                                  Optional( Word(" \t") +
                                            ~Literal(",") + ~LineEnd() ) ) ).streamline().setName("commaItem")
# the Optional with default="" lets an empty item between commas yield ''
commaSeparatedList = delimitedList( Optional( quotedString.copy() | _commasepitem, default="") ).setName("commaSeparatedList")
"""(Deprecated) Predefined expression of 1 or more printable words or
quoted strings, separated by commas.

This expression is deprecated in favor of :class:`pyparsing_common.comma_separated_list`.
"""
5971
5972 # some other useful expressions - using lower-case class name since we are really using this as a namespace
class pyparsing_common:
    """Here are some common low-level expressions that may be useful in
    jump-starting parser development:

     - numeric forms (:class:`integers<integer>`, :class:`reals<real>`,
       :class:`scientific notation<sci_real>`)
     - common :class:`programming identifiers<identifier>`
     - network addresses (:class:`MAC<mac_address>`,
       :class:`IPv4<ipv4_address>`, :class:`IPv6<ipv6_address>`)
     - ISO8601 :class:`dates<iso8601_date>` and
       :class:`datetime<iso8601_datetime>`
     - :class:`UUID<uuid>`
     - :class:`comma-separated list<comma_separated_list>`

    Parse actions:

     - :class:`convertToInteger`
     - :class:`convertToFloat`
     - :class:`convertToDate`
     - :class:`convertToDatetime`
     - :class:`stripHTMLTags`
     - :class:`upcaseTokens`
     - :class:`downcaseTokens`

    The class is used purely as a namespace; every member is a class
    attribute or static method, so it is never instantiated.

    Example::

        pyparsing_common.number.runTests('''
            # any int or real number, returned as the appropriate type
            100
            -100
            +100
            3.14159
            6.02e23
            1e-12
            ''')

        pyparsing_common.fnumber.runTests('''
            # any int or real number, returned as float
            100
            -100
            +100
            3.14159
            6.02e23
            1e-12
            ''')

        pyparsing_common.hex_integer.runTests('''
            # hex numbers
            100
            FF
            ''')

        pyparsing_common.fraction.runTests('''
            # fractions
            1/2
            -3/4
            ''')

        pyparsing_common.mixed_integer.runTests('''
            # mixed fractions
            1
            1/2
            -3/4
            1-3/4
            ''')

        import uuid
        pyparsing_common.uuid.setParseAction(tokenMap(uuid.UUID))
        pyparsing_common.uuid.runTests('''
            # uuid
            12345678-1234-5678-1234-567812345678
            ''')

    prints::

        # any int or real number, returned as the appropriate type
        100
        [100]

        -100
        [-100]

        +100
        [100]

        3.14159
        [3.14159]

        6.02e23
        [6.02e+23]

        1e-12
        [1e-12]

        # any int or real number, returned as float
        100
        [100.0]

        -100
        [-100.0]

        +100
        [100.0]

        3.14159
        [3.14159]

        6.02e23
        [6.02e+23]

        1e-12
        [1e-12]

        # hex numbers
        100
        [256]

        FF
        [255]

        # fractions
        1/2
        [0.5]

        -3/4
        [-0.75]

        # mixed fractions
        1
        [1]

        1/2
        [0.5]

        -3/4
        [-0.75]

        1-3/4
        [1.75]

        # uuid
        12345678-1234-5678-1234-567812345678
        [UUID('12345678-1234-5678-1234-567812345678')]
    """

    convertToInteger = tokenMap(int)
    """
    Parse action for converting parsed integers to Python int
    """

    convertToFloat = tokenMap(float)
    """
    Parse action for converting parsed numbers to Python float
    """

    integer = Word(nums).setName("integer").setParseAction(convertToInteger)
    """expression that parses an unsigned integer, returns an int"""

    # the extra tokenMap argument (16) is passed along to int as the base
    hex_integer = Word(hexnums).setName("hex integer").setParseAction(tokenMap(int,16))
    """expression that parses a hexadecimal integer, returns an int"""

    signed_integer = Regex(r'[+-]?\d+').setName("signed integer").setParseAction(convertToInteger)
    """expression that parses an integer with optional leading sign, returns an int"""

    # signed_integer() makes a copy, so replacing the parse action here does
    # not affect signed_integer itself
    fraction = (signed_integer().setParseAction(convertToFloat) + '/' + signed_integer().setParseAction(convertToFloat)).setName("fraction")
    """fractional expression of an integer divided by an integer, returns a float"""
    # tokens are [numerator, '/', denominator]: divide first by last
    fraction.addParseAction(lambda t: t[0]/t[-1])

    mixed_integer = (fraction | signed_integer + Optional(Optional('-').suppress() + fraction)).setName("fraction or mixed integer-fraction")
    """mixed integer of the form 'integer - fraction', with optional leading integer, returns float"""
    # the '-' separator is suppressed, so the remaining tokens are simply added
    mixed_integer.addParseAction(sum)

    # requires at least one digit before the decimal point; digits after it are optional
    real = Regex(r'[+-]?\d+\.\d*').setName("real number").setParseAction(convertToFloat)
    """expression that parses a floating point number and returns a float"""

    sci_real = Regex(r'[+-]?\d+([eE][+-]?\d+|\.\d*([eE][+-]?\d+)?)').setName("real number with scientific notation").setParseAction(convertToFloat)
    """expression that parses a floating point number with optional
    scientific notation and returns a float"""

    # streamlining this expression makes the docs nicer-looking
    number = (sci_real | real | signed_integer).streamline()
    """any numeric expression, returns the corresponding Python type"""

    fnumber = Regex(r'[+-]?\d+\.?\d*([eE][+-]?\d+)?').setName("fnumber").setParseAction(convertToFloat)
    """any int or real number, returned as float"""

    identifier = Word(alphas+'_', alphanums+'_').setName("identifier")
    """typical code identifier (leading alpha or '_', followed by 0 or more alphas, nums, or '_')"""

    ipv4_address = Regex(r'(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})(\.(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})){3}').setName("IPv4 address")
    "IPv4 address (``0.0.0.0 - 255.255.255.255``)"

    # building blocks for ipv6_address: one group of 1-4 hex digits, the full
    # eight-group form, the '::'-abbreviated form, and the '::ffff:' + IPv4 form
    _ipv6_part = Regex(r'[0-9a-fA-F]{1,4}').setName("hex_integer")
    _full_ipv6_address = (_ipv6_part + (':' + _ipv6_part)*7).setName("full IPv6 address")
    _short_ipv6_address = (Optional(_ipv6_part + (':' + _ipv6_part)*(0,6)) + "::" + Optional(_ipv6_part + (':' + _ipv6_part)*(0,6))).setName("short IPv6 address")
    # an abbreviated address must contain fewer than 8 hex groups in total
    _short_ipv6_address.addCondition(lambda t: sum(1 for tt in t if pyparsing_common._ipv6_part.matches(tt)) < 8)
    _mixed_ipv6_address = ("::ffff:" + ipv4_address).setName("mixed IPv6 address")
    ipv6_address = Combine((_full_ipv6_address | _mixed_ipv6_address | _short_ipv6_address).setName("IPv6 address")).setName("IPv6 address")
    "IPv6 address (long, short, or mixed form)"

    # The backreference \1 forces every delimiter to equal the first one.
    # NOTE(review): the regex matches six two-digit hex groups, while the
    # descriptive string below shows only five - the string looks outdated.
    mac_address = Regex(r'[0-9a-fA-F]{2}([:.-])[0-9a-fA-F]{2}(?:\1[0-9a-fA-F]{2}){4}').setName("MAC address")
    "MAC address xx:xx:xx:xx:xx (may also have '-' or '.' delimiters)"

    @staticmethod
    def convertToDate(fmt="%Y-%m-%d"):
        """
        Helper to create a parse action for converting parsed date string to Python datetime.date

        Params -
         - fmt - format to be passed to datetime.strptime (default= ``"%Y-%m-%d"``)

        The returned parse action raises ``ParseException`` when the
        token does not match ``fmt``.

        Example::

            date_expr = pyparsing_common.iso8601_date.copy()
            date_expr.setParseAction(pyparsing_common.convertToDate())
            print(date_expr.parseString("1999-12-31"))

        prints::

            [datetime.date(1999, 12, 31)]
        """
        def cvt_fn(s,l,t):
            try:
                return datetime.strptime(t[0], fmt).date()
            except ValueError as ve:
                # report a bad date as a parse failure at this location
                raise ParseException(s, l, str(ve))
        return cvt_fn

    @staticmethod
    def convertToDatetime(fmt="%Y-%m-%dT%H:%M:%S.%f"):
        """Helper to create a parse action for converting parsed
        datetime string to Python datetime.datetime

        Params -
         - fmt - format to be passed to datetime.strptime (default= ``"%Y-%m-%dT%H:%M:%S.%f"``)

        The returned parse action raises ``ParseException`` when the
        token does not match ``fmt``.

        Example::

            dt_expr = pyparsing_common.iso8601_datetime.copy()
            dt_expr.setParseAction(pyparsing_common.convertToDatetime())
            print(dt_expr.parseString("1999-12-31T23:59:59.999"))

        prints::

            [datetime.datetime(1999, 12, 31, 23, 59, 59, 999000)]
        """
        def cvt_fn(s,l,t):
            try:
                return datetime.strptime(t[0], fmt)
            except ValueError as ve:
                # report a bad datetime as a parse failure at this location
                raise ParseException(s, l, str(ve))
        return cvt_fn

    # month and day are optional in this pattern, so 'yyyy' and 'yyyy-mm' also match
    iso8601_date = Regex(r'(?P<year>\d{4})(?:-(?P<month>\d\d)(?:-(?P<day>\d\d))?)?').setName("ISO8601 date")
    "ISO8601 date (``yyyy-mm-dd``)"

    iso8601_datetime = Regex(r'(?P<year>\d{4})-(?P<month>\d\d)-(?P<day>\d\d)[T ](?P<hour>\d\d):(?P<minute>\d\d)(:(?P<second>\d\d(\.\d*)?)?)?(?P<tz>Z|[+-]\d\d:?\d\d)?').setName("ISO8601 datetime")
    "ISO8601 datetime (``yyyy-mm-ddThh:mm:ss.s(Z|+-00:00)``) - trailing seconds, milliseconds, and timezone optional; accepts separating ``'T'`` or ``' '``"

    uuid = Regex(r'[0-9a-fA-F]{8}(-[0-9a-fA-F]{4}){3}-[0-9a-fA-F]{12}').setName("UUID")
    "UUID (``xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx``)"

    # matches any opening or closing tag and suppresses it from the output
    _html_stripper = anyOpenTag.suppress() | anyCloseTag.suppress()
    @staticmethod
    def stripHTMLTags(s, l, tokens):
        """Parse action to remove HTML tags from web page HTML source

        Only the first token is processed; the result is that token's
        text with every opening and closing tag removed.

        Example::

            # strip HTML links from normal text
            text = '<td>More info at the <a href="https://github.com/pyparsing/pyparsing/wiki">pyparsing</a> wiki page</td>'
            td,td_end = makeHTMLTags("TD")
            table_text = td + SkipTo(td_end).setParseAction(pyparsing_common.stripHTMLTags)("body") + td_end
            print(table_text.parseString(text).body)

        Prints::

            More info at the pyparsing wiki page
        """
        return pyparsing_common._html_stripper.transformString(tokens[0])

    # one list item: runs of printable characters other than ',', each run
    # optionally followed by spaces/tabs, never starting at a ',' or end of line
    _commasepitem = Combine(OneOrMore(~Literal(",") + ~LineEnd() + Word(printables, excludeChars=',')
                                      + Optional( White(" \t") ) ) ).streamline().setName("commaItem")
    # the Optional with default="" lets an empty item between commas yield ''
    comma_separated_list = delimitedList( Optional( quotedString.copy() | _commasepitem, default="") ).setName("comma separated list")
    """Predefined expression of 1 or more printable words or quoted strings, separated by commas."""

    upcaseTokens = staticmethod(tokenMap(lambda t: _ustr(t).upper()))
    """Parse action to convert tokens to upper case."""

    downcaseTokens = staticmethod(tokenMap(lambda t: _ustr(t).lower()))
    """Parse action to convert tokens to lower case."""
6264
6265
class _lazyclassproperty(object):
    """Descriptor that computes a class-level value once per class and caches it.

    The wrapped function receives the owning class.  Results are stored in a
    per-class ``_intern`` dict keyed by the function name, so a subclass
    computes and caches its own value rather than reusing its parent's.
    """

    def __init__(self, fn):
        self.fn = fn
        # mirror the wrapped function's metadata on the descriptor
        self.__doc__ = fn.__doc__
        self.__name__ = fn.__name__

    def __get__(self, obj, cls):
        owner = type(obj) if cls is None else cls

        # A fresh cache is needed when the class has none at all, or when the
        # one it sees is merely inherited from a class further up the MRO.
        needs_own_cache = not hasattr(owner, '_intern')
        if not needs_own_cache:
            needs_own_cache = any(
                owner._intern is getattr(parent, '_intern', [])
                for parent in owner.__mro__[1:]
            )
        if needs_own_cache:
            owner._intern = {}

        key = self.fn.__name__
        if key not in owner._intern:
            computed = self.fn(owner)
            owner._intern[key] = computed
        return owner._intern[key]
6281
6282
class unicode_set(object):
    """
    Base class for sets of Unicode characters, providing language-specific
    strings for ``alphas``, ``nums``, ``alphanums``, and ``printables``.

    A subclass declares the code points it covers in the class attribute
    ``_ranges``, a list of tuples giving the first and last code point of
    each range (a one-element tuple stands for a single code point)::

        _ranges = [(0x0020, 0x007e), (0x00a0, 0x00ff),]

    Sets can also be combined through multiple inheritance::

        class CJK(Chinese, Japanese, Korean):
            pass
    """
    _ranges = []

    @classmethod
    def _get_chars_for_ranges(cls):
        """Return the sorted list of distinct characters covered by this
        class and the classes ahead of ``unicode_set`` in its MRO."""
        code_points = set()
        for klass in cls.__mro__:
            # stop at the common base; nothing beyond it contributes ranges
            if klass is unicode_set:
                break
            for bounds in klass._ranges:
                code_points.update(range(bounds[0], bounds[-1] + 1))
        return [unichr(cp) for cp in sorted(code_points)]

    @_lazyclassproperty
    def printables(cls):
        "all non-whitespace characters in this range"
        return u''.join(ch for ch in cls._get_chars_for_ranges() if not ch.isspace())

    @_lazyclassproperty
    def alphas(cls):
        "all alphabetic characters in this range"
        return u''.join(ch for ch in cls._get_chars_for_ranges() if ch.isalpha())

    @_lazyclassproperty
    def nums(cls):
        "all numeric digit characters in this range"
        return u''.join(ch for ch in cls._get_chars_for_ranges() if ch.isdigit())

    @_lazyclassproperty
    def alphanums(cls):
        "all alphanumeric characters in this range"
        letters = cls.alphas
        digits = cls.nums
        return letters + digits
6328
6329
class pyparsing_unicode(unicode_set):
    """
    A namespace class for defining common language unicode_sets.

    Each nested class is a :class:`unicode_set` whose ``_ranges`` lists
    (first, last) code point pairs, both inclusive; a one-element tuple
    denotes a single code point.
    """
    # the namespace itself covers everything from U+0020 up to the
    # interpreter's highest code point
    _ranges = [(32, sys.maxunicode)]

    class Latin1(unicode_set):
        "Unicode set for Latin-1 Unicode Character Range"
        _ranges = [(0x0020, 0x007e), (0x00a0, 0x00ff),]

    class LatinA(unicode_set):
        "Unicode set for Latin-A Unicode Character Range"
        _ranges = [(0x0100, 0x017f),]

    class LatinB(unicode_set):
        "Unicode set for Latin-B Unicode Character Range"
        _ranges = [(0x0180, 0x024f),]

    class Greek(unicode_set):
        "Unicode set for Greek Unicode Character Ranges"
        _ranges = [
            (0x0370, 0x03ff), (0x1f00, 0x1f15), (0x1f18, 0x1f1d), (0x1f20, 0x1f45), (0x1f48, 0x1f4d),
            (0x1f50, 0x1f57), (0x1f59,), (0x1f5b,), (0x1f5d,), (0x1f5f, 0x1f7d), (0x1f80, 0x1fb4), (0x1fb6, 0x1fc4),
            (0x1fc6, 0x1fd3), (0x1fd6, 0x1fdb), (0x1fdd, 0x1fef), (0x1ff2, 0x1ff4), (0x1ff6, 0x1ffe),
            ]

    class Cyrillic(unicode_set):
        "Unicode set for Cyrillic Unicode Character Range"
        _ranges = [(0x0400, 0x04ff)]

    class Chinese(unicode_set):
        "Unicode set for Chinese Unicode Character Range"
        _ranges = [(0x4e00, 0x9fff), (0x3000, 0x303f), ]

    class Japanese(unicode_set):
        "Unicode set for Japanese Unicode Character Range, combining Kanji, Hiragana, and Katakana ranges"
        # left empty here; filled in right after the enclosing class is
        # defined, from the three nested sets below
        _ranges = [ ]

        class Kanji(unicode_set):
            "Unicode set for Kanji Unicode Character Range"
            _ranges = [(0x4E00, 0x9Fbf), (0x3000, 0x303f), ]

        class Hiragana(unicode_set):
            "Unicode set for Hiragana Unicode Character Range"
            _ranges = [(0x3040, 0x309f), ]

        class Katakana(unicode_set):
            "Unicode set for Katakana  Unicode Character Range"
            _ranges = [(0x30a0, 0x30ff), ]

    class Korean(unicode_set):
        "Unicode set for Korean Unicode Character Range"
        _ranges = [(0xac00, 0xd7af), (0x1100, 0x11ff), (0x3130, 0x318f), (0xa960, 0xa97f), (0xd7b0, 0xd7ff), (0x3000, 0x303f), ]

    class CJK(Chinese, Japanese, Korean):
        "Unicode set for combined Chinese, Japanese, and Korean (CJK) Unicode Character Range"
        # no ranges of its own: characters are gathered from the base classes
        pass

    class Thai(unicode_set):
        "Unicode set for Thai Unicode Character Range"
        _ranges = [(0x0e01, 0x0e3a), (0x0e3f, 0x0e5b), ]

    class Arabic(unicode_set):
        "Unicode set for Arabic Unicode Character Range"
        _ranges = [(0x0600, 0x061b), (0x061e, 0x06ff), (0x0700, 0x077f), ]

    class Hebrew(unicode_set):
        "Unicode set for Hebrew Unicode Character Range"
        _ranges = [(0x0590, 0x05ff), ]

    class Devanagari(unicode_set):
        "Unicode set for Devanagari Unicode Character Range"
        _ranges = [(0x0900, 0x097f), (0xa8e0, 0xa8ff)]
6403
# Japanese is the concatenation of its three nested sets; this is done here
# because the nested classes cannot be referenced by these qualified names
# until the enclosing class statement has finished.
pyparsing_unicode.Japanese._ranges = (pyparsing_unicode.Japanese.Kanji._ranges
                                      + pyparsing_unicode.Japanese.Hiragana._ranges
                                      + pyparsing_unicode.Japanese.Katakana._ranges)

# define ranges in language character sets
# (each set is additionally exposed under its native-language name; this is
# only done when PY_3 is true)
if PY_3:
    setattr(pyparsing_unicode, "العربية", pyparsing_unicode.Arabic)
    setattr(pyparsing_unicode, "中文", pyparsing_unicode.Chinese)
    setattr(pyparsing_unicode, "кириллица", pyparsing_unicode.Cyrillic)
    setattr(pyparsing_unicode, "Ελληνικά", pyparsing_unicode.Greek)
    setattr(pyparsing_unicode, "עִברִית", pyparsing_unicode.Hebrew)
    setattr(pyparsing_unicode, "日本語", pyparsing_unicode.Japanese)
    setattr(pyparsing_unicode.Japanese, "漢字", pyparsing_unicode.Japanese.Kanji)
    setattr(pyparsing_unicode.Japanese, "カタカナ", pyparsing_unicode.Japanese.Katakana)
    setattr(pyparsing_unicode.Japanese, "ひらがな", pyparsing_unicode.Japanese.Hiragana)
    setattr(pyparsing_unicode, "한국어", pyparsing_unicode.Korean)
    setattr(pyparsing_unicode, "ไทย", pyparsing_unicode.Thai)
    setattr(pyparsing_unicode, "देवनागरी", pyparsing_unicode.Devanagari)
6422
6423
# Self-demo, executed only when this module is run as a script: builds a tiny
# "select ... from ..." grammar and exercises runTests() on it and on a few
# pyparsing_common expressions.
if __name__ == "__main__":

    # keywords, matched without regard to case
    selectToken = CaselessLiteral("select")
    fromToken = CaselessLiteral("from")

    # identifier: first character from alphas, the rest from alphanums, "_" or "$"
    ident = Word(alphas, alphanums + "_$")

    # dotted column name, combined into a single token and run through upcaseTokens
    columnName = delimitedList(ident, ".", combine=True).setParseAction(upcaseTokens)
    columnNameList = Group(delimitedList(columnName)).setName("columns")
    # either a literal '*' or a grouped list of column names
    columnSpec = ('*' | columnNameList)

    # table names are built the same way as column names
    tableName = delimitedList(ident, ".", combine=True).setParseAction(upcaseTokens)
    tableNameList = Group(delimitedList(tableName)).setName("tables")

    # complete statement; the matched pieces are labelled "command", "columns" and "tables"
    simpleSQL = selectToken("command") + columnSpec("columns") + fromToken + tableNameList("tables")

    # demo runTests method, including embedded comments in test string
    simpleSQL.runTests("""
        # '*' as column list and dotted table name
        select * from SYS.XYZZY

        # caseless match on "SELECT", and casts back to "select"
        SELECT * from XYZZY, ABC

        # list of column names, and mixed case SELECT keyword
        Select AA,BB,CC from Sys.dual

        # multiple tables
        Select A, B, C from Sys.dual, Table2

        # invalid SELECT keyword - should fail
        Xelect A, B, C from Sys.dual

        # incomplete command - should fail
        Select

        # invalid column name - should fail
        Select ^^^ frox Sys.dual

        """)

    # signed integers, reals and exponent forms fed to pyparsing_common.number
    pyparsing_common.number.runTests("""
        100
        -100
        +100
        3.14159
        6.02e23
        1e-12
        """)

    # any int or real number, returned as float
    pyparsing_common.fnumber.runTests("""
        100
        -100
        +100
        3.14159
        6.02e23
        1e-12
        """)

    # two sample inputs for pyparsing_common.hex_integer
    pyparsing_common.hex_integer.runTests("""
        100
        FF
        """)

    # attach a parse action that maps matched tokens through uuid.UUID;
    # note that this sets the action on the shared pyparsing_common.uuid expression
    import uuid
    pyparsing_common.uuid.setParseAction(tokenMap(uuid.UUID))
    pyparsing_common.uuid.runTests("""
        12345678-1234-5678-1234-567812345678
        """)