comparison lib/python3.8/site-packages/pkg_resources/_vendor/pyparsing.py @ 0:9e54283cc701 draft

"planemo upload commit d12c32a45bcd441307e632fca6d9af7d60289d44"
author guerler
date Mon, 27 Jul 2020 03:47:31 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:9e54283cc701
1 # module pyparsing.py
2 #
3 # Copyright (c) 2003-2018 Paul T. McGuire
4 #
5 # Permission is hereby granted, free of charge, to any person obtaining
6 # a copy of this software and associated documentation files (the
7 # "Software"), to deal in the Software without restriction, including
8 # without limitation the rights to use, copy, modify, merge, publish,
9 # distribute, sublicense, and/or sell copies of the Software, and to
10 # permit persons to whom the Software is furnished to do so, subject to
11 # the following conditions:
12 #
13 # The above copyright notice and this permission notice shall be
14 # included in all copies or substantial portions of the Software.
15 #
16 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19 # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
20 # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 #
24
25 __doc__ = \
26 """
27 pyparsing module - Classes and methods to define and execute parsing grammars
28 =============================================================================
29
30 The pyparsing module is an alternative approach to creating and executing simple grammars,
31 vs. the traditional lex/yacc approach, or the use of regular expressions. With pyparsing, you
32 don't need to learn a new syntax for defining grammars or matching expressions - the parsing module
33 provides a library of classes that you use to construct the grammar directly in Python.
34
35 Here is a program to parse "Hello, World!" (or any greeting of the form
36 C{"<salutation>, <addressee>!"}), built up using L{Word}, L{Literal}, and L{And} elements
37 (L{'+'<ParserElement.__add__>} operator gives L{And} expressions, strings are auto-converted to
38 L{Literal} expressions)::
39
40 from pyparsing import Word, alphas
41
42 # define grammar of a greeting
43 greet = Word(alphas) + "," + Word(alphas) + "!"
44
45 hello = "Hello, World!"
46 print (hello, "->", greet.parseString(hello))
47
48 The program outputs the following::
49
50 Hello, World! -> ['Hello', ',', 'World', '!']
51
52 The Python representation of the grammar is quite readable, owing to the self-explanatory
53 class names, and the use of '+', '|' and '^' operators.
54
55 The L{ParseResults} object returned from L{ParserElement.parseString<ParserElement.parseString>} can be accessed as a nested list, a dictionary, or an
56 object with named attributes.
57
58 The pyparsing module handles some of the problems that are typically vexing when writing text parsers:
59 - extra or missing whitespace (the above program will also handle "Hello,World!", "Hello , World !", etc.)
60 - quoted strings
61 - embedded comments
62
63
64 Getting Started -
65 -----------------
66 Visit the classes L{ParserElement} and L{ParseResults} to see the base classes that most other pyparsing
67 classes inherit from. Use the docstrings for examples of how to:
68 - construct literal match expressions from L{Literal} and L{CaselessLiteral} classes
69 - construct character word-group expressions using the L{Word} class
70 - see how to create repetitive expressions using L{ZeroOrMore} and L{OneOrMore} classes
71 - use L{'+'<And>}, L{'|'<MatchFirst>}, L{'^'<Or>}, and L{'&'<Each>} operators to combine simple expressions into more complex ones
72 - associate names with your parsed results using L{ParserElement.setResultsName}
73 - find some helpful expression short-cuts like L{delimitedList} and L{oneOf}
74 - find more useful common expressions in the L{pyparsing_common} namespace class
75 """
76
# Release metadata for this copy of the module.
__version__ = "2.2.1"
__versionTime__ = "18 Sep 2018 00:49 UTC"
__author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"
80
81 import string
82 from weakref import ref as wkref
83 import copy
84 import sys
85 import warnings
86 import re
87 import sre_constants
88 import collections
89 import pprint
90 import traceback
91 import types
92 from datetime import datetime
93
94 try:
95 from _thread import RLock
96 except ImportError:
97 from threading import RLock
98
99 try:
100 # Python 3
101 from collections.abc import Iterable
102 from collections.abc import MutableMapping
103 except ImportError:
104 # Python 2.7
105 from collections import Iterable
106 from collections import MutableMapping
107
108 try:
109 from collections import OrderedDict as _OrderedDict
110 except ImportError:
111 try:
112 from ordereddict import OrderedDict as _OrderedDict
113 except ImportError:
114 _OrderedDict = None
115
116 #~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) )
117
# Names exported by ``from pyparsing import *``: the parser-element and
# exception classes first, then the lower-case helper functions and
# predefined expressions/character sets.
__all__ = [
    'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty',
    'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal',
    'MatchFirst', 'NoMatch', 'NotAny', 'OneOrMore', 'OnlyOnce', 'Optional', 'Or',
    'ParseBaseException', 'ParseElementEnhance', 'ParseException', 'ParseExpression', 'ParseFatalException',
    'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'RecursiveGrammarException',
    'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConverter',
    'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore',
    'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col',
    'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString',
    'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'hexnums',
    'htmlComment', 'javaStyleComment', 'line', 'lineEnd', 'lineStart', 'lineno',
    'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral',
    'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables',
    'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity',
    'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd',
    'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute',
    'indentedBlock', 'originalTextFor', 'ungroup', 'infixNotation','locatedExpr', 'withClass',
    'CloseMatch', 'tokenMap', 'pyparsing_common',
]
138
# Interpreter-compatibility shims.  Depending on the major version of the
# running interpreter this section defines _MAX_INT, _ustr and
# singleArgBuiltins (plus basestring/unichr on Python 3 and range on
# Python 2) so the rest of the module can use one spelling for each.
system_version = tuple(sys.version_info)[:3]
PY_3 = system_version[0] == 3
if PY_3:
    _MAX_INT = sys.maxsize
    # Python 2 names that no longer exist on Python 3
    basestring = str
    unichr = chr
    _ustr = str

    # build list of single arg builtins, that can be used as parse actions
    singleArgBuiltins = [sum, len, sorted, reversed, list, tuple, set, any, all, min, max]

else:
    _MAX_INT = sys.maxint
    # use the lazy range on Python 2
    range = xrange

    def _ustr(obj):
        """Drop-in replacement for str(obj) that tries to be Unicode friendly.

        A unicode object is returned unchanged.  Otherwise str(obj) is tried
        first; if that raises UnicodeEncodeError, unicode(obj) is encoded with
        the default encoding using 'xmlcharrefreplace', and each resulting
        ``&#NNN;`` reference is rewritten as a ``\\u<hex>`` escape.
        """
        if isinstance(obj,unicode):
            return obj

        try:
            # If this works, then _ustr(obj) has the same behaviour as str(obj), so
            # it won't break any existing code.
            return str(obj)

        except UnicodeEncodeError:
            # Else encode it
            ret = unicode(obj).encode(sys.getdefaultencoding(), 'xmlcharrefreplace')
            # Regex is not visible in this part of the file; it is resolved
            # when _ustr is called, not when it is defined.
            xmlcharref = Regex(r'&#\d+;')
            # t[0][2:-1] strips the leading '&#' and trailing ';' to get the code point
            xmlcharref.setParseAction(lambda t: '\\u' + hex(int(t[0][2:-1]))[2:])
            return xmlcharref.transformString(ret)

    # build list of single arg builtins, tolerant of Python version, that can be used as parse actions
    singleArgBuiltins = []
    import __builtin__
    for fname in "sum len sorted reversed list tuple set any all min max".split():
        try:
            singleArgBuiltins.append(getattr(__builtin__,fname))
        except AttributeError:
            # this builtin does not exist on the running interpreter; skip it
            continue

# the concrete type of a generator-expression object, used for isinstance checks
_generatorType = type((y for y in range(1)))
184
def _xml_escape(data):
    """Return `data` with the characters & > < " ' replaced by XML entities."""
    entity_names = "amp gt lt quot apos".split()
    escaped = data
    # '&' comes first so that the ampersands introduced by the later
    # substitutions are not escaped a second time
    for position, symbol in enumerate('&><"\''):
        escaped = escaped.replace(symbol, '&' + entity_names[position] + ';')
    return escaped
194
class _Constants(object):
    """Empty class; instances serve as plain attribute containers."""
197
# Character sets used when building expressions.
nums = string.digits
alphas = "".join((string.ascii_uppercase, string.ascii_lowercase))
hexnums = nums + "ABCDEFabcdef"
alphanums = alphas + nums
_bslash = chr(92)
# every printable ASCII character that is not whitespace
printables = "".join([ch for ch in string.printable if ch not in string.whitespace])
204
class ParseBaseException(Exception):
    """Common base class for the parsing exceptions defined in this module."""

    # Performance tuning: a great many of these are constructed, so the
    # constructor is kept as small and fast as possible.
    def __init__(self, pstr, loc=0, msg=None, elem=None):
        # With no explicit message, the first argument is taken to be the
        # message itself and the parsed string is left empty.
        if msg is None:
            self.msg, self.pstr = pstr, ""
        else:
            self.msg, self.pstr = msg, pstr
        self.loc = loc
        self.parserElement = elem
        self.args = (pstr, loc, msg)

    @classmethod
    def _from_exception(cls, pe):
        """
        Internal factory: build an exception of this class from the fields of
        another parse exception, so subclasses need not share an __init__
        signature.
        """
        return cls(pe.pstr, pe.loc, pe.msg, pe.parserElement)

    def __getattr__(self, aname):
        """Compute location attributes on demand; supported names are:
            - lineno - returns the line number of the exception text
            - col (or column) - returns the column number of the exception text
            - line - returns the line containing the exception text
        Any other name raises AttributeError.
        """
        if aname == "lineno":
            return lineno(self.loc, self.pstr)
        if aname in ("col", "column"):
            return col(self.loc, self.pstr)
        if aname == "line":
            return line(self.loc, self.pstr)
        raise AttributeError(aname)

    def __str__(self):
        details = (self.msg, self.loc, self.lineno, self.column)
        return "%s (at char %d), (line:%d, col:%d)" % details

    def __repr__(self):
        return _ustr(self)

    def markInputline(self, markerString=">!<"):
        """Return the line on which the exception occurred, stripped, with
        `markerString` inserted at the exception column (no marker is
        inserted when `markerString` is empty).
        """
        text = self.line
        split_at = self.column - 1
        if markerString:
            text = text[:split_at] + markerString + text[split_at:]
        return text.strip()

    def __dir__(self):
        # advertise the computed attributes alongside the class attributes
        return ["lineno", "col", "line"] + dir(type(self))
260
class ParseException(ParseBaseException):
    """
    Exception raised when a parse expression does not match the input.
    Attributes available by name:
     - lineno - returns the line number of the exception text
     - col - returns the column number of the exception text
     - line - returns the line containing the exception text

    Example::
        try:
            Word(nums).setName("integer").parseString("ABC")
        except ParseException as pe:
            print(pe)
            print("column: {}".format(pe.col))

    prints::
       Expected integer (at char 0), (line:1, col:1)
        column: 1
    """
281
class ParseFatalException(ParseBaseException):
    """Exception that user code may raise when it finds inconsistent parse
    content; it stops all parsing immediately."""
286
class ParseSyntaxException(ParseFatalException):
    """Same as L{ParseFatalException}, but raised internally when an
    L{ErrorStop<And._ErrorStop>} ('-' operator) signals that parsing must stop
    immediately because an unbacktrackable syntax error has been found."""
292
293 #~ class ReparseException(ParseBaseException):
294 #~ """Experimental class - parse actions can raise this exception to cause
295 #~ pyparsing to reparse the input string:
296 #~ - with a modified input string, and/or
297 #~ - with a modified start location
298 #~ Set the values of the ReparseException in the constructor, and raise the
299 #~ exception in a parse action to cause pyparsing to use the new string/location.
300 #~ Setting the values as None causes no change to be made.
301 #~ """
302 #~ def __init_( self, newstring, restartLoc ):
303 #~ self.newParseText = newstring
304 #~ self.reparseLoc = restartLoc
305
class RecursiveGrammarException(Exception):
    """exception thrown by L{ParserElement.validate} if the grammar could be improperly recursive"""
    def __init__(self, parseElementList):
        # the parser elements that were being traced when the problem was detected
        self.parseElementTrace = parseElementList

    def __str__(self):
        # Wrap the trace in a 1-tuple: a bare tuple operand would be unpacked
        # by the % operator as the argument list and raise TypeError.
        return "RecursiveGrammarException: %s" % (self.parseElementTrace,)
313
class _ParseResultsWithOffset(object):
    """Two-item holder pairing a value with an offset; indexable like a tuple."""

    def __init__(self, p1, p2):
        self.tup = (p1, p2)

    def __getitem__(self, i):
        return self.tup[i]

    def __repr__(self):
        # only the value (first item) is shown
        value, _ = self.tup
        return repr(value)

    def setOffset(self, i):
        """Replace the stored offset, keeping the value."""
        value = self.tup[0]
        self.tup = (value, i)
323
324 class ParseResults(object):
325 """
326 Structured parse results, to provide multiple means of access to the parsed data:
327 - as a list (C{len(results)})
328 - by list index (C{results[0], results[1]}, etc.)
329 - by attribute (C{results.<resultsName>} - see L{ParserElement.setResultsName})
330
331 Example::
332 integer = Word(nums)
333 date_str = (integer.setResultsName("year") + '/'
334 + integer.setResultsName("month") + '/'
335 + integer.setResultsName("day"))
336 # equivalent form:
337 # date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
338
339 # parseString returns a ParseResults object
340 result = date_str.parseString("1999/12/31")
341
342 def test(s, fn=repr):
343 print("%s -> %s" % (s, fn(eval(s))))
344 test("list(result)")
345 test("result[0]")
346 test("result['month']")
347 test("result.day")
348 test("'month' in result")
349 test("'minutes' in result")
350 test("result.dump()", str)
351 prints::
352 list(result) -> ['1999', '/', '12', '/', '31']
353 result[0] -> '1999'
354 result['month'] -> '12'
355 result.day -> '31'
356 'month' in result -> True
357 'minutes' in result -> False
358 result.dump() -> ['1999', '/', '12', '/', '31']
359 - day: 31
360 - month: 12
361 - year: 1999
362 """
363 def __new__(cls, toklist=None, name=None, asList=True, modal=True ):
364 if isinstance(toklist, cls):
365 return toklist
366 retobj = object.__new__(cls)
367 retobj.__doinit = True
368 return retobj
369
370 # Performance tuning: we construct a *lot* of these, so keep this
371 # constructor as small and fast as possible
372 def __init__( self, toklist=None, name=None, asList=True, modal=True, isinstance=isinstance ):
373 if self.__doinit:
374 self.__doinit = False
375 self.__name = None
376 self.__parent = None
377 self.__accumNames = {}
378 self.__asList = asList
379 self.__modal = modal
380 if toklist is None:
381 toklist = []
382 if isinstance(toklist, list):
383 self.__toklist = toklist[:]
384 elif isinstance(toklist, _generatorType):
385 self.__toklist = list(toklist)
386 else:
387 self.__toklist = [toklist]
388 self.__tokdict = dict()
389
390 if name is not None and name:
391 if not modal:
392 self.__accumNames[name] = 0
393 if isinstance(name,int):
394 name = _ustr(name) # will always return a str, but use _ustr for consistency
395 self.__name = name
396 if not (isinstance(toklist, (type(None), basestring, list)) and toklist in (None,'',[])):
397 if isinstance(toklist,basestring):
398 toklist = [ toklist ]
399 if asList:
400 if isinstance(toklist,ParseResults):
401 self[name] = _ParseResultsWithOffset(toklist.copy(),0)
402 else:
403 self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]),0)
404 self[name].__name = name
405 else:
406 try:
407 self[name] = toklist[0]
408 except (KeyError,TypeError,IndexError):
409 self[name] = toklist
410
411 def __getitem__( self, i ):
412 if isinstance( i, (int,slice) ):
413 return self.__toklist[i]
414 else:
415 if i not in self.__accumNames:
416 return self.__tokdict[i][-1][0]
417 else:
418 return ParseResults([ v[0] for v in self.__tokdict[i] ])
419
420 def __setitem__( self, k, v, isinstance=isinstance ):
421 if isinstance(v,_ParseResultsWithOffset):
422 self.__tokdict[k] = self.__tokdict.get(k,list()) + [v]
423 sub = v[0]
424 elif isinstance(k,(int,slice)):
425 self.__toklist[k] = v
426 sub = v
427 else:
428 self.__tokdict[k] = self.__tokdict.get(k,list()) + [_ParseResultsWithOffset(v,0)]
429 sub = v
430 if isinstance(sub,ParseResults):
431 sub.__parent = wkref(self)
432
433 def __delitem__( self, i ):
434 if isinstance(i,(int,slice)):
435 mylen = len( self.__toklist )
436 del self.__toklist[i]
437
438 # convert int to slice
439 if isinstance(i, int):
440 if i < 0:
441 i += mylen
442 i = slice(i, i+1)
443 # get removed indices
444 removed = list(range(*i.indices(mylen)))
445 removed.reverse()
446 # fixup indices in token dictionary
447 for name,occurrences in self.__tokdict.items():
448 for j in removed:
449 for k, (value, position) in enumerate(occurrences):
450 occurrences[k] = _ParseResultsWithOffset(value, position - (position > j))
451 else:
452 del self.__tokdict[i]
453
454 def __contains__( self, k ):
455 return k in self.__tokdict
456
457 def __len__( self ): return len( self.__toklist )
458 def __bool__(self): return ( not not self.__toklist )
459 __nonzero__ = __bool__
460 def __iter__( self ): return iter( self.__toklist )
461 def __reversed__( self ): return iter( self.__toklist[::-1] )
462 def _iterkeys( self ):
463 if hasattr(self.__tokdict, "iterkeys"):
464 return self.__tokdict.iterkeys()
465 else:
466 return iter(self.__tokdict)
467
468 def _itervalues( self ):
469 return (self[k] for k in self._iterkeys())
470
471 def _iteritems( self ):
472 return ((k, self[k]) for k in self._iterkeys())
473
474 if PY_3:
475 keys = _iterkeys
476 """Returns an iterator of all named result keys (Python 3.x only)."""
477
478 values = _itervalues
479 """Returns an iterator of all named result values (Python 3.x only)."""
480
481 items = _iteritems
482 """Returns an iterator of all named result key-value tuples (Python 3.x only)."""
483
484 else:
485 iterkeys = _iterkeys
486 """Returns an iterator of all named result keys (Python 2.x only)."""
487
488 itervalues = _itervalues
489 """Returns an iterator of all named result values (Python 2.x only)."""
490
491 iteritems = _iteritems
492 """Returns an iterator of all named result key-value tuples (Python 2.x only)."""
493
494 def keys( self ):
495 """Returns all named result keys (as a list in Python 2.x, as an iterator in Python 3.x)."""
496 return list(self.iterkeys())
497
498 def values( self ):
499 """Returns all named result values (as a list in Python 2.x, as an iterator in Python 3.x)."""
500 return list(self.itervalues())
501
502 def items( self ):
503 """Returns all named result key-values (as a list of tuples in Python 2.x, as an iterator in Python 3.x)."""
504 return list(self.iteritems())
505
506 def haskeys( self ):
507 """Since keys() returns an iterator, this method is helpful in bypassing
508 code that looks for the existence of any defined results names."""
509 return bool(self.__tokdict)
510
511 def pop( self, *args, **kwargs):
512 """
513 Removes and returns item at specified index (default=C{last}).
514 Supports both C{list} and C{dict} semantics for C{pop()}. If passed no
515 argument or an integer argument, it will use C{list} semantics
516 and pop tokens from the list of parsed tokens. If passed a
517 non-integer argument (most likely a string), it will use C{dict}
518 semantics and pop the corresponding value from any defined
519 results names. A second default return value argument is
520 supported, just as in C{dict.pop()}.
521
522 Example::
523 def remove_first(tokens):
524 tokens.pop(0)
525 print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321']
526 print(OneOrMore(Word(nums)).addParseAction(remove_first).parseString("0 123 321")) # -> ['123', '321']
527
528 label = Word(alphas)
529 patt = label("LABEL") + OneOrMore(Word(nums))
530 print(patt.parseString("AAB 123 321").dump())
531
532 # Use pop() in a parse action to remove named result (note that corresponding value is not
533 # removed from list form of results)
534 def remove_LABEL(tokens):
535 tokens.pop("LABEL")
536 return tokens
537 patt.addParseAction(remove_LABEL)
538 print(patt.parseString("AAB 123 321").dump())
539 prints::
540 ['AAB', '123', '321']
541 - LABEL: AAB
542
543 ['AAB', '123', '321']
544 """
545 if not args:
546 args = [-1]
547 for k,v in kwargs.items():
548 if k == 'default':
549 args = (args[0], v)
550 else:
551 raise TypeError("pop() got an unexpected keyword argument '%s'" % k)
552 if (isinstance(args[0], int) or
553 len(args) == 1 or
554 args[0] in self):
555 index = args[0]
556 ret = self[index]
557 del self[index]
558 return ret
559 else:
560 defaultvalue = args[1]
561 return defaultvalue
562
563 def get(self, key, defaultValue=None):
564 """
565 Returns named result matching the given key, or if there is no
566 such name, then returns the given C{defaultValue} or C{None} if no
567 C{defaultValue} is specified.
568
569 Similar to C{dict.get()}.
570
571 Example::
572 integer = Word(nums)
573 date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
574
575 result = date_str.parseString("1999/12/31")
576 print(result.get("year")) # -> '1999'
577 print(result.get("hour", "not specified")) # -> 'not specified'
578 print(result.get("hour")) # -> None
579 """
580 if key in self:
581 return self[key]
582 else:
583 return defaultValue
584
585 def insert( self, index, insStr ):
586 """
587 Inserts new element at location index in the list of parsed tokens.
588
589 Similar to C{list.insert()}.
590
591 Example::
592 print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321']
593
594 # use a parse action to insert the parse location in the front of the parsed results
595 def insert_locn(locn, tokens):
596 tokens.insert(0, locn)
597 print(OneOrMore(Word(nums)).addParseAction(insert_locn).parseString("0 123 321")) # -> [0, '0', '123', '321']
598 """
599 self.__toklist.insert(index, insStr)
600 # fixup indices in token dictionary
601 for name,occurrences in self.__tokdict.items():
602 for k, (value, position) in enumerate(occurrences):
603 occurrences[k] = _ParseResultsWithOffset(value, position + (position > index))
604
605 def append( self, item ):
606 """
607 Add single element to end of ParseResults list of elements.
608
609 Example::
610 print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321']
611
612 # use a parse action to compute the sum of the parsed integers, and add it to the end
613 def append_sum(tokens):
614 tokens.append(sum(map(int, tokens)))
615 print(OneOrMore(Word(nums)).addParseAction(append_sum).parseString("0 123 321")) # -> ['0', '123', '321', 444]
616 """
617 self.__toklist.append(item)
618
619 def extend( self, itemseq ):
620 """
621 Add sequence of elements to end of ParseResults list of elements.
622
623 Example::
624 patt = OneOrMore(Word(alphas))
625
626 # use a parse action to append the reverse of the matched strings, to make a palindrome
627 def make_palindrome(tokens):
628 tokens.extend(reversed([t[::-1] for t in tokens]))
629 return ''.join(tokens)
630 print(patt.addParseAction(make_palindrome).parseString("lskdj sdlkjf lksd")) # -> 'lskdjsdlkjflksddsklfjkldsjdksl'
631 """
632 if isinstance(itemseq, ParseResults):
633 self += itemseq
634 else:
635 self.__toklist.extend(itemseq)
636
637 def clear( self ):
638 """
639 Clear all elements and results names.
640 """
641 del self.__toklist[:]
642 self.__tokdict.clear()
643
644 def __getattr__( self, name ):
645 try:
646 return self[name]
647 except KeyError:
648 return ""
649
650 if name in self.__tokdict:
651 if name not in self.__accumNames:
652 return self.__tokdict[name][-1][0]
653 else:
654 return ParseResults([ v[0] for v in self.__tokdict[name] ])
655 else:
656 return ""
657
658 def __add__( self, other ):
659 ret = self.copy()
660 ret += other
661 return ret
662
663 def __iadd__( self, other ):
664 if other.__tokdict:
665 offset = len(self.__toklist)
666 addoffset = lambda a: offset if a<0 else a+offset
667 otheritems = other.__tokdict.items()
668 otherdictitems = [(k, _ParseResultsWithOffset(v[0],addoffset(v[1])) )
669 for (k,vlist) in otheritems for v in vlist]
670 for k,v in otherdictitems:
671 self[k] = v
672 if isinstance(v[0],ParseResults):
673 v[0].__parent = wkref(self)
674
675 self.__toklist += other.__toklist
676 self.__accumNames.update( other.__accumNames )
677 return self
678
679 def __radd__(self, other):
680 if isinstance(other,int) and other == 0:
681 # useful for merging many ParseResults using sum() builtin
682 return self.copy()
683 else:
684 # this may raise a TypeError - so be it
685 return other + self
686
687 def __repr__( self ):
688 return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) )
689
690 def __str__( self ):
691 return '[' + ', '.join(_ustr(i) if isinstance(i, ParseResults) else repr(i) for i in self.__toklist) + ']'
692
693 def _asStringList( self, sep='' ):
694 out = []
695 for item in self.__toklist:
696 if out and sep:
697 out.append(sep)
698 if isinstance( item, ParseResults ):
699 out += item._asStringList()
700 else:
701 out.append( _ustr(item) )
702 return out
703
704 def asList( self ):
705 """
706 Returns the parse results as a nested list of matching tokens, all converted to strings.
707
708 Example::
709 patt = OneOrMore(Word(alphas))
710 result = patt.parseString("sldkj lsdkj sldkj")
711 # even though the result prints in string-like form, it is actually a pyparsing ParseResults
712 print(type(result), result) # -> <class 'pyparsing.ParseResults'> ['sldkj', 'lsdkj', 'sldkj']
713
714 # Use asList() to create an actual list
715 result_list = result.asList()
716 print(type(result_list), result_list) # -> <class 'list'> ['sldkj', 'lsdkj', 'sldkj']
717 """
718 return [res.asList() if isinstance(res,ParseResults) else res for res in self.__toklist]
719
720 def asDict( self ):
721 """
722 Returns the named parse results as a nested dictionary.
723
724 Example::
725 integer = Word(nums)
726 date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
727
728 result = date_str.parseString('12/31/1999')
729 print(type(result), repr(result)) # -> <class 'pyparsing.ParseResults'> (['12', '/', '31', '/', '1999'], {'day': [('1999', 4)], 'year': [('12', 0)], 'month': [('31', 2)]})
730
731 result_dict = result.asDict()
732 print(type(result_dict), repr(result_dict)) # -> <class 'dict'> {'day': '1999', 'year': '12', 'month': '31'}
733
734 # even though a ParseResults supports dict-like access, sometime you just need to have a dict
735 import json
736 print(json.dumps(result)) # -> Exception: TypeError: ... is not JSON serializable
737 print(json.dumps(result.asDict())) # -> {"month": "31", "day": "1999", "year": "12"}
738 """
739 if PY_3:
740 item_fn = self.items
741 else:
742 item_fn = self.iteritems
743
744 def toItem(obj):
745 if isinstance(obj, ParseResults):
746 if obj.haskeys():
747 return obj.asDict()
748 else:
749 return [toItem(v) for v in obj]
750 else:
751 return obj
752
753 return dict((k,toItem(v)) for k,v in item_fn())
754
755 def copy( self ):
756 """
757 Returns a new copy of a C{ParseResults} object.
758 """
759 ret = ParseResults( self.__toklist )
760 ret.__tokdict = self.__tokdict.copy()
761 ret.__parent = self.__parent
762 ret.__accumNames.update( self.__accumNames )
763 ret.__name = self.__name
764 return ret
765
766 def asXML( self, doctag=None, namedItemsOnly=False, indent="", formatted=True ):
767 """
768 (Deprecated) Returns the parse results as XML. Tags are created for tokens and lists that have defined results names.
769 """
770 nl = "\n"
771 out = []
772 namedItems = dict((v[1],k) for (k,vlist) in self.__tokdict.items()
773 for v in vlist)
774 nextLevelIndent = indent + " "
775
776 # collapse out indents if formatting is not desired
777 if not formatted:
778 indent = ""
779 nextLevelIndent = ""
780 nl = ""
781
782 selfTag = None
783 if doctag is not None:
784 selfTag = doctag
785 else:
786 if self.__name:
787 selfTag = self.__name
788
789 if not selfTag:
790 if namedItemsOnly:
791 return ""
792 else:
793 selfTag = "ITEM"
794
795 out += [ nl, indent, "<", selfTag, ">" ]
796
797 for i,res in enumerate(self.__toklist):
798 if isinstance(res,ParseResults):
799 if i in namedItems:
800 out += [ res.asXML(namedItems[i],
801 namedItemsOnly and doctag is None,
802 nextLevelIndent,
803 formatted)]
804 else:
805 out += [ res.asXML(None,
806 namedItemsOnly and doctag is None,
807 nextLevelIndent,
808 formatted)]
809 else:
810 # individual token, see if there is a name for it
811 resTag = None
812 if i in namedItems:
813 resTag = namedItems[i]
814 if not resTag:
815 if namedItemsOnly:
816 continue
817 else:
818 resTag = "ITEM"
819 xmlBodyText = _xml_escape(_ustr(res))
820 out += [ nl, nextLevelIndent, "<", resTag, ">",
821 xmlBodyText,
822 "</", resTag, ">" ]
823
824 out += [ nl, indent, "</", selfTag, ">" ]
825 return "".join(out)
826
827 def __lookup(self,sub):
828 for k,vlist in self.__tokdict.items():
829 for v,loc in vlist:
830 if sub is v:
831 return k
832 return None
833
834 def getName(self):
835 r"""
836 Returns the results name for this token expression. Useful when several
837 different expressions might match at a particular location.
838
839 Example::
840 integer = Word(nums)
841 ssn_expr = Regex(r"\d\d\d-\d\d-\d\d\d\d")
842 house_number_expr = Suppress('#') + Word(nums, alphanums)
843 user_data = (Group(house_number_expr)("house_number")
844 | Group(ssn_expr)("ssn")
845 | Group(integer)("age"))
846 user_info = OneOrMore(user_data)
847
848 result = user_info.parseString("22 111-22-3333 #221B")
849 for item in result:
850 print(item.getName(), ':', item[0])
851 prints::
852 age : 22
853 ssn : 111-22-3333
854 house_number : 221B
855 """
856 if self.__name:
857 return self.__name
858 elif self.__parent:
859 par = self.__parent()
860 if par:
861 return par.__lookup(self)
862 else:
863 return None
864 elif (len(self) == 1 and
865 len(self.__tokdict) == 1 and
866 next(iter(self.__tokdict.values()))[0][1] in (0,-1)):
867 return next(iter(self.__tokdict.keys()))
868 else:
869 return None
870
871 def dump(self, indent='', depth=0, full=True):
872 """
873 Diagnostic method for listing out the contents of a C{ParseResults}.
874 Accepts an optional C{indent} argument so that this string can be embedded
875 in a nested display of other data.
876
877 Example::
878 integer = Word(nums)
879 date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
880
881 result = date_str.parseString('12/31/1999')
882 print(result.dump())
883 prints::
884 ['12', '/', '31', '/', '1999']
885 - day: 1999
886 - month: 31
887 - year: 12
888 """
889 out = []
890 NL = '\n'
891 out.append( indent+_ustr(self.asList()) )
892 if full:
893 if self.haskeys():
894 items = sorted((str(k), v) for k,v in self.items())
895 for k,v in items:
896 if out:
897 out.append(NL)
898 out.append( "%s%s- %s: " % (indent,(' '*depth), k) )
899 if isinstance(v,ParseResults):
900 if v:
901 out.append( v.dump(indent,depth+1) )
902 else:
903 out.append(_ustr(v))
904 else:
905 out.append(repr(v))
906 elif any(isinstance(vv,ParseResults) for vv in self):
907 v = self
908 for i,vv in enumerate(v):
909 if isinstance(vv,ParseResults):
910 out.append("\n%s%s[%d]:\n%s%s%s" % (indent,(' '*(depth)),i,indent,(' '*(depth+1)),vv.dump(indent,depth+1) ))
911 else:
912 out.append("\n%s%s[%d]:\n%s%s%s" % (indent,(' '*(depth)),i,indent,(' '*(depth+1)),_ustr(vv)))
913
914 return "".join(out)
915
def pprint(self, *args, **kwargs):
    """
    Pretty-print the parsed results in their list form using the standard
    C{pprint} module.  Any extra positional or keyword arguments are passed
    straight through to C{pprint.pprint}.
    (U{http://docs.python.org/3/library/pprint.html#pprint.pprint})

    Example::
        ident = Word(alphas, alphanums)
        num = Word(nums)
        func = Forward()
        term = ident | num | Group('(' + func + ')')
        func <<= ident + Group(Optional(delimitedList(term)))
        result = func.parseString("fna a,b,(fnb c,d,200),100")
        result.pprint(width=40)
    prints::
        ['fna',
         ['a',
          'b',
          ['(', 'fnb', ['c', 'd', '200'], ')'],
          '100']]
    """
    as_nested_list = self.asList()
    pprint.pprint(as_nested_list, *args, **kwargs)
938
939 # add support for pickle protocol
def __getstate__(self):
    """
    Return the picklable state: the token list plus a tuple holding a copy of
    the name dictionary, the dereferenced parent (or C{None}), the
    accumulated-names mapping and this result's own name.
    """
    parent_ref = self.__parent
    # the parent is held through a callable reference; store the referent itself
    # (or None when there is no reference, or it yields a false value)
    parent = (parent_ref() if parent_ref is not None else None) or None
    extras = (self.__tokdict.copy(), parent, self.__accumNames, self.__name)
    return (self.__toklist, extras)
946
def __setstate__(self, state):
    """
    Restore the state produced by C{__getstate__}: C{state[0]} is the token
    list, C{state[1]} is the tuple (name dict, parent, accumulated names, name).
    """
    self.__toklist = state[0]
    tokdict, parent, accum_names, name = state[1]
    self.__tokdict = tokdict
    self.__name = name
    # rebuild the accumulated-names mapping as a fresh dict
    self.__accumNames = {}
    self.__accumNames.update(accum_names)
    # re-wrap the parent in a reference object via wkref, as long as there is one
    self.__parent = wkref(parent) if parent is not None else None
959
def __getnewargs__(self):
    """Return the (toklist, name, asList, modal) tuple used as constructor args when unpickling."""
    new_args = (self.__toklist, self.__name, self.__asList, self.__modal)
    return new_args
962
def __dir__(self):
    """List the attributes of the class followed by the results names defined on this instance."""
    names = dir(type(self))
    names.extend(self.keys())
    return names
965
# Register ParseResults with MutableMapping (presumably the collections ABC imported
# near the top of this file - confirm) so it is treated as a virtual subclass of it.
MutableMapping.register(ParseResults)
967
def col (loc,strg):
    """Return the 1-based column of position C{loc} within C{strg}, where
    newlines separate lines.

    Note: the default parsing behavior is to expand tabs in the input string
    before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
    on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
    consistent view of the parsed string, the parse location, and line and column
    positions within the parsed string.
    """
    text = strg
    # a position immediately after a newline is the first column of its line
    if 0 < loc < len(text) and text[loc - 1] == '\n':
        return 1
    # otherwise measure the distance back to the previous newline (-1 if none)
    return loc - text.rfind("\n", 0, loc)
980
def lineno(loc,strg):
    """Return the 1-based line number of position C{loc} within C{strg}, where
    newlines separate lines.

    Note: the default parsing behavior is to expand tabs in the input string
    before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
    on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
    consistent view of the parsed string, the parse location, and line and column
    positions within the parsed string.
    """
    newlines_before = strg.count("\n", 0, loc)
    return newlines_before + 1
992
def line( loc, strg ):
    """Return the line of text that contains position C{loc} within C{strg},
    without its terminating newline; newlines are the line separators.
    """
    start = strg.rfind("\n", 0, loc) + 1   # just past the previous newline (0 if none)
    end = strg.find("\n", loc)             # next newline at or after loc (-1 if none)
    return strg[start:] if end < 0 else strg[start:end]
1002
def _defaultStartDebugAction( instring, loc, expr ):
    """Default 'start' debug action: print the expression about to be matched
    together with the location and its (line,column) position."""
    label = "Match " + _ustr(expr) + " at loc " + _ustr(loc)
    position = "(%d,%d)" % ( lineno(loc,instring), col(loc,instring) )
    print (label + position)
1005
def _defaultSuccessDebugAction( instring, startloc, endloc, expr, toks ):
    """Default 'success' debug action: print the matched expression and its tokens as a list."""
    expr_text = _ustr(expr)
    tokens_text = str(toks.asList())
    print ("Matched " + expr_text + " -> " + tokens_text)
1008
def _defaultExceptionDebugAction( instring, loc, expr, exc ):
    """Default 'exception' debug action: print the exception raised by the failed match."""
    message = "Exception raised:" + _ustr(exc)
    print (message)
1011
def nullDebugAction(*args):
    """'Do-nothing' debug action, to suppress debugging output during parsing.

    Accepts any positional arguments, ignores them and returns C{None}.
    """
    return None
1015
1016 # Only works on Python 3.x - nonlocal is toxic to Python 2 installs
1017 #~ 'decorator to trim function calls to match the arity of the target'
1018 #~ def _trim_arity(func, maxargs=3):
1019 #~ if func in singleArgBuiltins:
1020 #~ return lambda s,l,t: func(t)
1021 #~ limit = 0
1022 #~ foundArity = False
1023 #~ def wrapper(*args):
1024 #~ nonlocal limit,foundArity
1025 #~ while 1:
1026 #~ try:
1027 #~ ret = func(*args[limit:])
1028 #~ foundArity = True
1029 #~ return ret
1030 #~ except TypeError:
1031 #~ if limit == maxargs or foundArity:
1032 #~ raise
1033 #~ limit += 1
1034 #~ continue
1035 #~ return wrapper
1036
1037 # this version is Python 2.x-3.x cross-compatible
1038 'decorator to trim function calls to match the arity of the target'
def _trim_arity(func, maxargs=2):
    """Wrap parse action C{func} so that it can always be invoked as
    C{wrapper(s, loc, toks)}, whatever number of those arguments it accepts.

    The wrapper first calls C{func} with all arguments; each time that call
    itself raises C{TypeError} it drops one more leading argument and retries,
    until a call succeeds or the limit derived from C{maxargs} is exceeded.
    The number of dropped arguments is remembered for later calls.  Callables
    listed in C{singleArgBuiltins} are wrapped to receive only the tokens.

    Returns the wrapper, whose C{__name__} is copied from C{func} when possible.
    """
    if func in singleArgBuiltins:
        return lambda s,l,t: func(t)
    # single-element lists act as mutable cells shared with wrapper() (no 'nonlocal' in Python 2)
    limit = [0]
    foundArity = [False]

    # traceback return data structure changed in Py3.5 - normalize back to plain tuples
    if system_version[:2] >= (3,5):
        def extract_stack(limit=0):
            # special handling for Python 3.5.0 - extra deep call stack by 1
            offset = -3 if system_version == (3,5,0) else -2
            frame_summary = traceback.extract_stack(limit=-offset+limit-1)[offset]
            return [frame_summary[:2]]
        def extract_tb(tb, limit=0):
            frames = traceback.extract_tb(tb, limit=limit)
            frame_summary = frames[-1]
            return [frame_summary[:2]]
    else:
        extract_stack = traceback.extract_stack
        extract_tb = traceback.extract_tb

    # synthesize what would be returned by traceback.extract_stack at the call to
    # user's parse action 'func', so that we don't incur call penalty at parse time

    LINE_DIFF = 6
    # IF ANY CODE CHANGES, EVEN JUST COMMENTS OR BLANK LINES, BETWEEN THE NEXT LINE AND
    # THE CALL TO FUNC INSIDE WRAPPER, LINE_DIFF MUST BE MODIFIED!!!!
    this_line = extract_stack(limit=2)[-1]
    pa_call_line_synth = (this_line[0], this_line[1]+LINE_DIFF)

    def wrapper(*args):
        while 1:
            try:
                ret = func(*args[limit[0]:])
                # the call went through: remember that this argument offset works
                foundArity[0] = True
                return ret
            except TypeError:
                # re-raise TypeErrors if they did not come from our arity testing
                if foundArity[0]:
                    raise
                else:
                    try:
                        tb = sys.exc_info()[-1]
                        # only a TypeError whose innermost frame (file, line) is the call to func
                        # above counts as an arity mismatch; anything else is propagated
                        if not extract_tb(tb, limit=2)[-1][:2] == pa_call_line_synth:
                            raise
                    finally:
                        # drop the local traceback reference
                        del tb

                # retry with one fewer leading argument, while the limit allows it
                if limit[0] <= maxargs:
                    limit[0] += 1
                    continue
                raise

    # copy func name to wrapper for sensible debug output
    func_name = "<parse action>"
    try:
        func_name = getattr(func, '__name__',
                            getattr(func, '__class__').__name__)
    except Exception:
        func_name = str(func)
    wrapper.__name__ = func_name

    return wrapper
1102
1103 class ParserElement(object):
1104 """Abstract base level parser element class."""
1105 DEFAULT_WHITE_CHARS = " \n\t\r"
1106 verbose_stacktrace = False
1107
@staticmethod
def setDefaultWhitespaceChars( chars ):
    r"""
    Replace the class-wide default set of whitespace characters
    (C{ParserElement.DEFAULT_WHITE_CHARS}) with C{chars}.  The default is read
    when an element is constructed, so this affects elements created afterwards.

    Example::
        # default whitespace chars are space, newline, <TAB> and carriage return
        OneOrMore(Word(alphas)).parseString("abc def\nghi jkl")  # -> ['abc', 'def', 'ghi', 'jkl']

        # change to just treat newline as significant
        ParserElement.setDefaultWhitespaceChars(" \t")
        OneOrMore(Word(alphas)).parseString("abc def\nghi jkl")  # -> ['abc', 'def']
    """
    setattr(ParserElement, "DEFAULT_WHITE_CHARS", chars)
1122
@staticmethod
def inlineLiteralsUsing(cls):
    """
    Choose the class used to wrap plain strings that are combined with parser
    elements (stored as C{ParserElement._literalStringClass}).

    Example::
        # default literal class used is Literal
        integer = Word(nums)
        date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

        date_str.parseString("1999/12/31")  # -> ['1999', '/', '12', '/', '31']


        # change to Suppress
        ParserElement.inlineLiteralsUsing(Suppress)
        date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

        date_str.parseString("1999/12/31")  # -> ['1999', '12', '31']
    """
    setattr(ParserElement, "_literalStringClass", cls)
1143
def __init__( self, savelist=False ):
    """Set up the state common to every parser element.

    C{savelist} is stored as C{saveAsList}; every other attribute starts at
    its default value.
    """
    self.parseAction = []
    self.failAction = None
    # self.name is intentionally NOT defined here; subclasses try/except the upcall
    self.strRepr = None
    self.resultsName = None
    self.saveAsList = savelist
    self.skipWhitespace = True
    self.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
    self.copyDefaultWhiteChars = True
    # used when checking for left-recursion
    self.mayReturnEmpty = False
    self.keepTabs = False
    self.ignoreExprs = []
    self.debug = False
    self.streamlined = False
    # used to optimize exception handling for subclasses that don't advance parse index
    self.mayIndexError = True
    self.errmsg = ""
    # used to mark results names as modal (report only last) or cumulative (list all)
    self.modalResults = True
    # custom debug actions: (start, success, exception)
    self.debugActions = (None, None, None)
    self.re = None
    # used to avoid redundant calls to preParse
    self.callPreparse = True
    self.callDuringTry = False
1166
def copy( self ):
    """
    Return a shallow copy of this C{ParserElement} with its own parse-action
    and ignore-expression lists, so that different parse actions can be
    attached to the same basic pattern.  If C{copyDefaultWhiteChars} is set,
    the copy picks up the current class-wide default whitespace characters.

    Example::
        integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
        integerK = integer.copy().addParseAction(lambda toks: toks[0]*1024) + Suppress("K")
        integerM = integer.copy().addParseAction(lambda toks: toks[0]*1024*1024) + Suppress("M")

        print(OneOrMore(integerK | integerM | integer).parseString("5K 100 640K 256M"))
    prints::
        [5120, 100, 655360, 268435456]
    Equivalent form of C{expr.copy()} is just C{expr()}::
        integerM = integer().addParseAction(lambda toks: toks[0]*1024*1024) + Suppress("M")
    """
    clone = copy.copy(self)
    # give the clone independent lists so later additions don't leak back
    clone.parseAction = list(self.parseAction)
    clone.ignoreExprs = list(self.ignoreExprs)
    if self.copyDefaultWhiteChars:
        clone.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
    return clone
1189
def setName( self, name ):
    """
    Give this expression a name; the name is used to build the
    "Expected <name>" error message, which makes debugging output and
    exception messages clearer.  Returns C{self}.

    Example::
        Word(nums).parseString("ABC")  # -> Exception: Expected W:(0123...) (at char 0), (line:1, col:1)
        Word(nums).setName("integer").parseString("ABC")  # -> Exception: Expected integer (at char 0), (line:1, col:1)
    """
    self.name = name
    expected = "Expected " + self.name
    self.errmsg = expected
    # keep an already-attached exception object in sync with the new message
    if hasattr(self, "exception"):
        self.exception.msg = self.errmsg
    return self
1203
def setResultsName( self, name, listAllMatches=False ):
    """
    Define the name under which matching tokens can be referenced in the
    returned parse results.
    NOTE: this returns a *copy* of the original C{ParserElement} object;
    this is so that the client can define a basic element, such as an
    integer, and reference it in multiple places with different names.

    A trailing C{"*"} on C{name} is stripped and has the same effect as
    passing C{listAllMatches=True}.

    You can also set results names using the abbreviated syntax,
    C{expr("name")} in place of C{expr.setResultsName("name")} -
    see L{I{__call__}<__call__>}.

    Example::
        date_str = (integer.setResultsName("year") + '/'
                    + integer.setResultsName("month") + '/'
                    + integer.setResultsName("day"))

        # equivalent form:
        date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
    """
    clone = self.copy()
    accumulate = listAllMatches
    if name.endswith("*"):
        name, accumulate = name[:-1], True
    clone.resultsName = name
    clone.modalResults = not accumulate
    return clone
1231
def setBreak(self,breakFlag = True):
    """Method to invoke the Python pdb debugger when this element is
       about to be parsed. Set C{breakFlag} to True to enable, False to
       disable.  Returns C{self}.
    """
    if not breakFlag:
        # restore the wrapped parse method, if a breaker is currently installed
        if hasattr(self._parse, "_originalParseMethod"):
            self._parse = self._parse._originalParseMethod
        return self

    wrapped = self._parse
    def breaker(instring, loc, doActions=True, callPreParse=True):
        import pdb
        pdb.set_trace()
        return wrapped( instring, loc, doActions, callPreParse )
    # remember what was wrapped so that setBreak(False) can undo it
    breaker._originalParseMethod = wrapped
    self._parse = breaker
    return self
1249
def setParseAction( self, *fns, **kwargs ):
    """
    Replace this element's parse actions with C{fns}, the actions to perform
    when the element is successfully matched.
    Parse action fn is a callable method with 0-3 arguments, called as C{fn(s,loc,toks)},
    C{fn(loc,toks)}, C{fn(toks)}, or just C{fn()}, where:
     - s   = the original string being parsed (see note below)
     - loc = the location of the matching substring
     - toks = a list of the matched tokens, packaged as a C{L{ParseResults}} object
    If the functions in fns modify the tokens, they can return them as the return
    value from fn, and the modified list of tokens will replace the original.
    Otherwise, fn does not need to return any value.

    Optional keyword arguments:
     - callDuringTry = (default=C{False}) indicate if parse action should be run during lookaheads and alternate testing

    Note: the default parsing behavior is to expand tabs in the input string
    before starting the parsing process.  See L{I{parseString}<parseString>} for more information
    on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
    consistent view of the parsed string, the parse location, and line and column
    positions within the parsed string.

    Example::
        integer = Word(nums)
        date_str = integer + '/' + integer + '/' + integer

        date_str.parseString("1999/12/31")  # -> ['1999', '/', '12', '/', '31']

        # use parse action to convert to ints at parse time
        integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
        date_str = integer + '/' + integer + '/' + integer

        # note that integer fields are now ints, not strings
        date_str.parseString("1999/12/31")  # -> [1999, '/', 12, '/', 31]
    """
    # every action is wrapped so it can be called uniformly as fn(s, loc, toks)
    self.parseAction = [_trim_arity(fn) for fn in fns]
    self.callDuringTry = kwargs.get("callDuringTry", False)
    return self
1287
def addParseAction( self, *fns, **kwargs ):
    """
    Append one or more parse actions to this expression's existing list of
    parse actions. See L{I{setParseAction}<setParseAction>} for the accepted
    call signatures and the C{callDuringTry} keyword argument.

    See examples in L{I{copy}<copy>}.
    """
    wrapped = [_trim_arity(fn) for fn in fns]
    self.parseAction.extend(wrapped)
    # once enabled, callDuringTry stays enabled
    self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False)
    return self
1297
def addCondition(self, *fns, **kwargs):
    """Add a boolean predicate function to expression's list of parse actions. See
    L{I{setParseAction}<setParseAction>} for function call signatures. Unlike C{setParseAction},
    functions passed to C{addCondition} need to return boolean success/fail of the condition.

    Each function in C{fns} becomes its own condition; all of them must pass.

    Optional keyword arguments:
     - message = define a custom message to be used in the raised exception
     - fatal   = if True, will raise ParseFatalException to stop parsing immediately; otherwise will raise ParseException
     - callDuringTry = (default=C{False}) indicate if the conditions should be run during lookaheads and alternate testing

    Returns C{self}.

    Example::
        integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
        year_int = integer.copy()
        year_int.addCondition(lambda toks: toks[0] >= 2000, message="Only support years 2000 and later")
        date_str = year_int + '/' + integer + '/' + integer

        result = date_str.parseString("1999/12/31")  # -> Exception: Only support years 2000 and later (at char 0), (line:1, col:1)
    """
    msg = kwargs.get("message", "failed user-defined condition")
    exc_type = ParseFatalException if kwargs.get("fatal", False) else ParseException

    def make_condition(fn):
        # Build the parse action in its own scope so that each condition keeps
        # its OWN predicate.  Defining 'pa' directly in the loop below would make
        # every closure look up the loop variable at call time, i.e. they would
        # all evaluate the last function passed in.
        # The arity-trimming wrapper is also created once here, not on every match.
        trimmed = _trim_arity(fn)
        def pa(s,l,t):
            if not bool(trimmed(s,l,t)):
                raise exc_type(s,l,msg)
        return pa

    for fn in fns:
        self.parseAction.append(make_condition(fn))
    self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False)
    return self
1324
def setFailAction( self, fn ):
    """Define the action to perform if parsing fails at this expression.
       The fail action C{fn} is a callable that takes the arguments
       C{fn(s,loc,expr,err)} where:
        - s = string being parsed
        - loc = location where expression match was attempted and failed
        - expr = the parse expression that failed
        - err = the exception thrown
       The function returns no value.  It may throw C{L{ParseFatalException}}
       if it is desired to stop parsing immediately.

       Returns C{self}."""
    setattr(self, "failAction", fn)
    return self
1337
def _skipIgnorables( self, instring, loc ):
    """Advance C{loc} past every match of the expressions in C{ignoreExprs}.

    Each ignorable is applied repeatedly until it fails, and the whole set is
    retried until a full pass matches nothing.  Returns the new location.
    """
    while True:
        skipped_any = False
        for ignorable in self.ignoreExprs:
            try:
                # consume consecutive matches of this ignorable
                while True:
                    loc, _ = ignorable._parse( instring, loc )
                    skipped_any = True
            except ParseException:
                # this ignorable no longer matches here; try the next one
                pass
        if not skipped_any:
            return loc
1350
def preParse( self, instring, loc ):
    """Return the location at which matching should start: C{loc} advanced
    past any ignorable expressions and then, if C{skipWhitespace} is set, past
    any characters in C{whiteChars}."""
    if self.ignoreExprs:
        loc = self._skipIgnorables( instring, loc )

    if not self.skipWhitespace:
        return loc

    white = self.whiteChars
    end = len(instring)
    while loc < end and instring[loc] in white:
        loc += 1
    return loc
1362
def parseImpl( self, instring, loc, doActions=True ):
    """Base matching step: consume nothing and produce no tokens.

    Returns C{(loc, [])} with C{loc} unchanged.
    """
    no_tokens = []
    return loc, no_tokens
1365
def postParse( self, instring, loc, tokenlist ):
    """Hook applied to the tokens returned by C{parseImpl}; this base version
    returns C{tokenlist} unchanged."""
    result = tokenlist
    return result
1368
1369 #~ @profile
def _parseNoCache( self, instring, loc, doActions=True, callPreParse=True ):
    """Core (uncached) parse step for this element.

    Optionally pre-parses from C{loc}, calls C{parseImpl} and C{postParse},
    wraps the tokens in a C{ParseResults}, then runs the parse actions when
    C{doActions} or C{callDuringTry} is set.  Debug actions and the fail
    action are invoked along the way when configured.

    Returns C{(loc, retTokens)}: the location after the match and the results.
    An C{IndexError} raised by C{parseImpl} is converted to a
    C{ParseException} located at the end of the input (in the fast path only
    when C{mayIndexError} is set or the start location is at/after the end).
    """
    debugging = ( self.debug ) #and doActions )

    # slow path: debugging is on or a fail action is set, so failures must be reported
    if debugging or self.failAction:
        #~ print ("Match",self,"at loc",loc,"(%d,%d)" % ( lineno(loc,instring), col(loc,instring) ))
        if (self.debugActions[0] ):
            self.debugActions[0]( instring, loc, self )
        if callPreParse and self.callPreparse:
            preloc = self.preParse( instring, loc )
        else:
            preloc = loc
        # location handed to parse actions, debug actions and the fail action
        tokensStart = preloc
        try:
            try:
                loc,tokens = self.parseImpl( instring, preloc, doActions )
            except IndexError:
                raise ParseException( instring, len(instring), self.errmsg, self )
        except ParseBaseException as err:
            #~ print ("Exception raised:", err)
            if self.debugActions[2]:
                self.debugActions[2]( instring, tokensStart, self, err )
            if self.failAction:
                self.failAction( instring, tokensStart, self, err )
            raise
    else:
        # fast path: no debug/fail callbacks to invoke
        if callPreParse and self.callPreparse:
            preloc = self.preParse( instring, loc )
        else:
            preloc = loc
        tokensStart = preloc
        # the IndexError guard is only set up when it may be needed
        if self.mayIndexError or preloc >= len(instring):
            try:
                loc,tokens = self.parseImpl( instring, preloc, doActions )
            except IndexError:
                raise ParseException( instring, len(instring), self.errmsg, self )
        else:
            loc,tokens = self.parseImpl( instring, preloc, doActions )

    tokens = self.postParse( instring, loc, tokens )

    retTokens = ParseResults( tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults )
    if self.parseAction and (doActions or self.callDuringTry):
        if debugging:
            try:
                for fn in self.parseAction:
                    tokens = fn( instring, tokensStart, retTokens )
                    # a non-None return value replaces the current results
                    if tokens is not None:
                        retTokens = ParseResults( tokens,
                                                  self.resultsName,
                                                  asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                                  modal=self.modalResults )
            except ParseBaseException as err:
                #~ print "Exception raised in user parse action:", err
                if (self.debugActions[2] ):
                    self.debugActions[2]( instring, tokensStart, self, err )
                raise
        else:
            # same loop as above, without the debug exception reporting
            for fn in self.parseAction:
                tokens = fn( instring, tokensStart, retTokens )
                if tokens is not None:
                    retTokens = ParseResults( tokens,
                                              self.resultsName,
                                              asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                              modal=self.modalResults )
    if debugging:
        #~ print ("Matched",self,"->",retTokens.asList())
        if (self.debugActions[1] ):
            self.debugActions[1]( instring, tokensStart, loc, self, retTokens )

    return loc, retTokens
1440
def tryParse( self, instring, loc ):
    """Attempt a match at C{loc} without running parse actions and return the
    location after the match.  A C{ParseFatalException} is converted into an
    ordinary C{ParseException} at C{loc}."""
    try:
        outcome = self._parse( instring, loc, doActions=False )
        return outcome[0]
    except ParseFatalException:
        raise ParseException( instring, loc, self.errmsg, self)
1446
def canParseNext(self, instring, loc):
    """Return C{True} if this element matches at C{loc}, C{False} if the
    attempt raises C{ParseException} or C{IndexError}."""
    try:
        self.tryParse(instring, loc)
    except (ParseException, IndexError):
        return False
    return True
1454
class _UnboundedCache(object):
    """Memo cache with no size limit.

    The backing dict lives in a closure; C{get}, C{set}, C{clear} and
    C{__len__} are bound onto the instance.  C{get} returns the
    C{not_in_cache} sentinel for a missing key.
    """
    def __init__(self):
        store = {}
        self.not_in_cache = missing = object()

        def get(self, key):
            return store.get(key, missing)

        def set(self, key, value):
            store[key] = value

        def clear(self):
            store.clear()

        def cache_len(self):
            return len(store)

        # bind the closures as instance-level methods
        for attr, fn in (("get", get), ("set", set), ("clear", clear), ("__len__", cache_len)):
            setattr(self, attr, types.MethodType(fn, self))
1476
if _OrderedDict is not None:
    class _FifoCache(object):
        """Memo cache holding at most C{size} entries, evicting the oldest
        inserted entry first.  Backed by an ordered dict kept in a closure;
        C{get} returns the C{not_in_cache} sentinel for a missing key."""
        def __init__(self, size):
            self.not_in_cache = not_in_cache = object()

            cache = _OrderedDict()

            def get(self, key):
                return cache.get(key, not_in_cache)

            def set(self, key, value):
                cache[key] = value
                # drop oldest entries until the cache is back within its limit
                while len(cache) > size:
                    try:
                        cache.popitem(False)
                    except KeyError:
                        pass

            def clear(self):
                cache.clear()

            def cache_len(self):
                return len(cache)

            self.get = types.MethodType(get, self)
            self.set = types.MethodType(set, self)
            self.clear = types.MethodType(clear, self)
            self.__len__ = types.MethodType(cache_len, self)

else:
    class _FifoCache(object):
        """Memo cache holding at most C{size} entries, evicting the oldest
        inserted entry first.  Fallback used when no ordered dict is
        available: a plain dict plus a deque recording insertion order."""
        def __init__(self, size):
            self.not_in_cache = not_in_cache = object()

            cache = {}
            # Deliberately NOT created with maxlen=size: a bounded deque silently
            # discards its oldest key on append, so the eviction loop below would
            # never run and 'cache' would grow without limit.
            key_fifo = collections.deque()

            def get(self, key):
                return cache.get(key, not_in_cache)

            def set(self, key, value):
                # record each key once, at first insertion, so the deque and dict stay in step
                if key not in cache:
                    key_fifo.append(key)
                cache[key] = value
                # evict oldest keys until the cache is back within its limit
                while len(key_fifo) > size:
                    cache.pop(key_fifo.popleft(), None)

            def clear(self):
                cache.clear()
                key_fifo.clear()

            def cache_len(self):
                return len(cache)

            self.get = types.MethodType(get, self)
            self.set = types.MethodType(set, self)
            self.clear = types.MethodType(clear, self)
            self.__len__ = types.MethodType(cache_len, self)
1534
# argument cache for optimizing repeated calls when backtracking through recursive expressions
packrat_cache = {} # this is set later by enablePackrat(); this is here so that resetCache() doesn't fail
# lock held by _parseCache while it reads or updates the shared cache
packrat_cache_lock = RLock()
# [hits, misses] counters, indexed by HIT/MISS in _parseCache
packrat_cache_stats = [0, 0]
1539
1540 # this method gets repeatedly called during backtracking with the same arguments -
1541 # we can cache these arguments and save ourselves the trouble of re-parsing the contained expression
def _parseCache( self, instring, loc, doActions=True, callPreParse=True ):
    """Memoizing front end for C{_parseNoCache}.

    Results are cached under the key C{(self, instring, loc, callPreParse, doActions)}.
    Both successful results and parse exceptions are cached; a cached
    exception is re-raised, and cached results are handed out as copies.
    Hit/miss counts are kept in C{ParserElement.packrat_cache_stats}.
    """
    HIT, MISS = 0, 1
    key = (self, instring, loc, callPreParse, doActions)
    with ParserElement.packrat_cache_lock:
        memo = ParserElement.packrat_cache
        cached = memo.get(key)
        if cached is not memo.not_in_cache:
            ParserElement.packrat_cache_stats[HIT] += 1
            if isinstance(cached, Exception):
                raise cached
            return (cached[0], cached[1].copy())

        ParserElement.packrat_cache_stats[MISS] += 1
        try:
            result = self._parseNoCache(instring, loc, doActions, callPreParse)
        except ParseBaseException as pe:
            # cache a copy of the exception, without the traceback
            memo.set(key, pe.__class__(*pe.args))
            raise
        # store a copy so the cached tokens are a separate object from the ones returned
        memo.set(key, (result[0], result[1].copy()))
        return result
1564
# default parse entry point is the uncached implementation; enablePackrat() rebinds it to _parseCache
_parse = _parseNoCache
1566
@staticmethod
def resetCache():
    """Empty the shared packrat cache and zero its statistics counters in place."""
    ParserElement.packrat_cache.clear()
    stats = ParserElement.packrat_cache_stats
    # slice-assign so the same list object is kept
    stats[:] = [0] * len(stats)
1571
_packratEnabled = False
@staticmethod
def enablePackrat(cache_size_limit=128):
    """Enables "packrat" parsing, which adds memoizing to the parsing logic.
       Repeated parse attempts at the same string location (which happens
       often in many complex grammars) can immediately return a cached value,
       instead of re-executing parsing/validating code.  Memoizing is done of
       both valid results and parsing exceptions.

       Parameters:
        - cache_size_limit - (default=C{128}) - if an integer value is provided
          will limit the size of the packrat cache; if None is passed, then
          the cache size will be unbounded; if 0 is passed, the cache will
          be effectively disabled.

       Only the first call has any effect; later calls return without
       changing the cache.

       This speedup may break existing programs that use parse actions that
       have side-effects.  For this reason, packrat parsing is disabled when
       you first import pyparsing.  To activate the packrat feature, your
       program must call the static method C{ParserElement.enablePackrat()}.  If
       your program uses C{psyco} to "compile as you go", you must call
       C{enablePackrat} before calling C{psyco.full()}.  If you do not do this,
       Python will crash.  For best results, call C{enablePackrat()} immediately
       after importing pyparsing.

       Example::
           import pyparsing
           pyparsing.ParserElement.enablePackrat()
    """
    if ParserElement._packratEnabled:
        return
    ParserElement._packratEnabled = True
    if cache_size_limit is None:
        ParserElement.packrat_cache = ParserElement._UnboundedCache()
    else:
        ParserElement.packrat_cache = ParserElement._FifoCache(cache_size_limit)
    # route all parsing through the memoizing entry point from now on
    ParserElement._parse = ParserElement._parseCache
1607
def parseString( self, instring, parseAll=False ):
    """
    Execute the parse expression with the given string.
    This is the main interface to the client code, once the complete
    expression has been built.

    If you want the grammar to require that the entire input string be
    successfully parsed, then set C{parseAll} to True (equivalent to ending
    the grammar with C{L{StringEnd()}}).

    Note: C{parseString} implicitly calls C{expandtabs()} on the input string
    (unless C{keepTabs} is set), in order to report proper column numbers in
    parse actions.
    If the input string contains tabs and
    the grammar uses parse actions that use the C{loc} argument to index into the
    string being parsed, you can ensure you have a consistent view of the input
    string by:
     - calling C{parseWithTabs} on your grammar before calling C{parseString}
       (see L{I{parseWithTabs}<parseWithTabs>})
     - define your parse action using the full C{(s,loc,toks)} signature, and
       reference the input string using the parse action's C{s} argument
     - explicitly expand the tabs in your input string before calling
       C{parseString}

    Example::
        Word('a').parseString('aaaaabaaa')  # -> ['aaaaa']
        Word('a').parseString('aaaaabaaa', parseAll=True)  # -> Exception: Expected end of text
    """
    ParserElement.resetCache()
    if not self.streamlined:
        self.streamline()
        #~ self.saveAsList = True
    for ignorable in self.ignoreExprs:
        ignorable.streamline()
    if not self.keepTabs:
        instring = instring.expandtabs()
    try:
        loc, tokens = self._parse( instring, 0 )
        if parseAll:
            # require that nothing but skippable text remains after the match
            loc = self.preParse( instring, loc )
            end_check = Empty() + StringEnd()
            end_check._parse( instring, loc )
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc
    return tokens
1657
def scanString( self, instring, maxMatches=_MAX_INT, overlap=False ):
    """
    Generator that scans the input string for matches of this expression,
    yielding C{(tokens, start, end)} for each match found.  May be called
    with the optional C{maxMatches} argument, to clip scanning after 'n'
    matches are found.  If C{overlap} is specified, then overlapping matches
    will be reported.

    Note that the start and end locations are reported relative to the string
    being parsed.  See L{I{parseString}<parseString>} for more information on parsing
    strings with embedded tabs.

    Example::
        source = "sldjf123lsdjjkf345sldkjf879lkjsfd987"
        print(source)
        for tokens,start,end in Word(alphas).scanString(source):
            print(' '*start + '^'*(end-start))
            print(' '*start + tokens[0])

    prints::

        sldjf123lsdjjkf345sldkjf879lkjsfd987
        ^^^^^
        sldjf
                ^^^^^^^
                lsdjjkf
                          ^^^^^^
                          sldkjf
                                   ^^^^^^
                                   lkjsfd
    """
    if not self.streamlined:
        self.streamline()
    for ignorable in self.ignoreExprs:
        ignorable.streamline()

    if not self.keepTabs:
        instring = _ustr(instring).expandtabs()
    total_len = len(instring)
    loc = 0
    # bind the bound methods once, outside the scanning loop
    preparse = self.preParse
    parse = self._parse
    ParserElement.resetCache()
    found = 0
    try:
        while loc <= total_len and found < maxMatches:
            try:
                match_start = preparse( instring, loc )
                match_end, tokens = parse( instring, match_start, callPreParse=False )
            except ParseException:
                # no match here; move one character past the attempted start
                loc = match_start+1
            else:
                if match_end <= loc:
                    # the match did not get past loc; step forward to avoid stalling
                    loc = match_start+1
                    continue
                found += 1
                yield tokens, match_start, match_end
                if not overlap:
                    loc = match_end
                elif preparse( instring, loc ) > loc:
                    loc = match_end
                else:
                    loc += 1
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc
1728
def transformString( self, instring ):
    """
    Extension to C{L{scanString}}, to modify matching text with modified tokens that may
    be returned from a parse action.  To use C{transformString}, define a grammar and
    attach a parse action to it that modifies the returned token list.
    Invoking C{transformString()} on a target string will then scan for matches,
    and replace the matched text patterns according to the logic in the parse
    action.  C{transformString()} returns the resulting transformed string.

    Note that calling this method sets C{keepTabs} on this element.

    Example::
        wd = Word(alphas)
        wd.setParseAction(lambda toks: toks[0].title())

        print(wd.transformString("now is the winter of our discontent made glorious summer by this sun of york."))
    Prints::
        Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York.
    """
    pieces = []
    prev_end = 0
    # force preservation of <TAB>s, to minimize unwanted transformation of string, and to
    # keep string locs straight between transformString and scanString
    self.keepTabs = True
    try:
        for tokens, start, end in self.scanString( instring ):
            # text between the previous match and this one is copied unchanged
            pieces.append( instring[prev_end:start] )
            if tokens:
                if isinstance(tokens, ParseResults):
                    pieces.extend(tokens.asList())
                elif isinstance(tokens, list):
                    pieces.extend(tokens)
                else:
                    pieces.append(tokens)
            prev_end = end
        pieces.append(instring[prev_end:])
        non_empty = [piece for piece in pieces if piece]
        return "".join(map(_ustr,_flatten(non_empty)))
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc
1771
def searchString( self, instring, maxMatches=_MAX_INT ):
    """
    Another extension to C{L{scanString}}, simplifying the access to the tokens found
    to match the given parse expression: the tokens of every match are collected
    into a single C{ParseResults}.  May be called with optional
    C{maxMatches} argument, to clip searching after 'n' matches are found.

    Example::
        # a capitalized word starts with an uppercase letter, followed by zero or more lowercase letters
        cap_word = Word(alphas.upper(), alphas.lower())

        print(cap_word.searchString("More than Iron, more than Lead, more than Gold I need Electricity"))

        # the sum() builtin can be used to merge results into a single ParseResults object
        print(sum(cap_word.searchString("More than Iron, more than Lead, more than Gold I need Electricity")))
    prints::
        [['More'], ['Iron'], ['Lead'], ['Gold'], ['I'], ['Electricity']]
        ['More', 'Iron', 'Lead', 'Gold', 'I', 'Electricity']
    """
    try:
        found = [ match[0] for match in self.scanString( instring, maxMatches ) ]
        return ParseResults(found)
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc
1798
def split(self, instring, maxsplit=_MAX_INT, includeSeparators=False):
    """
    Generator method to split a string using the given expression as a separator.
    May be called with optional C{maxsplit} argument, to limit the number of splits;
    and the optional C{includeSeparators} argument (default=C{False}), if the separating
    matching text should be included in the split results.

    Yields the text between consecutive matches, then the text after the
    last match. When C{includeSeparators} is true, the first token of each
    match is yielded after the piece that precedes it.

    Example::
        punc = oneOf(list(".,;:/-!?"))
        print(list(punc.split("This, this?, this sentence, is badly punctuated!")))
    prints::
        ['This', ' this', '', ' this sentence', ' is badly punctuated', '']
    """
    # end location of the previous separator match (start of the next piece)
    last = 0
    for t, s, e in self.scanString(instring, maxMatches=maxsplit):
        yield instring[last:s]
        if includeSeparators:
            yield t[0]
        last = e
    # whatever follows the final separator (possibly the empty string)
    yield instring[last:]
1820
def __add__(self, other):
    """
    Implementation of + operator - returns C{L{And}}. Adding strings to a ParserElement
    converts them to L{Literal}s by default.

    If C{other} is neither a string nor a C{ParserElement}, a C{SyntaxWarning}
    is issued and C{None} is returned.

    Example::
        greet = Word(alphas) + "," + Word(alphas) + "!"
        hello = "Hello, World!"
        print (hello, "->", greet.parseString(hello))
    Prints::
        Hello, World! -> ['Hello', ',', 'World', '!']
    """
    if isinstance(other, basestring):
        other = ParserElement._literalStringClass(other)
    if isinstance(other, ParserElement):
        return And([self, other])
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
1840
def __radd__(self, other):
    """
    Implementation of + operator when left operand is not a C{L{ParserElement}}.

    A string operand is converted with C{ParserElement._literalStringClass};
    any other non-C{ParserElement} operand produces a C{SyntaxWarning} and a
    C{None} result.
    """
    if isinstance(other, basestring):
        other = ParserElement._literalStringClass(other)
    if isinstance(other, ParserElement):
        return other + self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
1852
def __sub__(self, other):
    """
    Implementation of - operator, returns C{L{And}} with error stop.

    The result is C{self + And._ErrorStop() + other}. A string operand is
    converted with C{ParserElement._literalStringClass}; any other
    non-C{ParserElement} operand produces a C{SyntaxWarning} and C{None}.
    """
    if isinstance(other, basestring):
        other = ParserElement._literalStringClass(other)
    if isinstance(other, ParserElement):
        return self + And._ErrorStop() + other
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
1864
def __rsub__(self, other):
    """
    Implementation of - operator when left operand is not a C{L{ParserElement}}.

    A string operand is converted with C{ParserElement._literalStringClass};
    any other non-C{ParserElement} operand produces a C{SyntaxWarning} and a
    C{None} result.
    """
    if isinstance(other, basestring):
        other = ParserElement._literalStringClass(other)
    if isinstance(other, ParserElement):
        return other - self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
1876
def __mul__(self, other):
    """
    Implementation of * operator, allows use of C{expr * 3} in place of
    C{expr + expr + expr}.  Expressions may also be multiplied by a 2-integer
    tuple, similar to C{{min,max}} multipliers in regular expressions.  Tuples
    may also include C{None} as in:
     - C{expr*(n,None)} or C{expr*(n,)} is equivalent
          to C{expr*n + L{ZeroOrMore}(expr)}
          (read as "at least n instances of C{expr}")
     - C{expr*(None,n)} is equivalent to C{expr*(0,n)}
          (read as "0 to n instances of C{expr}")
     - C{expr*(None,None)} is equivalent to C{L{ZeroOrMore}(expr)}
     - C{expr*(1,None)} is equivalent to C{L{OneOrMore}(expr)}

    Note that C{expr*(None,n)} does not raise an exception if
    more than n exprs exist in the input stream; that is,
    C{expr*(None,n)} does not enforce a maximum number of expr
    occurrences.  If this behavior is desired, then write
    C{expr*(None,n) + ~expr}

    Raises C{TypeError} if C{other} is not an int or a tuple of ints/C{None},
    and C{ValueError} for a negative count, a max smaller than the min, or a
    multiplier of 0 / (0,0).
    """
    if isinstance(other, int):
        minElements, optElements = other, 0
    elif isinstance(other, tuple):
        # pad short tuples with None so (n,) behaves like (n, None)
        other = (other + (None, None))[:2]
        if other[0] is None:
            other = (0, other[1])
        if isinstance(other[0], int) and other[1] is None:
            if other[0] == 0:
                return ZeroOrMore(self)
            if other[0] == 1:
                return OneOrMore(self)
            else:
                return self * other[0] + ZeroOrMore(self)
        elif isinstance(other[0], int) and isinstance(other[1], int):
            minElements, optElements = other
            # optElements becomes the count of optional repetitions beyond the minimum
            optElements -= minElements
        else:
            # the type names must be interpolated into the message; passing them as
            # extra TypeError arguments leaves the '%s' placeholders unformatted
            raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects"
                            % (type(other[0]).__name__, type(other[1]).__name__))
    else:
        raise TypeError("cannot multiply 'ParserElement' and '%s' objects"
                        % type(other).__name__)

    if minElements < 0:
        raise ValueError("cannot multiply ParserElement by negative value")
    if optElements < 0:
        raise ValueError("second tuple value must be greater or equal to first tuple value")
    if minElements == optElements == 0:
        raise ValueError("cannot multiply ParserElement by 0 or (0,0)")

    if optElements:
        def makeOptionalList(n):
            # nested Optionals: Optional(self + Optional(self + ...)), n levels deep
            if n > 1:
                return Optional(self + makeOptionalList(n - 1))
            else:
                return Optional(self)
        if minElements:
            if minElements == 1:
                ret = self + makeOptionalList(optElements)
            else:
                ret = And([self] * minElements) + makeOptionalList(optElements)
        else:
            ret = makeOptionalList(optElements)
    else:
        if minElements == 1:
            ret = self
        else:
            ret = And([self] * minElements)
    return ret
1944
def __rmul__(self, other):
    """
    Implementation of * operator when the multiplier is the left operand;
    simply delegates to C{self.__mul__(other)}.
    """
    product = self.__mul__(other)
    return product
1947
def __or__(self, other):
    """
    Implementation of | operator - returns C{L{MatchFirst}}.

    A string operand is converted with C{ParserElement._literalStringClass};
    any other non-C{ParserElement} operand produces a C{SyntaxWarning} and a
    C{None} result.
    """
    if isinstance(other, basestring):
        other = ParserElement._literalStringClass(other)
    if isinstance(other, ParserElement):
        return MatchFirst([self, other])
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
1959
def __ror__(self, other):
    """
    Implementation of | operator when left operand is not a C{L{ParserElement}}.

    A string operand is converted with C{ParserElement._literalStringClass};
    any other non-C{ParserElement} operand produces a C{SyntaxWarning} and a
    C{None} result.
    """
    if isinstance(other, basestring):
        other = ParserElement._literalStringClass(other)
    if isinstance(other, ParserElement):
        return other | self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
1971
def __xor__(self, other):
    """
    Implementation of ^ operator - returns C{L{Or}}.

    A string operand is converted with C{ParserElement._literalStringClass};
    any other non-C{ParserElement} operand produces a C{SyntaxWarning} and a
    C{None} result.
    """
    if isinstance(other, basestring):
        other = ParserElement._literalStringClass(other)
    if isinstance(other, ParserElement):
        return Or([self, other])
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
1983
def __rxor__(self, other):
    """
    Implementation of ^ operator when left operand is not a C{L{ParserElement}}.

    A string operand is converted with C{ParserElement._literalStringClass};
    any other non-C{ParserElement} operand produces a C{SyntaxWarning} and a
    C{None} result.
    """
    if isinstance(other, basestring):
        other = ParserElement._literalStringClass(other)
    if isinstance(other, ParserElement):
        return other ^ self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
1995
def __and__(self, other):
    """
    Implementation of & operator - returns C{L{Each}}.

    A string operand is converted with C{ParserElement._literalStringClass};
    any other non-C{ParserElement} operand produces a C{SyntaxWarning} and a
    C{None} result.
    """
    if isinstance(other, basestring):
        other = ParserElement._literalStringClass(other)
    if isinstance(other, ParserElement):
        return Each([self, other])
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
2007
def __rand__(self, other):
    """
    Implementation of & operator when left operand is not a C{L{ParserElement}}.

    A string operand is converted with C{ParserElement._literalStringClass};
    any other non-C{ParserElement} operand produces a C{SyntaxWarning} and a
    C{None} result.
    """
    if isinstance(other, basestring):
        other = ParserElement._literalStringClass(other)
    if isinstance(other, ParserElement):
        return other & self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
2019
def __invert__(self):
    """
    Implementation of ~ operator - wraps this expression in a C{L{NotAny}}
    and returns it.
    """
    negated = NotAny(self)
    return negated
2025
def __call__(self, name=None):
    """
    Shortcut for C{L{setResultsName}}, with C{listAllMatches=False}.

    If C{name} is given with a trailing C{'*'} character, then C{listAllMatches} will be
    passed as C{True}.

    If C{name} is omitted (left as C{None}), same as calling C{L{copy}}.

    Example::
        # these are equivalent
        userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno")
        userdata = Word(alphas)("name") + Word(nums+"-")("socsecno")
    """
    if name is None:
        return self.copy()
    return self.setResultsName(name)
2044
def suppress(self):
    """
    Suppresses the output of this C{ParserElement}; useful to keep punctuation from
    cluttering up returned output.

    Returns a new C{Suppress} wrapping this expression.
    """
    wrapped = Suppress(self)
    return wrapped
2051
def leaveWhitespace(self):
    """
    Disables the skipping of whitespace before matching the characters in the
    C{ParserElement}'s defined pattern.  This is normally only used internally by
    the pyparsing module, but may be needed in some whitespace-sensitive grammars.

    Clears C{skipWhitespace} and returns C{self} so calls can be chained.
    """
    setattr(self, "skipWhitespace", False)
    return self
2060
def setWhitespaceChars(self, chars):
    """
    Overrides the default whitespace chars.

    Re-enables whitespace skipping, stores C{chars} as this element's
    whitespace characters, and clears C{copyDefaultWhiteChars}.
    Returns C{self} so calls can be chained.
    """
    self.whiteChars = chars
    self.skipWhitespace = True
    self.copyDefaultWhiteChars = False
    return self
2069
def parseWithTabs(self):
    """
    Overrides the default behavior of expanding C{<TAB>}s to spaces before
    parsing the input string, by setting C{keepTabs}.
    Must be called before C{parseString} when the input grammar contains elements that
    match C{<TAB>} characters.

    Returns C{self} so calls can be chained.
    """
    setattr(self, "keepTabs", True)
    return self
2078
def ignore(self, other):
    """
    Define expression to be ignored (e.g., comments) while doing pattern
    matching; may be called repeatedly, to define multiple comment or other
    ignorable patterns.

    A string is wrapped in C{Suppress}. A C{Suppress} instance is appended to
    C{ignoreExprs} only if not already present; any other expression is copied
    and wrapped in a new C{Suppress} before being appended. Returns C{self}.

    Example::
        patt = OneOrMore(Word(alphas))
        patt.parseString('ablaj /* comment */ lskjd') # -> ['ablaj']

        patt.ignore(cStyleComment)
        patt.parseString('ablaj /* comment */ lskjd') # -> ['ablaj', 'lskjd']
    """
    if isinstance(other, basestring):
        other = Suppress(other)

    if not isinstance(other, Suppress):
        self.ignoreExprs.append(Suppress(other.copy()))
    elif other not in self.ignoreExprs:
        self.ignoreExprs.append(other)
    return self
2101
def setDebugActions(self, startAction, successAction, exceptionAction):
    """
    Enable display of debugging messages while doing pattern matching.

    Stores the three callables in C{debugActions}; any falsy argument is
    replaced by the corresponding module default action. Sets C{debug}
    to True and returns C{self}.
    """
    on_start = startAction or _defaultStartDebugAction
    on_success = successAction or _defaultSuccessDebugAction
    on_exception = exceptionAction or _defaultExceptionDebugAction
    self.debugActions = (on_start, on_success, on_exception)
    self.debug = True
    return self
2111
def setDebug(self, flag=True):
    """
    Enable display of debugging messages while doing pattern matching.
    Set C{flag} to True to enable, False to disable.

    Example::
        wd = Word(alphas).setName("alphaword")
        integer = Word(nums).setName("numword")
        term = wd | integer

        # turn on debugging for wd
        wd.setDebug()

        OneOrMore(term).parseString("abc 123 xyz 890")

    prints::
        Match alphaword at loc 0(1,1)
        Matched alphaword -> ['abc']
        Match alphaword at loc 3(1,4)
        Exception raised:Expected alphaword (at char 4), (line:1, col:5)
        Match alphaword at loc 7(1,8)
        Matched alphaword -> ['xyz']
        Match alphaword at loc 11(1,12)
        Exception raised:Expected alphaword (at char 12), (line:1, col:13)
        Match alphaword at loc 15(1,16)
        Exception raised:Expected alphaword (at char 15), (line:1, col:16)

    The output shown is that produced by the default debug actions - custom debug actions can be
    specified using L{setDebugActions}. Prior to attempting
    to match the C{wd} expression, the debugging message C{"Match <exprname> at loc <n>(<line>,<col>)"}
    is shown. Then if the parse succeeds, a C{"Matched"} message is shown, or an C{"Exception raised"}
    message is shown. Also note the use of L{setName} to assign a human-readable name to the expression,
    which makes debugging and exception messages easier to understand - for instance, the default
    name created for the C{Word} expression without calling C{setName} is C{"W:(ABCD...)"}.
    """
    if not flag:
        self.debug = False
        return self
    # enabling installs the default actions (setDebugActions also sets self.debug)
    self.setDebugActions(_defaultStartDebugAction,
                         _defaultSuccessDebugAction,
                         _defaultExceptionDebugAction)
    return self
2152
def __str__(self):
    """Return this element's C{name} attribute."""
    label = self.name
    return label
2155
def __repr__(self):
    """Return the same text as the string form, converted via C{_ustr}."""
    text = _ustr(self)
    return text
2158
def streamline(self):
    """
    Mark this element as streamlined and reset its cached C{strRepr};
    returns C{self}.
    """
    self.strRepr = None
    self.streamlined = True
    return self
2163
def checkRecursion(self, parseElementList):
    """
    Recursion check hook; this base implementation does nothing and
    returns C{None}.
    """
    return None
2166
def validate(self, validateTrace=[]):
    """
    Check defined expressions for valid structure, check for infinite recursive definitions.

    This implementation ignores C{validateTrace} (the shared default list is
    never mutated here) and runs C{checkRecursion} with a fresh empty list.
    """
    seen = []
    self.checkRecursion(seen)
2172
def parseFile(self, file_or_filename, parseAll=False):
    """
    Execute the parse expression on the given file or filename.
    If a filename is specified (instead of a file object),
    the entire file is opened, read, and closed before parsing.

    The contents are passed to C{parseString} together with C{parseAll}.
    """
    try:
        file_contents = file_or_filename.read()
    except AttributeError:
        # no usable read() attribute: treat the argument as a path to open
        with open(file_or_filename, "r") as source:
            file_contents = source.read()
    try:
        return self.parseString(file_contents, parseAll)
    except ParseBaseException as exc:
        if not ParserElement.verbose_stacktrace:
            # re-raise from this frame; intended to keep pyparsing's internal
            # frames out of the reported stack trace
            raise exc
        raise
2192
def __eq__(self, other):
    """
    Equality test.

     - against another C{ParserElement}: true when it is the same object or
       both have equal instance attributes (C{vars})
     - against a string: true when this expression C{matches} the string
     - anything else: defers to the comparison of the C{super} proxy with C{other}
    """
    if isinstance(other, ParserElement):
        return self is other or vars(self) == vars(other)
    if isinstance(other, basestring):
        return self.matches(other)
    return super(ParserElement, self) == other
2200
def __ne__(self, other):
    """Inequality test; the logical negation of C{self == other}."""
    same = (self == other)
    return not same
2203
def __hash__(self):
    """Hash by object identity (the hash of C{id(self)})."""
    identity = id(self)
    return hash(identity)
2206
def __req__(self, other):
    """Reflected-equality helper; returns the result of C{self == other}."""
    result = (self == other)
    return result
2209
def __rne__(self, other):
    """Reflected-inequality helper; the logical negation of C{self == other}."""
    same = (self == other)
    return not same
2212
def matches(self, testString, parseAll=True):
    """
    Method for quick testing of a parser against a test string. Good for simple
    inline microtests of sub expressions while building up larger parser.

    Parameters:
     - testString - to test against this expression for a match
     - parseAll - (default=C{True}) - flag to pass to C{L{parseString}} when running tests

    Returns C{True} if C{parseString} succeeds, C{False} if it raises a
    C{ParseBaseException}.

    Example::
        expr = Word(nums)
        assert expr.matches("100")
    """
    try:
        self.parseString(_ustr(testString), parseAll=parseAll)
    except ParseBaseException:
        return False
    else:
        return True
2231
def runTests(self, tests, parseAll=True, comment='#', fullDump=True, printResults=True, failureTests=False):
    """
    Execute the parse expression on a series of test strings, showing each
    test, the parsed results or where the parse failed. Quick and easy way to
    run a parse expression against a list of sample strings.

    Parameters:
     - tests - a list of separate test strings, or a multiline string of test strings
     - parseAll - (default=C{True}) - flag to pass to C{L{parseString}} when running tests
     - comment - (default=C{'#'}) - expression for indicating embedded comments in the test
          string; pass None to disable comment filtering
     - fullDump - (default=C{True}) - dump results as list followed by results names in nested outline;
          if False, only dump nested list
     - printResults - (default=C{True}) prints test output to stdout
     - failureTests - (default=C{False}) indicates if these tests are expected to fail parsing

    Returns: a (success, results) tuple, where success indicates that all tests succeeded
    (or failed if C{failureTests} is True), and the results contain a list of
    (test string, result) pairs, where result is the parsed results or the
    exception raised for that test

    Example::
        number_expr = pyparsing_common.number.copy()

        result = number_expr.runTests('''
            # unsigned integer
            100
            # negative integer
            -100
            # float with scientific notation
            6.02e23
            # integer with scientific notation
            1e-12
            ''')
        print("Success" if result[0] else "Failed!")

        result = number_expr.runTests('''
            # stray character
            100Z
            # missing leading digit before '.'
            -.100
            # too many '.'
            3.14.159
            ''', failureTests=True)
        print("Success" if result[0] else "Failed!")
    prints::
        # unsigned integer
        100
        [100]

        # negative integer
        -100
        [-100]

        # float with scientific notation
        6.02e23
        [6.02e+23]

        # integer with scientific notation
        1e-12
        [1e-12]

        Success

        # stray character
        100Z
           ^
        FAIL: Expected end of text (at char 3), (line:1, col:4)

        # missing leading digit before '.'
        -.100
        ^
        FAIL: Expected {real number with scientific notation | real number | signed integer} (at char 0), (line:1, col:1)

        # too many '.'
        3.14.159
            ^
        FAIL: Expected end of text (at char 4), (line:1, col:5)

        Success

    Each test string must be on a single line. If you want to test a string that spans multiple
    lines, create a test like this::

        expr.runTests(r"this is a test\\n of strings that spans \\n 3 lines")

    (Note that this is a raw string literal, you must include the leading 'r'.)
    """
    if isinstance(tests, basestring):
        tests = list(map(str.strip, tests.rstrip().splitlines()))
    if isinstance(comment, basestring):
        comment = Literal(comment)

    allResults = []
    pending_comments = []
    success = True
    for t in tests:
        # comment lines, and blank lines that follow a comment, are held back
        # and emitted as a header for the next real test
        is_comment = comment is not None and comment.matches(t, False)
        if is_comment or (pending_comments and not t):
            pending_comments.append(t)
            continue
        if not t:
            continue

        out = ['\n'.join(pending_comments), t]
        pending_comments = []
        try:
            # a literal backslash-n in the test text stands for a real newline
            t = t.replace(r'\n', '\n')
            result = self.parseString(t, parseAll=parseAll)
            out.append(result.dump(full=fullDump))
            success = success and not failureTests
        except ParseBaseException as pe:
            fatal = "(FATAL)" if isinstance(pe, ParseFatalException) else ""
            if '\n' in t:
                # multi-line input: show the failing line, caret under its column
                out.append(line(pe.loc, t))
                caret_indent = col(pe.loc, t) - 1
            else:
                caret_indent = pe.loc
            out.append(' ' * caret_indent + '^' + fatal)
            out.append("FAIL: " + str(pe))
            success = success and failureTests
            result = pe
        except Exception as exc:
            out.append("FAIL-EXCEPTION: " + str(exc))
            success = success and failureTests
            result = exc

        if printResults:
            if fullDump:
                out.append('')
            print('\n'.join(out))

        allResults.append((t, result))

    return success, allResults
2362
2363
class Token(ParserElement):
    """
    Abstract C{ParserElement} subclass, for defining atomic matching patterns.
    """
    def __init__(self):
        # initialise the base element with savelist turned off
        super(Token, self).__init__(savelist=False)
2370
2371
class Empty(Token):
    """
    An empty token, will always match.
    """
    def __init__(self):
        super(Empty, self).__init__()
        self.name = "Empty"
        self.mayIndexError = False
        self.mayReturnEmpty = True
2381
2382
class NoMatch(Token):
    """
    A token that will never match.
    """
    def __init__(self):
        super(NoMatch, self).__init__()
        self.name = "NoMatch"
        self.errmsg = "Unmatchable token"
        self.mayReturnEmpty = True
        self.mayIndexError = False

    def parseImpl(self, instring, loc, doActions=True):
        """Always fail: raise C{ParseException} at C{loc} with this element's C{errmsg}."""
        raise ParseException(instring, loc, self.errmsg, self)
2396
2397
class Literal(Token):
    """
    Token to exactly match a specified string.

    Example::
        Literal('blah').parseString('blah')  # -> ['blah']
        Literal('blah').parseString('blahfooblah')  # -> ['blah']
        Literal('blah').parseString('bla')  # -> Exception: Expected "blah"

    For case-insensitive matching, use L{CaselessLiteral}.

    For keyword matching (force word break before and after the matched string),
    use L{Keyword} or L{CaselessKeyword}.
    """
    def __init__(self, matchString):
        super(Literal, self).__init__()
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            # an empty match string: warn and turn this instance into an Empty
            warnings.warn("null string passed to Literal; use Empty() instead",
                          SyntaxWarning, stacklevel=2)
            self.__class__ = Empty
        self.name = '"%s"' % _ustr(self.match)
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = False
        self.mayIndexError = False

    # Performance tuning: this routine gets called a *lot*
    # if this is a single character match string  and the first character matches,
    # short-circuit as quickly as possible, and avoid calling startswith
    def parseImpl(self, instring, loc, doActions=True):
        """
        Match the literal at C{loc}; returns C{(new_loc, match_string)} or
        raises C{ParseException}.
        """
        if instring[loc] == self.firstMatchChar:
            if self.matchLen == 1 or instring.startswith(self.match, loc):
                return loc + self.matchLen, self.match
        raise ParseException(instring, loc, self.errmsg, self)


# short alias, and the class used when strings are combined with ParserElements
_L = Literal
ParserElement._literalStringClass = Literal
2438
class Keyword(Token):
    """
    Token to exactly match a specified string as a keyword, that is, it must be
    immediately followed by a non-keyword character (and, per C{parseImpl}, not
    immediately preceded by one).  Compare with C{L{Literal}}:
     - C{Literal("if")} will match the leading C{'if'} in C{'ifAndOnlyIf'}.
     - C{Keyword("if")} will not; it will only match the leading C{'if'} in C{'if x=1'}, or C{'if(y==2)'}
    Accepts two optional constructor arguments in addition to the keyword string:
     - C{identChars} is a string of characters that would be valid identifier characters,
          defaulting to all alphanumerics + "_" and "$"
     - C{caseless} allows case-insensitive matching, default is C{False}.

    Example::
        Keyword("start").parseString("start")  # -> ['start']
        Keyword("start").parseString("starting")  # -> Exception

    For case-insensitive matching, use L{CaselessKeyword}.
    """
    DEFAULT_KEYWORD_CHARS = alphanums+"_$"

    def __init__(self, matchString, identChars=None, caseless=False):
        super(Keyword, self).__init__()
        if identChars is None:
            identChars = Keyword.DEFAULT_KEYWORD_CHARS
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            warnings.warn("null string passed to Keyword; use Empty() instead",
                          SyntaxWarning, stacklevel=2)
        self.name = '"%s"' % self.match
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = False
        self.mayIndexError = False
        self.caseless = caseless
        if caseless:
            # caseless comparison is done in upper case, for both the keyword
            # text and the identifier characters
            self.caselessmatch = matchString.upper()
            identChars = identChars.upper()
        self.identChars = set(identChars)

    def parseImpl(self, instring, loc, doActions=True):
        """
        Match the keyword at C{loc}, requiring that the characters just before
        and just after it (if any) are not in C{identChars}.
        Returns C{(new_loc, match_string)} or raises C{ParseException}.
        """
        if self.caseless:
            if ((instring[loc:loc + self.matchLen].upper() == self.caselessmatch) and
                    (loc >= len(instring) - self.matchLen or
                     instring[loc + self.matchLen].upper() not in self.identChars) and
                    (loc == 0 or instring[loc - 1].upper() not in self.identChars)):
                return loc + self.matchLen, self.match
        else:
            if (instring[loc] == self.firstMatchChar and
                    (self.matchLen == 1 or instring.startswith(self.match, loc)) and
                    (loc >= len(instring) - self.matchLen or
                     instring[loc + self.matchLen] not in self.identChars) and
                    (loc == 0 or instring[loc - 1] not in self.identChars)):
                return loc + self.matchLen, self.match
        raise ParseException(instring, loc, self.errmsg, self)

    def copy(self):
        """
        Return a copy of this keyword that keeps its own C{identChars}.

        The copy gets an independent set with the same characters, so custom
        identifier characters (and the upper-casing applied for caseless
        keywords) survive copying instead of being replaced by
        C{DEFAULT_KEYWORD_CHARS}.
        """
        c = super(Keyword, self).copy()
        c.identChars = set(self.identChars)
        return c

    @staticmethod
    def setDefaultKeywordChars(chars):
        """Overrides the default Keyword chars
        """
        Keyword.DEFAULT_KEYWORD_CHARS = chars
2503
class CaselessLiteral(Literal):
    """
    Token to match a specified string, ignoring case of letters.
    Note: the matched results will always be in the case of the given
    match string, NOT the case of the input text.

    Example::
        OneOrMore(CaselessLiteral("CMD")).parseString("cmd CMD Cmd10") # -> ['CMD', 'CMD', 'CMD']

    (Contrast with example for L{CaselessKeyword}.)
    """
    def __init__(self, matchString):
        # the base Literal stores the upper-cased text used for comparison
        super(CaselessLiteral, self).__init__(matchString.upper())
        # Preserve the defining literal.
        self.returnString = matchString
        self.name = "'%s'" % self.returnString
        self.errmsg = "Expected " + self.name

    def parseImpl(self, instring, loc, doActions=True):
        """
        Compare the upper-cased input slice at C{loc} with the stored match;
        returns C{(new_loc, returnString)} or raises C{ParseException}.
        """
        end = loc + self.matchLen
        if instring[loc:end].upper() != self.match:
            raise ParseException(instring, loc, self.errmsg, self)
        return end, self.returnString
2526
class CaselessKeyword(Keyword):
    """
    Caseless version of L{Keyword}.

    Matching is inherited from C{Keyword.parseImpl} (caseless branch), so the
    keyword must be neither immediately preceded nor immediately followed by
    an identifier character, exactly as for C{Keyword(..., caseless=True)}.

    Example::
        OneOrMore(CaselessKeyword("CMD")).parseString("cmd CMD Cmd10") # -> ['CMD', 'CMD']

    (Contrast with example for L{CaselessLiteral}.)
    """
    def __init__(self, matchString, identChars=None):
        super(CaselessKeyword, self).__init__(matchString, identChars, caseless=True)

    # No parseImpl override: a separate implementation here would have to
    # repeat Keyword's caseless logic and could drift from it (e.g. by
    # omitting the check on the character preceding the keyword).
2544
class CloseMatch(Token):
    """
    A variation on L{Literal} which matches "close" matches, that is,
    strings with at most 'n' mismatching characters. C{CloseMatch} takes parameters:
     - C{match_string} - string to be matched
     - C{maxMismatches} - (C{default=1}) maximum number of mismatches allowed to count as a match

    The results from a successful parse will contain the matched text from the input string and the following named results:
     - C{mismatches} - a list of the positions within the match_string where mismatches were found
     - C{original} - the original match_string used to compare against the input string

    If C{mismatches} is an empty list, then the match was an exact match.

    Example::
        patt = CloseMatch("ATCATCGAATGGA")
        patt.parseString("ATCATCGAAXGGA") # -> (['ATCATCGAAXGGA'], {'mismatches': [[9]], 'original': ['ATCATCGAATGGA']})
        patt.parseString("ATCAXCGAAXGGA") # -> Exception: Expected 'ATCATCGAATGGA' (with up to 1 mismatches) (at char 0), (line:1, col:1)

        # exact match
        patt.parseString("ATCATCGAATGGA") # -> (['ATCATCGAATGGA'], {'mismatches': [[]], 'original': ['ATCATCGAATGGA']})

        # close match allowing up to 2 mismatches
        patt = CloseMatch("ATCATCGAATGGA", maxMismatches=2)
        patt.parseString("ATCAXCGAAXGGA") # -> (['ATCAXCGAAXGGA'], {'mismatches': [[4, 9]], 'original': ['ATCATCGAATGGA']})
    """
    def __init__(self, match_string, maxMismatches=1):
        super(CloseMatch, self).__init__()
        self.name = match_string
        self.match_string = match_string
        self.maxMismatches = maxMismatches
        self.errmsg = "Expected %r (with up to %d mismatches)" % (self.match_string, self.maxMismatches)
        self.mayIndexError = False
        self.mayReturnEmpty = False

    def parseImpl(self, instring, loc, doActions=True):
        """
        Compare C{len(match_string)} characters of C{instring} starting at
        C{loc} against C{match_string}, tolerating up to C{maxMismatches}
        differing positions.

        Returns C{(end_loc, results)} where C{end_loc} is the location just
        past the compared text; raises C{ParseException} if too many
        characters differ or the input is too short.
        """
        start = loc
        match_string = self.match_string
        maxloc = start + len(match_string)

        if maxloc <= len(instring):
            maxMismatches = self.maxMismatches
            mismatches = []

            for pos, (src, mat) in enumerate(zip(instring[start:maxloc], match_string)):
                if src != mat:
                    mismatches.append(pos)
                    if len(mismatches) > maxMismatches:
                        break
            else:
                # every position was compared, so the match ends at maxloc; the
                # end must be relative to start, not to the enumerate index,
                # or matches that begin after position 0 report a wrong span
                loc = maxloc
                results = ParseResults([instring[start:loc]])
                results['original'] = match_string
                results['mismatches'] = mismatches
                return loc, results

        raise ParseException(instring, loc, self.errmsg, self)
2604
2605
2606 class Word(Token):
2607 """
2608 Token for matching words composed of allowed character sets.
2609 Defined with string containing all allowed initial characters,
2610 an optional string containing allowed body characters (if omitted,
2611 defaults to the initial character set), and an optional minimum,
2612 maximum, and/or exact length. The default value for C{min} is 1 (a
2613 minimum value < 1 is not valid); the default values for C{max} and C{exact}
2614 are 0, meaning no maximum or exact length restriction. An optional
2615 C{excludeChars} parameter can list characters that might be found in
2616 the input C{bodyChars} string; useful to define a word of all printables
2617 except for one or two characters, for instance.
2618
2619 L{srange} is useful for defining custom character set strings for defining
2620 C{Word} expressions, using range notation from regular expression character sets.
2621
2622 A common mistake is to use C{Word} to match a specific literal string, as in
2623 C{Word("Address")}. Remember that C{Word} uses the string argument to define
2624 I{sets} of matchable characters. This expression would match "Add", "AAA",
2625 "dAred", or any other word made up of the characters 'A', 'd', 'r', 'e', and 's'.
2626 To match an exact literal string, use L{Literal} or L{Keyword}.
2627
2628 pyparsing includes helper strings for building Words:
2629 - L{alphas}
2630 - L{nums}
2631 - L{alphanums}
2632 - L{hexnums}
2633 - L{alphas8bit} (alphabetic characters in ASCII range 128-255 - accented, tilded, umlauted, etc.)
2634 - L{punc8bit} (non-alphabetic characters in ASCII range 128-255 - currency, symbols, superscripts, diacriticals, etc.)
2635 - L{printables} (any non-whitespace character)
2636
2637 Example::
2638 # a word composed of digits
2639 integer = Word(nums) # equivalent to Word("0123456789") or Word(srange("0-9"))
2640
2641 # a word with a leading capital, and zero or more lowercase
2642 capital_word = Word(alphas.upper(), alphas.lower())
2643
2644 # hostnames are alphanumeric, with leading alpha, and '-'
2645 hostname = Word(alphas, alphanums+'-')
2646
2647 # roman numeral (not a strict parser, accepts invalid mix of characters)
2648 roman = Word("IVXLCDM")
2649
2650 # any string of non-whitespace characters, except for ','
2651 csv_value = Word(printables, excludeChars=",")
2652 """
def __init__(self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False, excludeChars=None):
    """
    Build a C{Word} from its allowed initial characters, optional body
    characters (defaulting to the initial set), length limits C{min}/C{max}/
    C{exact}, the C{asKeyword} flag, and optional C{excludeChars} to strip
    from both character sets.

    Raises C{ValueError} if C{min} is less than 1.
    """
    super(Word, self).__init__()
    if excludeChars:
        initChars = ''.join(c for c in initChars if c not in excludeChars)
        if bodyChars:
            bodyChars = ''.join(c for c in bodyChars if c not in excludeChars)

    self.initCharsOrig = initChars
    self.initChars = set(initChars)
    # with no (remaining) body characters, the body uses the initial character set
    body_source = bodyChars if bodyChars else initChars
    self.bodyCharsOrig = body_source
    self.bodyChars = set(body_source)

    self.maxSpecified = max > 0

    if min < 1:
        raise ValueError("cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted")

    self.minLen = min
    self.maxLen = max if max > 0 else _MAX_INT

    if exact > 0:
        # an exact length overrides both limits
        self.maxLen = exact
        self.minLen = exact

    self.name = _ustr(self)
    self.errmsg = "Expected " + self.name
    self.mayIndexError = False
    self.asKeyword = asKeyword

    # with default length limits and no space among the characters, build an
    # equivalent regular expression for parseImpl to use
    default_lengths = (min == 1 and max == 0 and exact == 0)
    if ' ' not in self.initCharsOrig + self.bodyCharsOrig and default_lengths:
        if self.bodyCharsOrig == self.initCharsOrig:
            pattern = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig)
        elif len(self.initCharsOrig) == 1:
            pattern = "%s[%s]*" % (re.escape(self.initCharsOrig),
                                   _escapeRegexRangeChars(self.bodyCharsOrig),)
        else:
            pattern = "[%s][%s]*" % (_escapeRegexRangeChars(self.initCharsOrig),
                                     _escapeRegexRangeChars(self.bodyCharsOrig),)
        if self.asKeyword:
            pattern = r"\b" + pattern + r"\b"
        self.reString = pattern
        try:
            self.re = re.compile(self.reString)
        except Exception:
            # if the pattern cannot be compiled, leave no regex set
            self.re = None
2706
def parseImpl( self, instring, loc, doActions=True ):
    """
    Match a word starting at C{loc}.

    Returns a C{(new_loc, matched_text)} tuple, or raises C{ParseException}
    when no acceptable word starts at C{loc}.
    """
    # fast path: a compiled regular expression was prepared in __init__
    if self.re:
        found = self.re.match(instring, loc)
        if not found:
            raise ParseException(instring, loc, self.errmsg, self)
        return found.end(), found.group()

    if instring[loc] not in self.initChars:
        raise ParseException(instring, loc, self.errmsg, self)

    begin = loc
    total = len(instring)
    allowed = self.bodyChars
    limit = min(begin + self.maxLen, total)

    # consume body characters up to the maximum length or the end of input
    loc += 1
    while loc < limit and instring[loc] in allowed:
        loc += 1

    tooShort = loc - begin < self.minLen
    # with an explicit maximum, a word that keeps going past it is rejected
    runsOver = self.maxSpecified and loc < total and instring[loc] in allowed
    # as a keyword, the word may not touch body characters on either side
    touchesWord = self.asKeyword and (
        (begin > 0 and instring[begin-1] in allowed) or
        (loc < total and instring[loc] in allowed))

    if tooShort or runsOver or touchesWord:
        raise ParseException(instring, loc, self.errmsg, self)

    return loc, instring[begin:loc]
2741
def __str__( self ):
    """
    Return the base-class string form when it is available; otherwise build,
    cache in C{strRepr}, and return a short C{W:(...)} description of the
    character sets.
    """
    try:
        text = super(Word,self).__str__()
    except Exception:
        # base-class rendering failed (presumably no name assigned yet);
        # fall through to the generated description
        pass
    else:
        return text

    if self.strRepr is None:

        def abbreviate(chars):
            # show at most the first four characters of a set
            return (chars[:4] + "...") if len(chars) > 4 else chars

        if self.initCharsOrig == self.bodyCharsOrig:
            self.strRepr = "W:(%s)" % abbreviate(self.initCharsOrig)
        else:
            self.strRepr = "W:(%s,%s)" % ( abbreviate(self.initCharsOrig), abbreviate(self.bodyCharsOrig) )

    return self.strRepr
2763
2764
class Regex(Token):
    r"""
    Token for matching strings that match a given regular expression.
    Defined with string specifying the regular expression in a form recognized by the inbuilt Python re module.
    A precompiled regular expression object may be given instead of a string.
    If the given regex contains named groups (defined using C{(?P<name>...)}), these will be preserved as
    named parse results.

    Example::
        realnum = Regex(r"[+-]?\d+\.\d*")
        date = Regex(r'(?P<year>\d{4})-(?P<month>\d\d?)-(?P<day>\d\d?)')
        # ref: http://stackoverflow.com/questions/267399/how-do-you-match-only-valid-roman-numerals-with-a-regular-expression
        roman = Regex(r"M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})")
    """
    # type of a compiled pattern object, used to recognize precompiled input
    compiledREtype = type(re.compile("[A-Z]"))
    def __init__( self, pattern, flags=0):
        """
        The parameters C{pattern} and C{flags} are passed to the C{re.compile()} function as-is. See the Python C{re} module for an explanation of the acceptable patterns and flags.

        C{pattern} may also be an already compiled regular expression, which is
        used directly; C{flags} is then only recorded, not applied.

        Raises C{ValueError} if C{pattern} is neither a string nor a compiled
        regular expression, and re-raises the C{re} error (after issuing a
        C{SyntaxWarning}) if a string pattern does not compile.
        """
        super(Regex,self).__init__()

        if isinstance(pattern, basestring):
            if not pattern:
                warnings.warn("null string passed to Regex; use Empty() instead",
                        SyntaxWarning, stacklevel=2)

            self.pattern = pattern
            self.flags = flags

            try:
                self.re = re.compile(self.pattern, self.flags)
                self.reString = self.pattern
            except re.error:
                # re.error is the same exception class as sre_constants.error
                warnings.warn("invalid pattern (%s) passed to Regex" % pattern,
                    SyntaxWarning, stacklevel=2)
                raise

        elif isinstance(pattern, Regex.compiledREtype):
            self.re = pattern
            # record the pattern's source text; str(pattern) would yield
            # "re.compile('...')" and leak into the name and error message
            self.pattern = \
            self.reString = pattern.pattern
            self.flags = flags

        else:
            raise ValueError("Regex may only be constructed with a string or a compiled RE object")

        # the generated name is built from self.pattern, so assign it last
        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Match the regular expression at C{loc}.

        Returns C{(new_loc, results)}, where the results hold the matched text
        plus one named entry per named group; raises C{ParseException} when
        the expression does not match at C{loc}.
        """
        result = self.re.match(instring,loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = ParseResults(result.group())
        # expose named groups as named results
        named = result.groupdict()
        for key in named:
            ret[key] = named[key]
        return loc,ret

    def __str__( self ):
        """
        Return the base-class string form when available, else a cached
        C{Re:(...)} description built from the pattern text.
        """
        try:
            return super(Regex,self).__str__()
        except Exception:
            pass

        if self.strRepr is None:
            self.strRepr = "Re:(%s)" % repr(self.pattern)

        return self.strRepr
2836
2837
class QuotedString(Token):
    r"""
    Token for matching strings that are delimited by quoting characters.

    Defined with the following parameters:
        - quoteChar - string of one or more characters defining the quote delimiting string
        - escChar - character to escape quotes, typically backslash (default=C{None})
        - escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=C{None})
        - multiline - boolean indicating whether quotes can span multiple lines (default=C{False})
        - unquoteResults - boolean indicating whether the matched text should be unquoted (default=C{True})
        - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=C{None} => same as quoteChar)
        - convertWhitespaceEscapes - convert escaped whitespace (C{'\t'}, C{'\n'}, etc.) to actual whitespace (default=C{True})

    Example::
        qs = QuotedString('"')
        print(qs.searchString('lsjdf "This is the quote" sldjf'))
        complex_qs = QuotedString('{{', endQuoteChar='}}')
        print(complex_qs.searchString('lsjdf {{This is the "quote"}} sldjf'))
        sql_qs = QuotedString('"', escQuote='""')
        print(sql_qs.searchString('lsjdf "This is the quote with ""embedded"" quotes" sldjf'))
    prints::
        [['This is the quote']]
        [['This is the "quote"']]
        [['This is the quote with "embedded" quotes']]
    """
    def __init__( self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None, convertWhitespaceEscapes=True):
        """
        Validate the delimiters and compile the regular expression that
        recognizes one complete quoted string.

        Raises C{SyntaxError} (after a C{SyntaxWarning}) when either delimiter
        is empty once surrounding whitespace has been stripped.
        """
        super(QuotedString,self).__init__()

        # whitespace around a delimiter is discarded before use
        quoteChar = quoteChar.strip()
        if not quoteChar:
            warnings.warn("quoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
            raise SyntaxError()

        if endQuoteChar is not None:
            endQuoteChar = endQuoteChar.strip()
            if not endQuoteChar:
                warnings.warn("endQuoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
                raise SyntaxError()
        else:
            endQuoteChar = quoteChar

        self.quoteChar = quoteChar
        self.quoteCharLen = len(quoteChar)
        self.firstQuoteChar = quoteChar[0]
        self.endQuoteChar = endQuoteChar
        self.endQuoteCharLen = len(endQuoteChar)
        self.escChar = escChar
        self.escQuote = escQuote
        self.unquoteResults = unquoteResults
        self.convertWhitespaceEscapes = convertWhitespaceEscapes

        # Ordinary content: anything except the first closing-quote character
        # and the escape character (and, unless multiline, line breaks).
        if multiline:
            self.flags = re.MULTILINE | re.DOTALL
            contentTemplate = r'%s(?:[^%s%s]'
        else:
            self.flags = 0
            contentTemplate = r'%s(?:[^%s\n\r%s]'
        openQuoteRe = re.escape(self.quoteChar)
        endFirstCharRe = _escapeRegexRangeChars(self.endQuoteChar[0])
        if escChar is not None:
            escCharClass = _escapeRegexRangeChars(escChar)
        else:
            escCharClass = ''
        pieces = [ contentTemplate % (openQuoteRe, endFirstCharRe, escCharClass) ]

        # A multi-character closing quote: also accept every proper prefix of
        # it that is followed by a character which breaks the closing quote.
        if len(self.endQuoteChar) > 1:
            prefixAlternatives = [
                "%s[^%s]" % (re.escape(self.endQuoteChar[:i]),
                             _escapeRegexRangeChars(self.endQuoteChar[i]))
                for i in range(len(self.endQuoteChar)-1, 0, -1) ]
            pieces.append('|(?:' + ')|(?:'.join(prefixAlternatives) + ')')
        if escQuote:
            pieces.append(r'|(?:%s)' % re.escape(escQuote))
        if escChar:
            pieces.append(r'|(?:%s.)' % re.escape(escChar))
            # used by parseImpl to drop the escape character while unquoting
            self.escCharReplacePattern = re.escape(self.escChar)+"(.)"
        pieces.append(r')*%s' % re.escape(self.endQuoteChar))
        self.pattern = ''.join(pieces)

        try:
            self.re = re.compile(self.pattern, self.flags)
            self.reString = self.pattern
        except sre_constants.error:
            warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern,
                SyntaxWarning, stacklevel=2)
            raise

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Match one quoted string at C{loc}.

        Returns C{(new_loc, text)}. When C{unquoteResults} is set, the
        delimiters are removed and escapes are resolved; otherwise the text is
        returned exactly as matched. Raises C{ParseException} on no match.
        """
        # cheap first-character test before running the regular expression
        found = None
        if instring[loc] == self.firstQuoteChar:
            found = self.re.match(instring, loc)
        if not found:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = found.end()
        text = found.group()

        if not self.unquoteResults:
            return loc, text

        # strip off quotes
        text = text[self.quoteCharLen:-self.endQuoteCharLen]

        if isinstance(text, basestring):
            # replace escaped whitespace
            if '\\' in text and self.convertWhitespaceEscapes:
                whitespaceEscapes = (
                    (r'\t', '\t'),
                    (r'\n', '\n'),
                    (r'\f', '\f'),
                    (r'\r', '\r'),
                    )
                for literal, actual in whitespaceEscapes:
                    text = text.replace(literal, actual)

            # replace escaped characters
            if self.escChar:
                text = re.sub(self.escCharReplacePattern, r"\g<1>", text)

            # replace escaped quotes
            if self.escQuote:
                text = text.replace(self.escQuote, self.endQuoteChar)

        return loc, text

    def __str__( self ):
        """
        Return the base-class string form when available, else a cached
        description naming the opening and closing delimiters.
        """
        try:
            text = super(QuotedString,self).__str__()
        except Exception:
            pass
        else:
            return text

        if self.strRepr is None:
            self.strRepr = "quoted string, starting with %s ending with %s" % (self.quoteChar, self.endQuoteChar)

        return self.strRepr
2973
2974
class CharsNotIn(Token):
    """
    Token for matching words composed of characters I{not} in a given set (will
    include whitespace in matched characters if not listed in the provided exclusion set - see example).
    Defined with string containing all disallowed characters, and an optional
    minimum, maximum, and/or exact length.  The default value for C{min} is 1 (a
    minimum value < 1 is not valid); the default values for C{max} and C{exact}
    are 0, meaning no maximum or exact length restriction.

    Example::
        # define a comma-separated-value as anything that is not a ','
        csv_value = CharsNotIn(',')
        print(delimitedList(csv_value).parseString("dkls,lsdkjf,s12 34,@!#,213"))
    prints::
        ['dkls', 'lsdkjf', 's12 34', '@!#', '213']
    """
    def __init__( self, notChars, min=1, max=0, exact=0 ):
        """
        Record the excluded characters and the length limits.

        Raises C{ValueError} when C{min} is below 1. An C{exact} value greater
        than 0 replaces both the minimum and the maximum.
        """
        super(CharsNotIn,self).__init__()
        self.skipWhitespace = False
        self.notChars = notChars

        if min < 1:
            raise ValueError("cannot specify a minimum length < 1; use Optional(CharsNotIn()) if zero-length char group is permitted")

        self.minLen = min
        self.maxLen = max if max > 0 else _MAX_INT

        if exact > 0:
            self.maxLen = exact
            self.minLen = exact

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = ( self.minLen == 0 )
        self.mayIndexError = False

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Consume characters from C{loc} until an excluded character, the maximum
        length, or the end of input is reached.

        Returns C{(new_loc, matched_text)}; raises C{ParseException} when the
        first character is excluded or the run is shorter than the minimum.
        """
        excluded = self.notChars
        if instring[loc] in excluded:
            raise ParseException(instring, loc, self.errmsg, self)

        begin = loc
        limit = min( begin+self.maxLen, len(instring) )
        loc += 1
        while loc < limit and instring[loc] not in excluded:
            loc += 1

        if loc - begin < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[begin:loc]

    def __str__( self ):
        """
        Return the base-class string form when available, else a cached
        C{!W:(...)} description showing at most four excluded characters.
        """
        try:
            text = super(CharsNotIn, self).__str__()
        except Exception:
            pass
        else:
            return text

        if self.strRepr is None:
            if len(self.notChars) <= 4:
                self.strRepr = "!W:(%s)" % self.notChars
            else:
                self.strRepr = "!W:(%s...)" % self.notChars[:4]

        return self.strRepr
3045
class White(Token):
    r"""
    Special matching class for matching whitespace.  Normally, whitespace is ignored
    by pyparsing grammars.  This class is included when some whitespace structures
    are significant.  Define with a string containing the whitespace characters to be
    matched; default is C{" \t\r\n"}.  Also takes optional C{min}, C{max}, and C{exact} arguments,
    as defined for the C{L{Word}} class.
    """
    # display names used to build the expression name from the matched characters
    whiteStrs = {
        " " : "<SPC>",
        "\t": "<TAB>",
        "\n": "<LF>",
        "\r": "<CR>",
        "\f": "<FF>",
        }
    def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0):
        """
        Record the whitespace characters to match and the length limits.

        The characters being matched are removed from this element's own set
        of skippable whitespace. An C{exact} value greater than 0 replaces
        both the minimum and the maximum.
        """
        super(White,self).__init__()
        self.matchWhite = ws
        skippable = "".join(ch for ch in self.whiteChars if ch not in self.matchWhite)
        self.setWhitespaceChars( skippable )
        self.name = "".join(White.whiteStrs[ch] for ch in self.matchWhite)
        self.mayReturnEmpty = True
        self.errmsg = "Expected " + self.name

        self.minLen = min
        self.maxLen = max if max > 0 else _MAX_INT

        if exact > 0:
            self.maxLen = exact
            self.minLen = exact

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Consume a run of the configured whitespace characters starting at C{loc}.

        Returns C{(new_loc, matched_text)}; raises C{ParseException} when the
        character at C{loc} is not one of them or the run is too short.
        """
        wanted = self.matchWhite
        if instring[loc] not in wanted:
            raise ParseException(instring, loc, self.errmsg, self)

        begin = loc
        limit = min( begin + self.maxLen, len(instring) )
        loc += 1
        while loc < limit and instring[loc] in wanted:
            loc += 1

        if loc - begin < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[begin:loc]
3095
3096
class _PositionToken(Token):
    """
    Base class for the tokens that test the current parse position
    (C{GoToColumn}, C{LineStart}, C{LineEnd}, C{StringStart}, C{StringEnd},
    C{WordStart}, C{WordEnd}).
    """
    def __init__( self ):
        super(_PositionToken,self).__init__()
        self.mayIndexError = False
        self.mayReturnEmpty = True
        # position tokens are named after their concrete class
        self.name = type(self).__name__
3103
class GoToColumn(_PositionToken):
    """
    Token to advance to a specific column of input text; useful for tabular report scraping.
    """
    def __init__( self, colno ):
        super(GoToColumn,self).__init__()
        self.col = colno

    def preParse( self, instring, loc ):
        """
        Skip ignorable expressions and whitespace until the target column is
        reached, a non-space character is found, or the input ends; return the
        resulting location.
        """
        if col(loc,instring) == self.col:
            return loc

        if self.ignoreExprs:
            loc = self._skipIgnorables( instring, loc )
        end = len(instring)
        while loc < end and instring[loc].isspace() and col( loc, instring ) != self.col:
            loc += 1
        return loc

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Return the text between C{loc} and the target column, together with
        the location of that column; raise C{ParseException} when C{loc} is
        already past the target column.
        """
        current = col( loc, instring )
        if current > self.col:
            raise ParseException( instring, loc, "Text not in expected column", self )
        target = loc + self.col - current
        return target, instring[ loc: target ]
3128
3129
class LineStart(_PositionToken):
    r"""
    Matches if current position is at the beginning of a line within the parse string

    Example::

        test = '''\
        AAA this line
        AAA and this line
          AAA but not this one
        B AAA and definitely not this one
        '''

        for t in (LineStart() + 'AAA' + restOfLine).searchString(test):
            print(t)

    Prints::
        ['AAA', ' this line']
        ['AAA', ' and this line']

    """
    def __init__( self ):
        super(LineStart,self).__init__()
        self.errmsg = "Expected start of line"

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Succeed with no tokens when C{loc} is in column 1; otherwise raise
        C{ParseException}.
        """
        if col(loc, instring) != 1:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
3159
class LineEnd(_PositionToken):
    """
    Matches if current position is at the end of a line within the parse string
    """
    def __init__( self ):
        super(LineEnd,self).__init__()
        # a newline must not be skipped as whitespace, since it is what gets matched
        self.setWhitespaceChars( ParserElement.DEFAULT_WHITE_CHARS.replace("\n","") )
        self.errmsg = "Expected end of line"

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Match a newline at C{loc} (returning it as the token) or the exact end
        of the input (returning no tokens); in both cases the returned
        location is C{loc+1}. Anything else raises C{ParseException}.
        """
        end = len(instring)
        if loc < end and instring[loc] == "\n":
            return loc+1, "\n"
        if loc == end:
            return loc+1, []
        raise ParseException(instring, loc, self.errmsg, self)
3179
class StringStart(_PositionToken):
    """
    Matches if current position is at the beginning of the parse string
    """
    def __init__( self ):
        super(StringStart,self).__init__()
        self.errmsg = "Expected start of text"

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Succeed with no tokens when C{loc} is 0, or when C{loc} is where
        pre-parsing from position 0 ends up (everything before it is only
        whitespace and ignorables); otherwise raise C{ParseException}.
        """
        if loc != 0 and loc != self.preParse( instring, 0 ):
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
3194
class StringEnd(_PositionToken):
    """
    Matches if current position is at the end of the parse string
    """
    def __init__( self ):
        super(StringEnd,self).__init__()
        self.errmsg = "Expected end of text"

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Raise C{ParseException} when input remains after C{loc}. At the exact
        end of input return C{loc+1}; beyond the end return C{loc} unchanged.
        No tokens are produced in either case.
        """
        end = len(instring)
        if loc < end:
            raise ParseException(instring, loc, self.errmsg, self)
        if loc == end:
            return loc+1, []
        if loc > end:
            return loc, []
        # only reachable if loc does not compare with the length at all
        raise ParseException(instring, loc, self.errmsg, self)
3212
class WordStart(_PositionToken):
    r"""
    Matches if the current position is at the beginning of a Word, and
    is not preceded by any character in a given set of C{wordChars}
    (default=C{printables}). To emulate the C{\b} behavior of regular expressions,
    use C{WordStart(alphanums)}. C{WordStart} will also match at the beginning of
    the string being parsed, or at the beginning of a line.
    """
    def __init__(self, wordChars = printables):
        super(WordStart,self).__init__()
        self.wordChars = set(wordChars)
        self.errmsg = "Not at the start of a word"

    def parseImpl(self, instring, loc, doActions=True ):
        """
        Succeed with no tokens at position 0, or when the previous character
        is not a word character and the current one is; otherwise raise
        C{ParseException}.
        """
        wordChars = self.wordChars
        if loc != 0 and (instring[loc-1] in wordChars or
                         instring[loc] not in wordChars):
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
3232
class WordEnd(_PositionToken):
    r"""
    Matches if the current position is at the end of a Word, and
    is not followed by any character in a given set of C{wordChars}
    (default=C{printables}). To emulate the C{\b} behavior of regular expressions,
    use C{WordEnd(alphanums)}. C{WordEnd} will also match at the end of
    the string being parsed, or at the end of a line.
    """
    def __init__(self, wordChars = printables):
        super(WordEnd,self).__init__()
        self.wordChars = set(wordChars)
        self.skipWhitespace = False
        self.errmsg = "Not at the end of a word"

    def parseImpl(self, instring, loc, doActions=True ):
        """
        Succeed with no tokens at or past the end of the input, or when the
        current character is not a word character and the previous one is;
        otherwise raise C{ParseException}.
        """
        wordChars = self.wordChars
        end = len(instring)
        if end > 0 and loc < end and (instring[loc] in wordChars or
                                      instring[loc-1] not in wordChars):
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
3254
3255
class ParseExpression(ParserElement):
    """
    Abstract subclass of ParserElement, for combining and post-processing parsed tokens.

    The contained expressions are kept in C{self.exprs}.
    """
    def __init__( self, exprs, savelist = False ):
        """
        Normalize C{exprs} into the list C{self.exprs}.

        C{exprs} may be a single string (wrapped with the literal string
        class), a generator or other iterable of expressions (an iterable
        made up only of strings has each string wrapped), or a single
        non-iterable object, which becomes a one-element list.
        """
        super(ParseExpression,self).__init__(savelist)
        if isinstance( exprs, _generatorType ):
            exprs = list(exprs)

        if isinstance( exprs, basestring ):
            self.exprs = [ ParserElement._literalStringClass( exprs ) ]
        elif isinstance( exprs, Iterable ):
            exprs = list(exprs)
            # if sequence of strings provided, wrap with Literal
            if all(isinstance(expr, basestring) for expr in exprs):
                exprs = [ ParserElement._literalStringClass( expr ) for expr in exprs ]
            self.exprs = list(exprs)
        else:
            try:
                self.exprs = list( exprs )
            except TypeError:
                self.exprs = [ exprs ]
        self.callPreparse = False

    def __getitem__( self, i ):
        """Return the contained expression(s) at index or slice C{i}."""
        return self.exprs[i]

    def append( self, other ):
        """Add C{other} to the contained expressions, drop the cached string form, and return C{self}."""
        self.exprs.append( other )
        self.strRepr = None
        return self

    def leaveWhitespace( self ):
        """Extends C{leaveWhitespace} defined in base class, and also invokes C{leaveWhitespace} on
           all contained expressions."""
        self.skipWhitespace = False
        # work on copies so expressions shared with other grammars are not altered
        self.exprs = [ e.copy() for e in self.exprs ]
        for e in self.exprs:
            e.leaveWhitespace()
        return self

    def ignore( self, other ):
        """
        Register C{other} as ignorable on this expression and pass the
        registered entry on to every contained expression. A C{Suppress}
        that is already registered is not added again. Returns C{self}.
        """
        if isinstance( other, Suppress ) and other in self.ignoreExprs:
            return self
        super( ParseExpression, self).ignore( other )
        for e in self.exprs:
            e.ignore( self.ignoreExprs[-1] )
        return self

    def __str__( self ):
        """
        Return the base-class string form when available, else a cached
        C{ClassName:(exprs)} description.
        """
        try:
            return super(ParseExpression,self).__str__()
        except Exception:
            pass

        if self.strRepr is None:
            self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.exprs) )
        return self.strRepr

    def streamline( self ):
        """
        Streamline every contained expression and flatten directly nested
        expressions of the same class; returns C{self}.
        """
        super(ParseExpression,self).streamline()

        for e in self.exprs:
            e.streamline()

        # collapse nested And's of the form And( And( And( a,b), c), d) to And( a,b,c,d )
        # but only if there are no parse actions or resultsNames on the nested And's
        # (likewise for Or's and MatchFirst's)
        if ( len(self.exprs) == 2 ):
            other = self.exprs[0]
            if ( isinstance( other, self.__class__ ) and
                  not(other.parseAction) and
                  other.resultsName is None and
                  not other.debug ):
                self.exprs = other.exprs[:] + [ self.exprs[1] ]
                self.strRepr = None
                self.mayReturnEmpty |= other.mayReturnEmpty
                self.mayIndexError  |= other.mayIndexError

            # re-read the last element: the list may just have been rebuilt
            other = self.exprs[-1]
            if ( isinstance( other, self.__class__ ) and
                  not(other.parseAction) and
                  other.resultsName is None and
                  not other.debug ):
                self.exprs = self.exprs[:-1] + other.exprs[:]
                self.strRepr = None
                self.mayReturnEmpty |= other.mayReturnEmpty
                self.mayIndexError  |= other.mayIndexError

        self.errmsg = "Expected " + _ustr(self)

        return self

    def setResultsName( self, name, listAllMatches=False ):
        """Delegate to the base-class C{setResultsName} and return its result."""
        ret = super(ParseExpression,self).setResultsName(name,listAllMatches)
        return ret

    def validate( self, validateTrace=None ):
        """
        Validate every contained expression, passing along the trace extended
        with this expression, then run the recursion check on this expression.

        C{validateTrace} defaults to an empty trace; the caller's list is
        copied, never modified.
        """
        # None stands in for "no trace yet"; avoids a shared mutable default
        if validateTrace is None:
            validateTrace = []
        tmp = validateTrace[:]+[self]
        for e in self.exprs:
            e.validate(tmp)
        self.checkRecursion( [] )

    def copy(self):
        """Return a copy of this expression whose contained expressions are copies as well."""
        ret = super(ParseExpression,self).copy()
        ret.exprs = [e.copy() for e in self.exprs]
        return ret
3367
class And(ParseExpression):
    """
    Requires all given C{ParseExpression}s to be found in the given order.
    Expressions may be separated by whitespace.
    May be constructed using the C{'+'} operator.
    May also be constructed using the C{'-'} operator, which will suppress backtracking.

    Example::
        integer = Word(nums)
        name_expr = OneOrMore(Word(alphas))

        expr = And([integer("id"),name_expr("name"),integer("age")])
        # more easily written as:
        expr = integer("id") + name_expr("name") + integer("age")
    """

    class _ErrorStop(Empty):
        """Marker element: failures of the expressions after it are raised as C{ParseSyntaxException}."""
        def __init__(self, *args, **kwargs):
            super(And._ErrorStop,self).__init__(*args, **kwargs)
            self.name = '-'
            self.leaveWhitespace()

    def __init__( self, exprs, savelist = True ):
        super(And,self).__init__(exprs, savelist)
        self.mayReturnEmpty = all(member.mayReturnEmpty for member in self.exprs)
        # whitespace handling is taken over from the first expression
        first = self.exprs[0]
        self.setWhitespaceChars( first.whiteChars )
        self.skipWhitespace = first.skipWhitespace
        self.callPreparse = True

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Parse every contained expression in order, accumulating their tokens.

        Returns C{(new_loc, tokens)}. Once an C{_ErrorStop} marker has been
        passed, a failure of a later expression is raised as
        C{ParseSyntaxException}.
        """
        # pass False as last arg to _parse for first element, since we already
        # pre-parsed the string as part of our And pre-parsing
        loc, resultlist = self.exprs[0]._parse( instring, loc, doActions, callPreParse=False )
        pastErrorStop = False
        for member in self.exprs[1:]:
            if isinstance(member, And._ErrorStop):
                pastErrorStop = True
                continue
            if not pastErrorStop:
                loc, tokens = member._parse( instring, loc, doActions )
            else:
                try:
                    loc, tokens = member._parse( instring, loc, doActions )
                except ParseSyntaxException:
                    raise
                except ParseBaseException as pe:
                    pe.__traceback__ = None
                    raise ParseSyntaxException._from_exception(pe)
                except IndexError:
                    raise ParseSyntaxException(instring, len(instring), self.errmsg, self)
            if tokens or tokens.haskeys():
                resultlist += tokens
        return loc, resultlist

    def __iadd__(self, other ):
        """Append C{other} (a string is wrapped with the literal string class) and return C{self}."""
        if isinstance( other, basestring ):
            other = ParserElement._literalStringClass( other )
        return self.append( other )

    def checkRecursion( self, parseElementList ):
        """
        Run the recursion check on the leading expressions, stopping after the
        first one that cannot return empty.
        """
        trail = parseElementList[:] + [ self ]
        for member in self.exprs:
            member.checkRecursion( trail )
            if not member.mayReturnEmpty:
                break

    def __str__( self ):
        """Return the assigned name if there is one, else a cached C{{a b c}} description."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + " ".join(_ustr(member) for member in self.exprs) + "}"

        return self.strRepr
3442
3443
class Or(ParseExpression):
    """
    Requires that at least one C{ParseExpression} is found.
    If two expressions match, the expression that matches the longest string will be used.
    May be constructed using the C{'^'} operator.

    Example::
        # construct Or using '^' operator

        number = Word(nums) ^ Combine(Word(nums) + '.' + Word(nums))
        print(number.searchString("123 3.1416 789"))
    prints::
        [['123'], ['3.1416'], ['789']]
    """
    def __init__( self, exprs, savelist = False ):
        super(Or,self).__init__(exprs, savelist)
        # with no alternatives at all, the expression is flagged as possibly empty
        self.mayReturnEmpty = any(alt.mayReturnEmpty for alt in self.exprs) if self.exprs else True

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Try every alternative, then parse for real with the candidates ordered
        from longest to shortest trial match, returning the first that succeeds.

        When nothing matches, re-raise the failure that occurred furthest into
        the input, with its message replaced by this expression's message.
        """
        furthestLoc = -1
        furthestExc = None
        candidates = []
        for alt in self.exprs:
            try:
                endLoc = alt.tryParse( instring, loc )
            except ParseException as err:
                err.__traceback__ = None
                if err.loc > furthestLoc:
                    furthestExc = err
                    furthestLoc = err.loc
            except IndexError:
                if len(instring) > furthestLoc:
                    furthestExc = ParseException(instring,len(instring),alt.errmsg,self)
                    furthestLoc = len(instring)
            else:
                # save match among all matches, to retry longest to shortest
                candidates.append((endLoc, alt))

        if candidates:
            # stable sort: equally long matches keep their listed order
            for _, alt in sorted(candidates, key=lambda entry: entry[0], reverse=True):
                try:
                    return alt._parse( instring, loc, doActions )
                except ParseException as err:
                    err.__traceback__ = None
                    if err.loc > furthestLoc:
                        furthestExc = err
                        furthestLoc = err.loc

        if furthestExc is None:
            raise ParseException(instring, loc, "no defined alternatives to match", self)
        furthestExc.msg = self.errmsg
        raise furthestExc


    def __ixor__(self, other ):
        """Append C{other} (a string is wrapped with the literal string class) and return C{self}."""
        if isinstance( other, basestring ):
            other = ParserElement._literalStringClass( other )
        return self.append( other )

    def __str__( self ):
        """Return the assigned name if there is one, else a cached C{{a ^ b}} description."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + " ^ ".join(_ustr(alt) for alt in self.exprs) + "}"

        return self.strRepr

    def checkRecursion( self, parseElementList ):
        """Run the recursion check on every alternative."""
        trail = parseElementList[:] + [ self ]
        for alt in self.exprs:
            alt.checkRecursion( trail )
3521
3522
class MatchFirst(ParseExpression):
    """
    Requires that at least one C{ParseExpression} is found.
    If two expressions match, the first one listed is the one that will match.
    May be constructed using the C{'|'} operator.

    Example::
        # construct MatchFirst using '|' operator

        # watch the order of expressions to match
        number = Word(nums) | Combine(Word(nums) + '.' + Word(nums))
        print(number.searchString("123 3.1416 789")) #  Fail! -> [['123'], ['3'], ['1416'], ['789']]

        # put more selective expression first
        number = Combine(Word(nums) + '.' + Word(nums)) | Word(nums)
        print(number.searchString("123 3.1416 789")) #  Better -> [['123'], ['3.1416'], ['789']]
    """
    def __init__( self, exprs, savelist = False ):
        super(MatchFirst,self).__init__(exprs, savelist)
        # with no alternatives at all, the expression is flagged as possibly empty
        self.mayReturnEmpty = any(alt.mayReturnEmpty for alt in self.exprs) if self.exprs else True

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Return the result of the first alternative that parses at C{loc}.

        When none matches, re-raise the failure that occurred furthest into
        the input, with its message replaced by this expression's message.
        """
        furthestLoc = -1
        furthestExc = None
        for alt in self.exprs:
            try:
                return alt._parse( instring, loc, doActions )
            except ParseException as err:
                if err.loc > furthestLoc:
                    furthestExc = err
                    furthestLoc = err.loc
            except IndexError:
                if len(instring) > furthestLoc:
                    furthestExc = ParseException(instring,len(instring),alt.errmsg,self)
                    furthestLoc = len(instring)

        # only got here if no expression matched, raise exception for match that made it the furthest
        if furthestExc is None:
            raise ParseException(instring, loc, "no defined alternatives to match", self)
        furthestExc.msg = self.errmsg
        raise furthestExc

    def __ior__(self, other ):
        """Append C{other} (a string is wrapped with the literal string class) and return C{self}."""
        if isinstance( other, basestring ):
            other = ParserElement._literalStringClass( other )
        return self.append( other )

    def __str__( self ):
        """Return the assigned name if there is one, else a cached C{{a | b}} description."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + " | ".join(_ustr(alt) for alt in self.exprs) + "}"

        return self.strRepr

    def checkRecursion( self, parseElementList ):
        """Run the recursion check on every alternative."""
        trail = parseElementList[:] + [ self ]
        for alt in self.exprs:
            alt.checkRecursion( trail )
3589
3590
class Each(ParseExpression):
    """
    Requires all given C{ParseExpression}s to be found, but in any order.
    Expressions may be separated by whitespace.
    May be constructed using the C{'&'} operator.

    Example::
        color = oneOf("RED ORANGE YELLOW GREEN BLUE PURPLE BLACK WHITE BROWN")
        shape_type = oneOf("SQUARE CIRCLE TRIANGLE STAR HEXAGON OCTAGON")
        integer = Word(nums)
        shape_attr = "shape:" + shape_type("shape")
        posn_attr = "posn:" + Group(integer("x") + ',' + integer("y"))("posn")
        color_attr = "color:" + color("color")
        size_attr = "size:" + integer("size")

        # use Each (using operator '&') to accept attributes in any order
        # (shape and posn are required, color and size are optional)
        shape_spec = shape_attr & posn_attr & Optional(color_attr) & Optional(size_attr)

        shape_spec.runTests('''
            shape: SQUARE color: BLACK posn: 100, 120
            shape: CIRCLE size: 50 color: BLUE posn: 50,80
            color:GREEN size:20 shape:TRIANGLE posn:20,40
            '''
            )
    prints::
        shape: SQUARE color: BLACK posn: 100, 120
        ['shape:', 'SQUARE', 'color:', 'BLACK', 'posn:', ['100', ',', '120']]
        - color: BLACK
        - posn: ['100', ',', '120']
          - x: 100
          - y: 120
        - shape: SQUARE


        shape: CIRCLE size: 50 color: BLUE posn: 50,80
        ['shape:', 'CIRCLE', 'size:', '50', 'color:', 'BLUE', 'posn:', ['50', ',', '80']]
        - color: BLUE
        - posn: ['50', ',', '80']
          - x: 50
          - y: 80
        - shape: CIRCLE
        - size: 50


        color: GREEN size: 20 shape: TRIANGLE posn: 20,40
        ['color:', 'GREEN', 'size:', '20', 'shape:', 'TRIANGLE', 'posn:', ['20', ',', '40']]
        - color: GREEN
        - posn: ['20', ',', '40']
          - x: 20
          - y: 40
        - shape: TRIANGLE
        - size: 20
    """
    def __init__(self, exprs, savelist=True):
        super(Each, self).__init__(exprs, savelist)
        self.skipWhitespace = True
        # the sub-expressions are sorted into groups lazily, on the first parse
        self.initExprGroups = True
        self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)

    def parseImpl(self, instring, loc, doActions=True):
        if self.initExprGroups:
            # map each Optional's inner expression (by id) back to its wrapper,
            # and collect the inner expressions themselves
            self.opt1map = {}
            unwrapped_optionals = []
            for e in self.exprs:
                if isinstance(e, Optional):
                    self.opt1map[id(e.expr)] = e
                    unwrapped_optionals.append(e.expr)
            may_be_empty = [e for e in self.exprs
                            if e.mayReturnEmpty and not isinstance(e, Optional)]
            self.optionals = unwrapped_optionals + may_be_empty
            self.multioptionals = [e.expr for e in self.exprs if isinstance(e, ZeroOrMore)]
            self.multirequired = [e.expr for e in self.exprs if isinstance(e, OneOrMore)]
            self.required = [e for e in self.exprs
                             if not isinstance(e, (Optional, ZeroOrMore, OneOrMore))]
            self.required += self.multirequired
            self.initExprGroups = False

        tmpLoc = loc
        tmpReqd = self.required[:]
        tmpOpt = self.optionals[:]
        matchOrder = []

        # trial passes: keep sweeping the candidates until a full sweep
        # produces no match at all
        while True:
            candidates = tmpReqd + tmpOpt + self.multioptionals + self.multirequired
            failures = 0
            for e in candidates:
                try:
                    tmpLoc = e.tryParse(instring, tmpLoc)
                except ParseException:
                    failures += 1
                    continue
                # record the Optional wrapper (if any) in place of its inner expr
                matchOrder.append(self.opt1map.get(id(e), e))
                if e in tmpReqd:
                    tmpReqd.remove(e)
                elif e in tmpOpt:
                    tmpOpt.remove(e)
            if failures == len(candidates):
                break

        if tmpReqd:
            missing = ", ".join(_ustr(e) for e in tmpReqd)
            raise ParseException(instring, loc, "Missing one or more required elements (%s)" % missing)

        # add any unmatched Optionals, in case they have default values defined
        matchOrder += [e for e in self.exprs if isinstance(e, Optional) and e.expr in tmpOpt]

        # real parse, replaying the expressions in the order discovered above
        resultlist = []
        for matched in matchOrder:
            loc, results = matched._parse(instring, loc, doActions)
            resultlist.append(results)

        return loc, sum(resultlist, ParseResults([]))

    def __str__(self):
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is not None:
            return self.strRepr

        self.strRepr = "{" + " & ".join(_ustr(e) for e in self.exprs) + "}"
        return self.strRepr

    def checkRecursion(self, parseElementList):
        trail = parseElementList[:] + [self]
        for member in self.exprs:
            member.checkRecursion(trail)
3713
3714
class ParseElementEnhance(ParserElement):
    """
    Abstract subclass of C{ParserElement}, for combining and post-processing parsed tokens.

    Wraps a single contained expression, stored as C{self.expr}. C{expr} may
    be a string (converted using C{ParserElement._literalStringClass}) or
    C{None}; most methods here tolerate a missing expression.
    """
    def __init__(self, expr, savelist=False):
        super(ParseElementEnhance, self).__init__(savelist)
        if isinstance(expr, basestring):
            if issubclass(ParserElement._literalStringClass, Token):
                expr = ParserElement._literalStringClass(expr)
            else:
                # the configured literal class is not a Token, so hand it a
                # Literal built from the string instead of the raw string
                expr = ParserElement._literalStringClass(Literal(expr))
        self.expr = expr
        self.strRepr = None
        if expr is not None:
            # take on the parsing characteristics of the wrapped expression
            self.mayIndexError = expr.mayIndexError
            self.mayReturnEmpty = expr.mayReturnEmpty
            self.setWhitespaceChars(expr.whiteChars)
            self.skipWhitespace = expr.skipWhitespace
            self.saveAsList = expr.saveAsList
            self.callPreparse = expr.callPreparse
            self.ignoreExprs.extend(expr.ignoreExprs)

    def parseImpl(self, instring, loc, doActions=True):
        """
        Delegate to the wrapped expression.

        Raises C{ParseException} when no expression has been set.
        """
        if self.expr is not None:
            return self.expr._parse(instring, loc, doActions, callPreParse=False)
        # pass the real input string (previously "") so the exception can
        # report the line and column of the failure
        raise ParseException(instring, loc, self.errmsg, self)

    def leaveWhitespace(self):
        """
        Turn off whitespace skipping on this element and on a private copy
        of the wrapped expression. Returns C{self}.
        """
        self.skipWhitespace = False
        # guard before copying: previously copy() ran ahead of the None check
        # and raised AttributeError when no expression was set
        if self.expr is not None:
            self.expr = self.expr.copy()
            self.expr.leaveWhitespace()
        return self

    def ignore(self, other):
        """
        Register C{other} as ignorable on this element and pass the newly
        registered entry on to the wrapped expression. A C{Suppress} that is
        already registered is not added again. Returns C{self}.
        """
        if isinstance(other, Suppress) and other in self.ignoreExprs:
            return self
        super(ParseElementEnhance, self).ignore(other)
        if self.expr is not None:
            self.expr.ignore(self.ignoreExprs[-1])
        return self

    def streamline(self):
        super(ParseElementEnhance, self).streamline()
        if self.expr is not None:
            self.expr.streamline()
        return self

    def checkRecursion(self, parseElementList):
        """
        Raise C{RecursiveGrammarException} if this element already appears in
        C{parseElementList}; otherwise continue the check in the wrapped
        expression.
        """
        if self in parseElementList:
            raise RecursiveGrammarException(parseElementList + [self])
        subRecCheckList = parseElementList[:] + [self]
        if self.expr is not None:
            self.expr.checkRecursion(subRecCheckList)

    def validate(self, validateTrace=[]):
        # the default list is only ever copied, never mutated
        tmp = validateTrace[:] + [self]
        if self.expr is not None:
            self.expr.validate(tmp)
        self.checkRecursion([])

    def __str__(self):
        # best effort: use the base-class string if it can be produced,
        # otherwise fall back to "ClassName:(expr)"
        try:
            return super(ParseElementEnhance, self).__str__()
        except Exception:
            pass

        if self.strRepr is None and self.expr is not None:
            self.strRepr = "%s:(%s)" % (self.__class__.__name__, _ustr(self.expr))
        return self.strRepr
3790
3791
class FollowedBy(ParseElementEnhance):
    """
    Positive lookahead on the given parse expression. C{FollowedBy}
    does I{not} advance the parsing position within the input string; it
    only verifies that the specified parse expression matches at the
    current position. C{FollowedBy} always returns a null token list.

    Example::
        # use FollowedBy to match a label only if it is followed by a ':'
        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))

        OneOrMore(attr_expr).parseString("shape: SQUARE color: BLACK posn: upper left").pprint()
    prints::
        [['shape', 'SQUARE'], ['color', 'BLACK'], ['posn', 'upper left']]
    """
    def __init__(self, expr):
        super(FollowedBy, self).__init__(expr)
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        # only the success or failure of tryParse matters here; the location
        # it returns is discarded so the parse position stays at loc
        self.expr.tryParse(instring, loc)
        no_tokens = []
        return loc, no_tokens
3816
3817
class NotAny(ParseElementEnhance):
    """
    Negative lookahead: disallows a match of the given parse expression.
    C{NotAny} does I{not} advance the parsing position within the input
    string; it only verifies that the specified parse expression does
    I{not} match at the current position. Also, C{NotAny} does I{not} skip
    over leading whitespace. C{NotAny} always returns a null token list.
    May be constructed using the '~' operator.

    Example::
        # accept a word only if it is not the literal "end"
        word_not_end = ~Literal("end") + Word(alphas)
    """
    def __init__(self, expr):
        super(NotAny, self).__init__(expr)
        # do NOT use self.leaveWhitespace() here - that would propagate the
        # setting down into the wrapped expression
        self.skipWhitespace = False
        self.mayReturnEmpty = True
        self.errmsg = "Found unwanted token, " + _ustr(self.expr)

    def parseImpl(self, instring, loc, doActions=True):
        if not self.expr.canParseNext(instring, loc):
            return loc, []
        raise ParseException(instring, loc, self.errmsg, self)

    def __str__(self):
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is not None:
            return self.strRepr

        self.strRepr = "~{" + _ustr(self.expr) + "}"
        return self.strRepr
3849
class _MultipleMatch(ParseElementEnhance):
    """
    Shared implementation of repeated matching of a single expression,
    with an optional C{stopOn} sentinel that ends the repetition.
    """
    def __init__(self, expr, stopOn=None):
        super(_MultipleMatch, self).__init__(expr)
        self.saveAsList = True
        ender = stopOn
        if isinstance(ender, basestring):
            ender = ParserElement._literalStringClass(ender)
        # stored in negated form, so a successful tryParse means "not at the sentinel"
        if ender is not None:
            self.not_ender = ~ender
        else:
            self.not_ender = None

    def parseImpl(self, instring, loc, doActions=True):
        parse_one = self.expr._parse
        skip_ignorables = self._skipIgnorables
        if self.not_ender is not None:
            reject_sentinel = self.not_ender.tryParse
        else:
            reject_sentinel = None

        # must be at least one (but first see if we are the stopOn sentinel;
        # if so, fail)
        if reject_sentinel is not None:
            reject_sentinel(instring, loc)
        loc, tokens = parse_one(instring, loc, doActions, callPreParse=False)

        # every further repetition is optional: the first failure just ends the loop
        try:
            have_ignorables = bool(self.ignoreExprs)
            while True:
                if reject_sentinel is not None:
                    reject_sentinel(instring, loc)
                preloc = skip_ignorables(instring, loc) if have_ignorables else loc
                loc, more = parse_one(instring, preloc, doActions)
                if more or more.haskeys():
                    tokens += more
        except (ParseException, IndexError):
            pass

        return loc, tokens
3887
class OneOrMore(_MultipleMatch):
    """
    Repetition of one or more of the given expression.

    Parameters:
     - expr - expression that must match one or more times
     - stopOn - (default=C{None}) - expression for a terminating sentinel
          (only required if the sentinel would ordinarily match the repetition
          expression)

    Example::
        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word).setParseAction(' '.join))

        text = "shape: SQUARE posn: upper left color: BLACK"
        OneOrMore(attr_expr).parseString(text).pprint()  # Fail! read 'color' as data instead of next label -> [['shape', 'SQUARE color']]

        # use stopOn attribute for OneOrMore to avoid reading label string as part of the data
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))
        OneOrMore(attr_expr).parseString(text).pprint() # Better -> [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'BLACK']]

        # could also be written as
        (attr_expr * (1,)).parseString(text).pprint()
    """

    def __str__(self):
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is not None:
            return self.strRepr

        self.strRepr = "{" + _ustr(self.expr) + "}..."
        return self.strRepr
3922
class ZeroOrMore(_MultipleMatch):
    """
    Optional repetition of zero or more of the given expression.

    Parameters:
     - expr - expression that must match zero or more times
     - stopOn - (default=C{None}) - expression for a terminating sentinel
          (only required if the sentinel would ordinarily match the repetition
          expression)

    Example: similar to L{OneOrMore}
    """
    def __init__(self, expr, stopOn=None):
        super(ZeroOrMore, self).__init__(expr, stopOn=stopOn)
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        # a failure of the "at least one" parse is not an error here:
        # report an empty match at the original location instead
        try:
            outcome = super(ZeroOrMore, self).parseImpl(instring, loc, doActions)
        except (ParseException, IndexError):
            return loc, []
        else:
            return outcome

    def __str__(self):
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is not None:
            return self.strRepr

        self.strRepr = "[" + _ustr(self.expr) + "]..."
        return self.strRepr
3953
3954 class _NullToken(object):
3955 def __bool__(self):
3956 return False
3957 __nonzero__ = __bool__
3958 def __str__(self):
3959 return ""
3960
3961 _optionalNotMatched = _NullToken()
class Optional(ParseElementEnhance):
    """
    Optional matching of the given expression.

    Parameters:
     - expr - expression that may match zero or one time
     - default (optional) - value to be returned if the optional expression is not found.

    Example::
        # US postal code can be a 5-digit zip, plus optional 4-digit qualifier
        zip = Combine(Word(nums, exact=5) + Optional('-' + Word(nums, exact=4)))
        zip.runTests('''
            # traditional ZIP code
            12345

            # ZIP+4 form
            12101-0001

            # invalid ZIP
            98765-
            ''')
    prints::
        # traditional ZIP code
        12345
        ['12345']

        # ZIP+4 form
        12101-0001
        ['12101-0001']

        # invalid ZIP
        98765-
             ^
        FAIL: Expected end of text (at char 5), (line:1, col:6)
    """
    def __init__(self, expr, default=_optionalNotMatched):
        super(Optional, self).__init__(expr, savelist=False)
        self.defaultValue = default
        self.mayReturnEmpty = True
        self.saveAsList = self.expr.saveAsList

    def parseImpl(self, instring, loc, doActions=True):
        try:
            loc, tokens = self.expr._parse(instring, loc, doActions, callPreParse=False)
        except (ParseException, IndexError):
            # no match: succeed anyway, with the default value if one was given
            if self.defaultValue is _optionalNotMatched:
                tokens = []
            elif self.expr.resultsName:
                # also file the default under the wrapped expression's results name
                tokens = ParseResults([self.defaultValue])
                tokens[self.expr.resultsName] = self.defaultValue
            else:
                tokens = [self.defaultValue]
        return loc, tokens

    def __str__(self):
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is not None:
            return self.strRepr

        self.strRepr = "[" + _ustr(self.expr) + "]"
        return self.strRepr
4025
class SkipTo(ParseElementEnhance):
    """
    Token for skipping over all undefined text until the matched expression is found.

    Parameters:
     - expr - target expression marking the end of the data to be skipped
     - include - (default=C{False}) if True, the target expression is also parsed
          (the skipped text and target expression are returned as a 2-element list).
     - ignore - (default=C{None}) used to define grammars (typically quoted strings and
          comments) that might contain false matches to the target expression
     - failOn - (default=C{None}) define expressions that are not allowed to be
          included in the skipped text; if found before the target expression is found,
          the SkipTo is not a match

    Example::
        report = '''
            Outstanding Issues Report - 1 Jan 2000

               # | Severity | Description                               |  Days Open
            -----+----------+-------------------------------------------+-----------
             101 | Critical | Intermittent system crash                 |          6
              94 | Cosmetic | Spelling error on Login ('log|n')         |         14
              79 | Minor    | System slow when running too many reports |         47
            '''
        integer = Word(nums)
        SEP = Suppress('|')
        # use SkipTo to simply match everything up until the next SEP
        # - ignore quoted strings, so that a '|' character inside a quoted string does not match
        # - parse action will call token.strip() for each matched token, i.e., the description body
        string_data = SkipTo(SEP, ignore=quotedString)
        string_data.setParseAction(tokenMap(str.strip))
        ticket_expr = (integer("issue_num") + SEP
                      + string_data("sev") + SEP
                      + string_data("desc") + SEP
                      + integer("days_open"))

        for tkt in ticket_expr.searchString(report):
            print(tkt.dump())
    prints::
        ['101', 'Critical', 'Intermittent system crash', '6']
        - days_open: 6
        - desc: Intermittent system crash
        - issue_num: 101
        - sev: Critical
        ['94', 'Cosmetic', "Spelling error on Login ('log|n')", '14']
        - days_open: 14
        - desc: Spelling error on Login ('log|n')
        - issue_num: 94
        - sev: Cosmetic
        ['79', 'Minor', 'System slow when running too many reports', '47']
        - days_open: 47
        - desc: System slow when running too many reports
        - issue_num: 79
        - sev: Minor
    """
    def __init__(self, other, include=False, ignore=None, failOn=None):
        super(SkipTo, self).__init__(other)
        self.ignoreExpr = ignore
        self.mayReturnEmpty = True
        self.mayIndexError = False
        self.includeMatch = include
        self.asList = False
        if not isinstance(failOn, basestring):
            self.failOn = failOn
        else:
            self.failOn = ParserElement._literalStringClass(failOn)
        self.errmsg = "No match found for " + _ustr(self.expr)

    def parseImpl(self, instring, loc, doActions=True):
        startloc = loc
        instrlen = len(instring)
        expr_parse = self.expr._parse
        fail_here = self.failOn.canParseNext if self.failOn is not None else None
        skip_ignored = self.ignoreExpr.tryParse if self.ignoreExpr is not None else None

        tmploc = loc
        while tmploc <= instrlen:
            # NOTE(review): a failOn hit leaves the loop through `break`, so
            # the while/else failure branch below is not taken in that case
            if fail_here is not None and fail_here(instring, tmploc):
                break

            if skip_ignored is not None:
                # advance past any run of consecutive ignore expressions
                while True:
                    try:
                        tmploc = skip_ignored(instring, tmploc)
                    except ParseBaseException:
                        break

            try:
                # probe only: parse actions are not run during the search
                expr_parse(instring, tmploc, doActions=False, callPreParse=False)
            except (ParseException, IndexError):
                # no match, advance loc in string
                tmploc += 1
            else:
                # matched skipto expr, done
                break

        else:
            # ran off the end of the input string without matching skipto expr, fail
            raise ParseException(instring, loc, self.errmsg, self)

        # build up return values: the skipped text, plus the target's tokens
        # when include=True
        skipresult = ParseResults(instring[startloc:tmploc])
        loc = tmploc

        if self.includeMatch:
            loc, mat = expr_parse(instring, loc, doActions, callPreParse=False)
            skipresult += mat

        return loc, skipresult
4140
class Forward(ParseElementEnhance):
    """
    Forward declaration of an expression to be defined later -
    used for recursive grammars, such as algebraic infix notation.
    When the expression is known, it is assigned to the C{Forward} variable using the '<<' operator.

    Note: take care when assigning to C{Forward} not to overlook precedence of operators.
    Specifically, '|' has a lower precedence than '<<', so that::
        fwdExpr << a | b | c
    will actually be evaluated as::
        (fwdExpr << a) | b | c
    thereby leaving b and c out as parseable alternatives.  It is recommended that you
    explicitly group the values inserted into the C{Forward}::
        fwdExpr << (a | b | c)
    Converting to use the '<<=' operator instead will avoid this problem.

    See L{ParseResults.pprint} for an example of a recursive parser created using
    C{Forward}.
    """
    def __init__(self, other=None):
        super(Forward, self).__init__(other, savelist=False)

    def __lshift__(self, other):
        """
        Set the wrapped expression to C{other} (a plain string is converted
        with C{ParserElement._literalStringClass}) and copy its parsing
        attributes onto this element. Returns C{self}.
        """
        if isinstance(other, basestring):
            other = ParserElement._literalStringClass(other)
        self.expr = other
        # drop any cached string representation
        self.strRepr = None
        self.mayIndexError = self.expr.mayIndexError
        self.mayReturnEmpty = self.expr.mayReturnEmpty
        self.setWhitespaceChars(self.expr.whiteChars)
        self.skipWhitespace = self.expr.skipWhitespace
        self.saveAsList = self.expr.saveAsList
        self.ignoreExprs.extend(self.expr.ignoreExprs)
        return self

    def __ilshift__(self, other):
        # '<<=' behaves exactly like '<<'
        return self << other

    def leaveWhitespace(self):
        # unlike the base class, does not touch the wrapped expression
        self.skipWhitespace = False
        return self

    def streamline(self):
        # the flag is set before descending, so a recursive grammar that
        # comes back to this element stops here
        if not self.streamlined:
            self.streamlined = True
            if self.expr is not None:
                self.expr.streamline()
        return self

    def validate(self, validateTrace=[]):
        # the default list is only ever copied, never mutated;
        # validation is skipped when this element is already on the trace
        if self not in validateTrace:
            tmp = validateTrace[:] + [self]
            if self.expr is not None:
                self.expr.validate(tmp)
        self.checkRecursion([])

    def __str__(self):
        """
        Return the assigned name if there is one, otherwise C{"<ClassName>: ..."}.

        The wrapped expression is deliberately not rendered. The previous
        expanding implementation (stubbed out for memory and performance
        reasons) sat unreachable after the C{return} and has been removed.
        """
        if hasattr(self, "name"):
            return self.name
        return self.__class__.__name__ + ": ..."

    def copy(self):
        """
        Copy this element. When no expression has been assigned yet, return
        a new C{Forward} that wraps this one instead.
        """
        if self.expr is not None:
            return super(Forward, self).copy()
        else:
            ret = Forward()
            ret <<= self
            return ret
4221
class _ForwardNoRecurse(Forward):
    """C{Forward} variant whose string form is always C{"..."}."""

    def __str__(self):
        placeholder = "..."
        return placeholder
4225
class TokenConverter(ParseElementEnhance):
    """
    Abstract subclass of C{ParseElementEnhance}, for converting parsed results.
    """
    def __init__(self, expr, savelist=False):
        # savelist is accepted but not forwarded; saveAsList is forced off below
        super(TokenConverter, self).__init__(expr)
        self.saveAsList = False
4233
class Combine(TokenConverter):
    """
    Converter to concatenate all matching tokens to a single string.
    By default, the matching patterns must also be contiguous in the input string;
    this can be disabled by specifying C{'adjacent=False'} in the constructor.

    Example::
        real = Word(nums) + '.' + Word(nums)
        print(real.parseString('3.1416')) # -> ['3', '.', '1416']
        # will also erroneously match the following
        print(real.parseString('3. 1416')) # -> ['3', '.', '1416']

        real = Combine(Word(nums) + '.' + Word(nums))
        print(real.parseString('3.1416')) # -> ['3.1416']
        # no match when there are internal spaces
        print(real.parseString('3. 1416')) # -> Exception: Expected W:(0123...)
    """
    def __init__(self, expr, joinString="", adjacent=True):
        super(Combine, self).__init__(expr)
        # suppress whitespace-stripping in contained parse expressions, but re-enable it on the Combine itself
        if adjacent:
            self.leaveWhitespace()
        # must come after leaveWhitespace(), which switches this flag off
        self.skipWhitespace = True
        self.adjacent = adjacent
        self.joinString = joinString
        self.callPreparse = True

    def ignore(self, other):
        # when adjacency is required, register the ignorable on this element
        # only, via the ParserElement implementation
        if not self.adjacent:
            super(Combine, self).ignore(other)
        else:
            ParserElement.ignore(self, other)
        return self

    def postParse(self, instring, loc, tokenlist):
        # start from a copy with its list items cleared, then add the single
        # joined string
        joined = "".join(tokenlist._asStringList(self.joinString))
        retToks = tokenlist.copy()
        del retToks[:]
        retToks += ParseResults([joined], modal=self.modalResults)

        if self.resultsName and retToks.haskeys():
            return [retToks]
        return retToks
4277
class Group(TokenConverter):
    """
    Converter to return the matched tokens as a list - useful for returning tokens of C{L{ZeroOrMore}} and C{L{OneOrMore}} expressions.

    Example::
        ident = Word(alphas)
        num = Word(nums)
        term = ident | num
        func = ident + Optional(delimitedList(term))
        print(func.parseString("fn a,b,100"))  # -> ['fn', 'a', 'b', '100']

        func = ident + Group(Optional(delimitedList(term)))
        print(func.parseString("fn a,b,100"))  # -> ['fn', ['a', 'b', '100']]
    """
    def __init__(self, expr):
        super(Group, self).__init__(expr)
        self.saveAsList = True

    def postParse(self, instring, loc, tokenlist):
        # nest the matched tokens one level deeper
        grouped = [tokenlist]
        return grouped
4298
class Dict(TokenConverter):
    """
    Converter to return a repetitive expression as a list, but also as a dictionary.
    Each element can also be referenced using the first token in the expression as its key.
    Useful for tabular report scraping when the first column can be used as a item key.

    Example::
        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word).setParseAction(' '.join))

        text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
        attr_expr = (label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))

        # print attributes as plain groups
        print(OneOrMore(attr_expr).parseString(text).dump())

        # instead of OneOrMore(expr), parse using Dict(OneOrMore(Group(expr))) - Dict will auto-assign names
        result = Dict(OneOrMore(Group(attr_expr))).parseString(text)
        print(result.dump())

        # access named fields as dict entries, or output as dict
        print(result['shape'])
        print(result.asDict())
    prints::
        ['shape', 'SQUARE', 'posn', 'upper left', 'color', 'light blue', 'texture', 'burlap']

        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
        - color: light blue
        - posn: upper left
        - shape: SQUARE
        - texture: burlap
        SQUARE
        {'color': 'light blue', 'posn': 'upper left', 'texture': 'burlap', 'shape': 'SQUARE'}
    See more examples at L{ParseResults} of accessing fields by results name.
    """
    def __init__(self, expr):
        super(Dict, self).__init__(expr)
        self.saveAsList = True

    def postParse(self, instring, loc, tokenlist):
        for i, tok in enumerate(tokenlist):
            ntoks = len(tok)
            if ntoks == 0:
                continue

            # the first token of each entry is the key; integer keys are
            # converted to stripped strings
            ikey = tok[0]
            if isinstance(ikey, int):
                ikey = _ustr(ikey).strip()

            if ntoks == 1:
                # key only, no value
                value = ""
            elif ntoks == 2 and not isinstance(tok[1], ParseResults):
                # key plus one plain value
                value = tok[1]
            else:
                # everything after the key; unwrapped when it is a single
                # item carrying no named results
                dictvalue = tok.copy()
                del dictvalue[0]
                if len(dictvalue) != 1 or (isinstance(dictvalue, ParseResults) and dictvalue.haskeys()):
                    value = dictvalue
                else:
                    value = dictvalue[0]

            tokenlist[ikey] = _ParseResultsWithOffset(value, i)

        if self.resultsName:
            return [tokenlist]
        return tokenlist
4362
4363
class Suppress(TokenConverter):
    """
    Converter for ignoring the results of a parsed expression.

    Example::
        source = "a, b, c,d"
        wd = Word(alphas)
        wd_list1 = wd + ZeroOrMore(',' + wd)
        print(wd_list1.parseString(source))

        # often, delimiters that are useful during parsing are just in the
        # way afterward - use Suppress to keep them out of the parsed output
        wd_list2 = wd + ZeroOrMore(Suppress(',') + wd)
        print(wd_list2.parseString(source))
    prints::
        ['a', ',', 'b', ',', 'c', ',', 'd']
        ['a', 'b', 'c', 'd']
    (See also L{delimitedList}.)
    """
    def postParse(self, instring, loc, tokenlist):
        # discard whatever was matched
        nothing = []
        return nothing

    def suppress(self):
        # already a Suppress - nothing to wrap
        return self
4388
4389
class OnlyOnce(object):
    """
    Wrapper for parse actions, to ensure they are only called once.
    Once the wrapped action has completed, further calls raise
    C{ParseException} until C{reset()} is called.
    """
    def __init__(self, methodCall):
        self.callable = _trim_arity(methodCall)
        self.called = False

    def __call__(self, s, l, t):
        if self.called:
            raise ParseException(s, l, "")
        # the flag is set only after the action returns, so an action that
        # raises may be called again
        results = self.callable(s, l, t)
        self.called = True
        return results

    def reset(self):
        self.called = False
4405
def traceParseAction(f):
    """
    Decorator for debugging parse actions.

    When the parse action is called, this decorator will print C{">> entering I{method-name}(line:I{current_source_line}, I{parse_location}, I{matched_tokens})".}
    When the parse action completes, the decorator will print C{"<<"} followed by the returned value, or any exception that the parse action raised.
    The trace is written to C{sys.stderr}.

    Example::
        wd = Word(alphas)

        @traceParseAction
        def remove_duplicate_chars(tokens):
            return ''.join(sorted(set(''.join(tokens))))

        wds = OneOrMore(wd).setParseAction(remove_duplicate_chars)
        print(wds.parseString("slkdjs sld sldd sdlf sdljf"))
    prints::
        >>entering remove_duplicate_chars(line: 'slkdjs sld sldd sdlf sdljf', 0, (['slkdjs', 'sld', 'sldd', 'sdlf', 'sdljf'], {}))
        <<leaving remove_duplicate_chars (ret: 'dfjkls')
        ['dfjkls']
    """
    f = _trim_arity(f)

    def z(*paArgs):
        # the last three positional arguments are always (s, l, t); anything
        # before them is taken to be a bound instance, whose class name is
        # added to the label
        s, l, t = paArgs[-3:]
        label = f.__name__
        if len(paArgs) > 3:
            label = paArgs[0].__class__.__name__ + '.' + label
        sys.stderr.write(">>entering %s(line: '%s', %d, %r)\n" % (label, line(l, s), l, t))
        try:
            ret = f(*paArgs)
        except Exception as exc:
            sys.stderr.write("<<leaving %s (exception: %s)\n" % (label, exc))
            raise
        sys.stderr.write("<<leaving %s (ret: %r)\n" % (label, ret))
        return ret

    # make the wrapper report the wrapped action's name, where that is possible
    try:
        z.__name__ = f.__name__
    except AttributeError:
        pass
    return z
4446
4447 #
4448 # global helpers
4449 #
def delimitedList(expr, delim=",", combine=False):
    """
    Helper to define a delimited list of expressions - the delimiter defaults to ','.
    By default, the list elements and delimiters can have intervening whitespace, and
    comments, but this can be overridden by passing C{combine=True} in the constructor.
    If C{combine} is set to C{True}, the matching tokens are returned as a single token
    string, with the delimiters included; otherwise, the matching tokens are returned
    as a list of tokens, with the delimiters suppressed.

    Example::
        delimitedList(Word(alphas)).parseString("aa,bb,cc") # -> ['aa', 'bb', 'cc']
        delimitedList(Word(hexnums), delim=':', combine=True).parseString("AA:BB:CC:DD:EE") # -> ['AA:BB:CC:DD:EE']
    """
    # display name of the form "expr [delim expr]..."
    expr_text = _ustr(expr)
    dlName = expr_text + " [" + _ustr(delim) + " " + expr_text + "]..."

    if not combine:
        separated = expr + ZeroOrMore(Suppress(delim) + expr)
        return separated.setName(dlName)

    return Combine(expr + ZeroOrMore(delim + expr)).setName(dlName)
4468
def countedArray(expr, intExpr=None):
    """
    Helper to define a counted list of expressions.
    This helper defines a pattern of the form::
        integer expr expr expr...
    where the leading integer tells how many expr expressions follow.
    The matched tokens returns the array of expr tokens as a list - the leading count token is suppressed.

    If C{intExpr} is specified, it should be a pyparsing expression that produces an integer value.

    Example::
        countedArray(Word(alphas)).parseString('2 ab cd ef')  # -> ['ab', 'cd']

        # in this parser, the leading integer value is given in binary,
        # '10' indicating that 2 values are in the array
        binaryConstant = Word('01').setParseAction(lambda t: int(t[0], 2))
        countedArray(Word(alphas), intExpr=binaryConstant).parseString('10 ab cd ef')  # -> ['ab', 'cd']
    """
    arrayExpr = Forward()

    def countFieldParseAction(s, l, t):
        # redefine the array part on the fly, once the count has been parsed
        n = t[0]
        if n:
            arrayExpr << Group(And([expr] * n))
        else:
            arrayExpr << Group(empty)
        # suppress the count token itself
        return []

    if intExpr is not None:
        # work on a copy so the caller's expression does not gain a parse action
        intExpr = intExpr.copy()
    else:
        intExpr = Word(nums).setParseAction(lambda t: int(t[0]))
    intExpr.setName("arrayLen")
    intExpr.addParseAction(countFieldParseAction, callDuringTry=True)

    counted = intExpr + arrayExpr
    return counted.setName('(len) ' + _ustr(expr) + '...')
4499
4500 def _flatten(L):
4501 ret = []
4502 for i in L:
4503 if isinstance(i,list):
4504 ret.extend(_flatten(i))
4505 else:
4506 ret.append(i)
4507 return ret
4508
def matchPreviousLiteral(expr):
    """
    Helper to define an expression that is indirectly defined from
    the tokens matched in a previous expression, that is, it looks
    for a 'repeat' of a previous expression.  For example::
        first = Word(nums)
        second = matchPreviousLiteral(first)
        matchExpr = first + ":" + second
    will match C{"1:1"}, but not C{"1:2"}.  Because this matches a
    previous literal, will also match the leading C{"1:1"} in C{"1:10"}.
    If this is not desired, use C{matchPreviousExpr}.
    Do I{not} use with packrat parsing enabled.
    """
    rep = Forward()

    def copyTokenToRepeater(s, l, t):
        # rebind the repeater to literals built from what expr just matched
        if not t:
            rep << Empty()
        elif len(t) == 1:
            rep << t[0]
        else:
            # flatten t tokens
            tflat = _flatten(t.asList())
            rep << And(Literal(tt) for tt in tflat)

    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    rep.setName('(prev) ' + _ustr(expr))
    return rep
4536
def matchPreviousExpr(expr):
    """
    Helper that builds an expression matching a 'repeat' of a previous
    expression by re-parsing with a copy of that expression and comparing
    the resulting tokens. For example::
        first = Word(nums)
        second = matchPreviousExpr(first)
        matchExpr = first + ":" + second
    will match C{"1:1"}, but not C{"1:2"}. Because the comparison is made on
    parsed tokens, this will I{not} match the leading C{"1:1"} in C{"1:10"}:
    both expressions are evaluated first and then compared, so C{"1"} is
    compared with C{"10"}.
    Do I{not} use with packrat parsing enabled.
    """
    rep = Forward()
    # the copy is taken before the bookkeeping parse action is added to expr
    rep <<= expr.copy()

    def copyTokenToRepeater(s, l, t):
        expected = _flatten(t.asList())

        def mustMatchTheseTokens(s, l, t):
            if _flatten(t.asList()) != expected:
                raise ParseException("", 0, "")

        # replace any earlier check with one bound to the latest match
        rep.setParseAction(mustMatchTheseTokens, callDuringTry=True)

    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    rep.setName('(prev) ' + _ustr(expr))
    return rep
4564
def _escapeRegexRangeChars(s):
    """
    Escape the characters of C{s} that are special inside a regex C{[...]}
    character class (backslash, C{^}, C{-} and C{]}), and spell newline and
    tab as the two-character sequences C{\\n} and C{\\t}.
    """
    # backslash goes first so the backslashes added for the others are not doubled
    for special in ("\\", "^", "-", "]"):
        s = s.replace(special, _bslash + special)
    s = s.replace("\n", r"\n").replace("\t", r"\t")
    return _ustr(s)
4572
def oneOf( strs, caseless=False, useRegex=True ):
    """
    Helper to quickly define a set of alternative Literals. Alternatives are
    reordered where necessary so that a longer literal is always tested before
    a shorter one that would mask it, regardless of the input order, while
    still returning a C{L{MatchFirst}} (or a C{Regex}) for best performance.

    Parameters:
     - strs - a string of space-delimited literals, or a collection of string literals
     - caseless - (default=C{False}) - treat all literals as caseless
     - useRegex - (default=C{True}) - as an optimization, generate a C{Regex}
          object; a C{MatchFirst} object is generated instead if C{caseless=True},
          or if creating the C{Regex} raises an exception

    Example::
        comp_oper = oneOf("< = > <= >= !=")
        var = Word(alphas)
        number = Word(nums)
        term = var | number
        comparison_expr = term + comp_oper + term
        print(comparison_expr.searchString("B = 12  AA=23 B<=AA AA>12"))
    prints::
        [['B', '=', '12'], ['AA', '=', '23'], ['B', '<=', 'AA'], ['AA', '>', '12']]
    """
    if caseless:
        def isequal(a, b):
            return a.upper() == b.upper()

        def masks(a, b):
            return b.upper().startswith(a.upper())

        parseElementClass = CaselessLiteral
    else:
        def isequal(a, b):
            return a == b

        def masks(a, b):
            return b.startswith(a)

        parseElementClass = Literal

    if isinstance(strs, basestring):
        symbols = strs.split()
    elif isinstance(strs, Iterable):
        symbols = list(strs)
    else:
        symbols = []
        warnings.warn("Invalid argument to oneOf, expected string or iterable",
                SyntaxWarning, stacklevel=2)
    if not symbols:
        return NoMatch()

    # Drop duplicates, and move any symbol in front of an earlier symbol that is
    # a prefix of it. After each change the same position is examined again.
    idx = 0
    while idx < len(symbols) - 1:
        current = symbols[idx]
        for pos, candidate in enumerate(symbols[idx + 1:], idx + 1):
            if isequal(candidate, current):
                del symbols[pos]
                break
            if masks(current, candidate):
                symbols.insert(idx, symbols.pop(pos))
                break
        else:
            idx += 1

    if useRegex and not caseless:
        try:
            if len("".join(symbols)) == len(symbols):
                # only single characters: a character class is enough
                pattern = "[%s]" % "".join(_escapeRegexRangeChars(sym) for sym in symbols)
            else:
                pattern = "|".join(re.escape(sym) for sym in symbols)
            return Regex(pattern).setName(' | '.join(symbols))
        except Exception:
            warnings.warn("Exception creating Regex for oneOf, building MatchFirst",
                    SyntaxWarning, stacklevel=2)

    # last resort, just use MatchFirst
    alternatives = MatchFirst(parseElementClass(sym) for sym in symbols)
    return alternatives.setName(' | '.join(symbols))
4645
def dictOf( key, value ):
    """
    Helper to easily and clearly define a dictionary from separate patterns for
    the key and the value. It wraps them in the C{L{Dict}}, C{L{ZeroOrMore}} and
    C{L{Group}} tokens in the proper order. The key pattern may include delimiting
    markers or punctuation, as long as they are suppressed, so that only the
    significant key text remains. The value pattern may include named results,
    so that the C{Dict} results can include named token fields.

    Example::
        text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
        attr_expr = (label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))
        print(OneOrMore(attr_expr).parseString(text).dump())

        attr_label = label
        attr_value = Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join)

        # similar to Dict, but simpler call format
        result = dictOf(attr_label, attr_value).parseString(text)
        print(result.dump())
        print(result['shape'])
        print(result.shape)  # object attribute access works too
        print(result.asDict())
    prints::
        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
        - color: light blue
        - posn: upper left
        - shape: SQUARE
        - texture: burlap
        SQUARE
        SQUARE
        {'color': 'light blue', 'shape': 'SQUARE', 'posn': 'upper left', 'texture': 'burlap'}
    """
    entry = Group(key + value)
    return Dict(ZeroOrMore(entry))
4680
def originalTextFor(expr, asString=True):
    """
    Helper to return the original, untokenized text matched by an expression.
    Useful to restore the parsed fields of an HTML start tag into the raw tag
    text itself, or to revert separate tokens with intervening whitespace back
    to the original matching input text. By default, returns a string
    containing the original parsed text.

    If the optional C{asString} argument is passed as C{False}, the result is a
    C{L{ParseResults}} that keeps any results names that were originally matched
    and holds a single token containing the original matched text. So if the
    expression passed to C{L{originalTextFor}} contains expressions with defined
    results names, you must set C{asString} to C{False} to preserve those
    results name values.

    Example::
        src = "this is test <b> bold <i>text</i> </b> normal text "
        for tag in ("b","i"):
            opener,closer = makeHTMLTags(tag)
            patt = originalTextFor(opener + SkipTo(closer) + closer)
            print(patt.searchString(src)[0])
    prints::
        ['<b> bold <i>text</i> </b>']
        ['<i>text</i>']
    """
    # zero-width markers that record the parse location before and after expr
    startMarker = Empty().setParseAction(lambda s, loc, t: loc)
    endMarker = startMarker.copy()
    endMarker.callPreparse = False
    matchExpr = startMarker("_original_start") + expr + endMarker("_original_end")

    if not asString:
        def extractText(s, l, t):
            # remove the helper names, then replace all tokens with the source slice
            start = t.pop('_original_start')
            end = t.pop('_original_end')
            t[:] = [s[start:end]]
    else:
        def extractText(s, l, t):
            return s[t._original_start:t._original_end]

    matchExpr.setParseAction(extractText)
    matchExpr.ignoreExprs = expr.ignoreExprs
    return matchExpr
4717
def ungroup(expr):
    """
    Helper to undo pyparsing's default grouping of And expressions: the
    returned expression yields only the first token produced by C{expr}.
    """
    converter = TokenConverter(expr)
    converter.setParseAction(lambda t: t[0])
    return converter
4724
def locatedExpr(expr):
    """
    Helper to decorate a returned token with its starting and ending locations
    in the input string. The returned group carries these results names:
     - locn_start = location where matched expression begins
     - locn_end = location where matched expression ends
     - value = the actual parsed results

    Be careful if the input text contains C{<TAB>} characters, you may want to call
    C{L{ParserElement.parseWithTabs}}

    Example::
        wd = Word(alphas)
        for match in locatedExpr(wd).searchString("ljsdf123lksdjjf123lkkjj1222"):
            print(match)
    prints::
        [[0, 'ljsdf', 5]]
        [[8, 'lksdjjf', 15]]
        [[18, 'lkkjj', 23]]
    """
    locator = Empty().setParseAction(lambda s, l, t: l)
    startMarker = locator("locn_start")
    # the end marker must not skip whitespace, or it would report a later location
    endMarker = locator.copy().leaveWhitespace()("locn_end")
    return Group(startMarker + expr("value") + endMarker)
4747
4748
# convenience constants for positional expressions
empty       = Empty().setName("empty")
lineStart   = LineStart().setName("lineStart")
lineEnd     = LineEnd().setName("lineEnd")
stringStart = StringStart().setName("stringStart")
stringEnd   = StringEnd().setName("stringEnd")

# Mini-grammar used by srange() to parse a regex-like "[...]" character set.
# a backslash followed by one punctuation character -> that character
_escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t:t[0][1])
# \x## or \0x## (x in either case) -> the character with that hex code.
# The digits are taken as everything after the x marker. The former
# t[0].lstrip(r'\0x') stripped a *set of characters*: it also removed leading
# '0' hex digits (so r"\x00" became '' and int() raised ValueError) and it
# never removed an upper-case 'X', although the pattern accepts one.
_escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").setParseAction(lambda s,l,t:unichr(int(t[0].lower().partition('x')[2],16)))
# \0## -> the character with that octal code
_escapedOctChar = Regex(r"\\0[0-7]+").setParseAction(lambda s,l,t:unichr(int(t[0][1:],8)))
# any of the escapes above, or one character that is neither a backslash nor ']'
_singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | CharsNotIn(r'\]', exact=1)
# "a-z" style range, grouped as its two end characters
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
_reBracketExpr = Literal("[") + Optional("^").setResultsName("negate") + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body") + "]"
4762
def srange(s):
    r"""
    Helper to easily define string ranges for use in Word construction. Borrows
    syntax from regexp '[]' string range definitions::
        srange("[0-9]")   -> "0123456789"
        srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
        srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"
    The input string must be enclosed in []'s, and the returned string is the
    expanded character set joined into a single string. If the input cannot be
    parsed, an empty string is returned.
    The values enclosed in the []'s may be:
     - a single character
     - an escaped character with a leading backslash (such as C{\-} or C{\]})
     - an escaped hex character with a leading C{'\x'} (C{\x21}, which is a C{'!'} character)
         (C{\0x##} is also supported for backwards compatibility)
     - an escaped octal character with a leading C{'\0'} (C{\041}, which is a C{'!'} character)
     - a range of any of the above, separated by a dash (C{'a-z'}, etc.)
     - any combination of the above (C{'aeiouy'}, C{'a-zA-Z0-9_$'}, etc.)
    """
    def _expanded(part):
        # plain characters pass through; a grouped range expands to every
        # character from its first to its last code point, inclusive
        if not isinstance(part, ParseResults):
            return part
        first, last = ord(part[0]), ord(part[1])
        return ''.join(unichr(code) for code in range(first, last + 1))

    try:
        body = _reBracketExpr.parseString(s).body
        return "".join(_expanded(part) for part in body)
    except Exception:
        return ""
4786
def matchOnlyAtCol(n):
    """
    Helper method that returns a parse action which only accepts a match that
    starts at column C{n} of the input text; any other column raises a
    C{ParseException}.
    """
    def verifyCol(strg, locn, toks):
        if col(locn, strg) == n:
            return
        raise ParseException(strg, locn, "matched token not at column %d" % n)

    return verifyCol
4796
def replaceWith(replStr):
    """
    Helper method that returns a parse action which ignores the matched tokens
    and simply returns the given literal value. Especially useful when used with
    C{L{transformString<ParserElement.transformString>}()}.

    Example::
        num = Word(nums).setParseAction(lambda toks: int(toks[0]))
        na = oneOf("N/A NA").setParseAction(replaceWith(math.nan))
        term = na | num

        OneOrMore(term).parseString("324 234 N/A 234") # -> [324, 234, nan, 234]
    """
    def _replacer(s, l, t):
        # a new one-element list is built on every call
        return [replStr]

    return _replacer
4810
def removeQuotes(s,l,t):
    """
    Helper parse action that drops the first and last character (the quotation
    marks) of the first parsed token.

    Example::
        # by default, quotation marks are included in parsed results
        quotedString.parseString("'Now is the Winter of our Discontent'") # -> ["'Now is the Winter of our Discontent'"]

        # use removeQuotes to strip quotation marks from parsed results
        quotedString.setParseAction(removeQuotes)
        quotedString.parseString("'Now is the Winter of our Discontent'") # -> ["Now is the Winter of our Discontent"]
    """
    quoted = t[0]
    return quoted[1:-1]
4824
def tokenMap(func, *args):
    """
    Helper to define a parse action by mapping a function to all elements of a
    ParseResults list. Any additional args are forwarded to the function after
    the token, as in C{hex_integer = Word(hexnums).setParseAction(tokenMap(int, 16))},
    which converts the parsed data to an integer using base 16. The returned
    parse action is named after C{func}.

    Example (compare the last to example in L{ParserElement.transformString}::
        hex_ints = OneOrMore(Word(hexnums)).setParseAction(tokenMap(int, 16))
        hex_ints.runTests('''
            00 11 22 aa FF 0a 0d 1a
            ''')

        upperword = Word(alphas).setParseAction(tokenMap(str.upper))
        OneOrMore(upperword).runTests('''
            my kingdom for a horse
            ''')

        wd = Word(alphas).setParseAction(tokenMap(str.title))
        OneOrMore(wd).setParseAction(' '.join).runTests('''
            now is the winter of our discontent made glorious summer by this sun of york
            ''')
    prints::
        00 11 22 aa FF 0a 0d 1a
        [0, 17, 34, 170, 255, 10, 13, 26]

        my kingdom for a horse
        ['MY', 'KINGDOM', 'FOR', 'A', 'HORSE']

        now is the winter of our discontent made glorious summer by this sun of york
        ['Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York']
    """
    def pa(s,l,t):
        mapped = []
        for tokn in t:
            mapped.append(func(tokn, *args))
        return mapped

    # name the action after func, falling back to its class name, then to str(func)
    try:
        fallback = func.__class__.__name__
        func_name = getattr(func, '__name__', fallback)
    except Exception:
        func_name = str(func)
    pa.__name__ = func_name

    return pa
4868
# Each token is converted with _ustr() first, then case-mapped.
upcaseTokens = tokenMap(lambda tok: _ustr(tok).upper())
"""(Deprecated) Helper parse action that converts every token to upper case. Deprecated in favor of L{pyparsing_common.upcaseTokens}"""

downcaseTokens = tokenMap(lambda tok: _ustr(tok).lower())
"""(Deprecated) Helper parse action that converts every token to lower case. Deprecated in favor of L{pyparsing_common.downcaseTokens}"""
4874
def _makeTags(tagStr, xml):
    """Internal helper to construct opening and closing tag expressions, given a tag name
    (or a tag-name expression). C{xml} selects strict XML matching; otherwise HTML rules
    are used (caseless tag keyword, lower-cased attribute names, optional and unquoted
    attribute values)."""
    if isinstance(tagStr, basestring):
        resname = tagStr
        tagStr = Keyword(tagStr, caseless=not xml)
    else:
        resname = tagStr.name

    tagAttrName = Word(alphas, alphanums + "_-:")
    if xml:
        tagAttrValue = dblQuotedString.copy().setParseAction( removeQuotes )
        attrGroup = Group( tagAttrName + Suppress("=") + tagAttrValue )
    else:
        printablesLessRAbrack = printables.replace(">", "")
        tagAttrValue = quotedString.copy().setParseAction( removeQuotes ) | Word(printablesLessRAbrack)
        attrGroup = Group( tagAttrName.setParseAction(downcaseTokens) +
                           Optional( Suppress("=") + tagAttrValue ) )

    # "empty" is True when the tag is self-closing ("/" just before ">")
    selfClosing = Optional("/", default=[False]).setResultsName("empty").setParseAction(lambda s,l,t: t[0] == '/')
    openTag = Suppress("<") + tagStr("tag") + Dict(ZeroOrMore(attrGroup)) + selfClosing + Suppress(">")
    closeTag = Combine(_L("</") + tagStr + ">")

    # results names look like startTagName / endTagName, with any ':' removed
    titled = "".join(resname.replace(":", " ").title().split())
    openTag = openTag.setResultsName("start" + titled).setName("<%s>" % resname)
    closeTag = closeTag.setResultsName("end" + titled).setName("</%s>" % resname)
    openTag.tag = resname
    closeTag.tag = resname
    return openTag, closeTag
4903
def makeHTMLTags(tagStr):
    """
    Helper to construct opening and closing tag expressions for HTML, given a
    tag name. Matches tags in either upper or lower case, and attributes with
    namespaces and with quoted or unquoted values.

    Example::
        text = '<td>More info at the <a href="http://pyparsing.wikispaces.com">pyparsing</a> wiki page</td>'
        # makeHTMLTags returns pyparsing expressions for the opening and closing tags as a 2-tuple
        a,a_end = makeHTMLTags("A")
        link_expr = a + SkipTo(a_end)("link_text") + a_end

        for link in link_expr.searchString(text):
            # attributes in the <A> tag (like "href" shown here) are also accessible as named results
            print(link.link_text, '->', link.href)
    prints::
        pyparsing -> http://pyparsing.wikispaces.com
    """
    return _makeTags(tagStr, xml=False)
4922
def makeXMLTags(tagStr):
    """
    Helper to construct opening and closing tag expressions for XML, given a
    tag name. Matches tags only in the given upper/lower case.

    Example: similar to L{makeHTMLTags}
    """
    return _makeTags(tagStr, xml=True)
4931
def withAttribute(*args,**attrDict):
    """
    Helper to create a validating parse action to be used with start tags created
    with C{L{makeXMLTags}} or C{L{makeHTMLTags}}. Use C{withAttribute} to qualify a
    starting tag with a required attribute value, to avoid false matches on common
    tags such as C{<TD>} or C{<DIV>}.

    Call C{withAttribute} with a series of attribute names and values. Specify the list
    of filter attributes names and values as:
     - keyword arguments, as in C{(align="right")}, or
     - as an explicit dict with C{**} operator, when an attribute name is also a Python
          reserved word, as in C{**{"class":"Customer", "align":"right"}}
     - a list of name-value tuples, as in ( ("ns1:class", "Customer"), ("ns2:align","right") )
    For attribute names with a namespace prefix, you must use the second form.  Attribute
    names are matched insensitive to upper/lower case.

    When positional name-value tuples are given, keyword arguments are not consulted.

    If just testing for C{class} (with or without a namespace), use C{L{withClass}}.

    To verify that the attribute exists, but without specifying a value, pass
    C{withAttribute.ANY_VALUE} as the value.

    Example::
        html = '''
            <div>
            Some text
            <div type="grid">1 4 0 1 0</div>
            <div type="graph">1,3 2,3 1,1</div>
            <div>this has no type</div>
            </div>

        '''
        div,div_end = makeHTMLTags("div")

        # only match div tag having a type attribute with value "grid"
        div_grid = div().setParseAction(withAttribute(type="grid"))
        grid_expr = div_grid + SkipTo(div | div_end)("body")
        for grid_header in grid_expr.searchString(html):
            print(grid_header.body)

        # construct a match with any div tag having a type attribute, regardless of the value
        div_any_type = div().setParseAction(withAttribute(type=withAttribute.ANY_VALUE))
        div_expr = div_any_type + SkipTo(div | div_end)("body")
        for div_header in div_expr.searchString(html):
            print(div_header.body)
    prints::
        1 4 0 1 0

        1 4 0 1 0
        1,3 2,3 1,1
    """
    source = args[:] if args else attrDict.items()
    # snapshot the requirements as a list of (name, value) pairs
    attrs = [(name, value) for name, value in source]

    def pa(s,l,tokens):
        for attrName, attrValue in attrs:
            if attrName not in tokens:
                raise ParseException(s,l,"no matching attribute " + attrName)
            if attrValue != withAttribute.ANY_VALUE:
                actual = tokens[attrName]
                if actual != attrValue:
                    raise ParseException(s,l,"attribute '%s' has value '%s', must be '%s'" %
                                                (attrName, actual, attrValue))

    return pa

# sentinel meaning "the attribute must be present, with any value"
withAttribute.ANY_VALUE = object()
4996
def withClass(classname, namespace=''):
    """
    Simplified version of C{L{withAttribute}} for matching on a C{class}
    attribute - which is awkward to spell directly because C{class} is a
    reserved word in Python. If C{namespace} is given, the attribute tested is
    C{<namespace>:class}.

    Example::
        html = '''
            <div>
            Some text
            <div class="grid">1 4 0 1 0</div>
            <div class="graph">1,3 2,3 1,1</div>
            <div>this &lt;div&gt; has no class</div>
            </div>

        '''
        div,div_end = makeHTMLTags("div")
        div_grid = div().setParseAction(withClass("grid"))

        grid_expr = div_grid + SkipTo(div | div_end)("body")
        for grid_header in grid_expr.searchString(html):
            print(grid_header.body)

        div_any_type = div().setParseAction(withClass(withAttribute.ANY_VALUE))
        div_expr = div_any_type + SkipTo(div | div_end)("body")
        for div_header in div_expr.searchString(html):
            print(div_header.body)
    prints::
        1 4 0 1 0

        1 4 0 1 0
        1,3 2,3 1,1
    """
    if namespace:
        classattr = "%s:class" % namespace
    else:
        classattr = "class"
    return withAttribute(**{classattr: classname})
5031
# Holder for the associativity markers that infixNotation() compares against
# in its operator tuples; each marker is a distinct sentinel object.
opAssoc = _Constants()
opAssoc.LEFT = object()
opAssoc.RIGHT = object()
5035
def infixNotation( baseExpr, opList, lpar=Suppress('('), rpar=Suppress(')') ):
    """
    Helper method for constructing grammars of expressions made up of
    operators working in a precedence hierarchy.  Operators may be unary or
    binary, left- or right-associative.  Parse actions can also be attached
    to operator expressions. The generated parser will also recognize the use
    of parentheses to override operator precedences (see example below).

    Note: if you define a deep operator list, you may see performance issues
    when using infixNotation. See L{ParserElement.enablePackrat} for a
    mechanism to potentially improve your parser performance.

    Parameters:
     - baseExpr - expression representing the most basic operand of the
       expression grammar
     - opList - list of tuples, one for each operator precedence level in the
      expression grammar; each tuple is of the form
      (opExpr, numTerms, rightLeftAssoc, parseAction), where:
       - opExpr is the pyparsing expression for the operator;
          may also be a string, which will be converted to a Literal;
          if numTerms is 3, opExpr is a tuple or list of two expressions, for
          the two operators separating the 3 terms
       - numTerms is the number of terms for this operator (must
          be 1, 2, or 3)
       - rightLeftAssoc is the indicator whether the operator is
          right or left associative, using the pyparsing-defined
          constants C{opAssoc.RIGHT} and C{opAssoc.LEFT}.
       - parseAction is the parse action to be associated with
          expressions matching this operator expression (the
          parse action tuple member may be omitted); if the parse action
          is passed a tuple or list of functions, this is equivalent to
          calling C{setParseAction(*fn)} (L{ParserElement.setParseAction})
     - lpar - expression for matching left-parentheses (default=C{Suppress('(')})
     - rpar - expression for matching right-parentheses (default=C{Suppress(')')})

    Raises C{ValueError} if a ternary operator is not given as two expressions,
    if numTerms is not 1, 2 or 3, or if the associativity is neither
    C{opAssoc.LEFT} nor C{opAssoc.RIGHT}.

    Example::
        # simple example of four-function arithmetic with ints and variable names
        integer = pyparsing_common.signed_integer
        varname = pyparsing_common.identifier

        arith_expr = infixNotation(integer | varname,
            [
            ('-', 1, opAssoc.RIGHT),
            (oneOf('* /'), 2, opAssoc.LEFT),
            (oneOf('+ -'), 2, opAssoc.LEFT),
            ])

        arith_expr.runTests('''
            5+3*6
            (5+3)*6
            -2--11
            ''', fullDump=False)
    prints::
        5+3*6
        [[5, '+', [3, '*', 6]]]

        (5+3)*6
        [[[5, '+', 3], '*', 6]]

        -2--11
        [[['-', 2], '-', ['-', 11]]]
    """
    ret = Forward()
    lastExpr = baseExpr | ( lpar + ret + rpar )
    for operDef in opList:
        # pad with None so that the optional parse action may be omitted;
        # tuple() lets an operator definition be given as a list as well
        opExpr,arity,rightLeftAssoc,pa = (tuple(operDef) + (None,))[:4]
        if arity == 3:
            # validate before building the name, so that a bad ternary operator
            # gets this ValueError rather than a string-formatting TypeError
            if opExpr is None or len(opExpr) != 2:
                raise ValueError("if numterms=3, opExpr must be a tuple or list of two expressions")
            opExpr1, opExpr2 = opExpr
            termName = "%s%s term" % (opExpr1, opExpr2)
        else:
            termName = "%s term" % opExpr
        thisExpr = Forward().setName(termName)
        if rightLeftAssoc == opAssoc.LEFT:
            if arity == 1:
                matchExpr = FollowedBy(lastExpr + opExpr) + Group( lastExpr + OneOrMore( opExpr ) )
            elif arity == 2:
                if opExpr is not None:
                    matchExpr = FollowedBy(lastExpr + opExpr + lastExpr) + Group( lastExpr + OneOrMore( opExpr + lastExpr ) )
                else:
                    # no operator expression: terms are simply adjacent
                    matchExpr = FollowedBy(lastExpr+lastExpr) + Group( lastExpr + OneOrMore(lastExpr) )
            elif arity == 3:
                matchExpr = FollowedBy(lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr) + \
                            Group( lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr )
            else:
                raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
        elif rightLeftAssoc == opAssoc.RIGHT:
            if arity == 1:
                # try to avoid LR with this extra test
                if not isinstance(opExpr, Optional):
                    opExpr = Optional(opExpr)
                matchExpr = FollowedBy(opExpr.expr + thisExpr) + Group( opExpr + thisExpr )
            elif arity == 2:
                if opExpr is not None:
                    matchExpr = FollowedBy(lastExpr + opExpr + thisExpr) + Group( lastExpr + OneOrMore( opExpr + thisExpr ) )
                else:
                    matchExpr = FollowedBy(lastExpr + thisExpr) + Group( lastExpr + OneOrMore( thisExpr ) )
            elif arity == 3:
                matchExpr = FollowedBy(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr) + \
                            Group( lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr )
            else:
                raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
        else:
            raise ValueError("operator must indicate right or left associativity")
        if pa:
            if isinstance(pa, (tuple, list)):
                matchExpr.setParseAction(*pa)
            else:
                matchExpr.setParseAction(pa)
        # this level matches its own operator form, or falls through to the
        # expression built for the previous entry in opList
        thisExpr <<= ( matchExpr.setName(termName) | lastExpr )
        lastExpr = thisExpr
    ret <<= lastExpr
    return ret
5147
# Backward-compatible alias: the old name is bound to the same function object.
operatorPrecedence = infixNotation
"""(Deprecated) Former name of C{L{infixNotation}}, will be dropped in a future release."""

# Quoted-string expressions. Each Regex matches the opening quote and the body; the
# closing quote is a separate literal, and Combine joins both into one token.
# The body may contain: any character other than the quote character, newline,
# carriage return or backslash; a doubled quote character; or a backslash escape
# (a backslash followed by one character other than 'x', or by 'x' and hex digits).
dblQuotedString = Combine(Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*')+'"').setName("string enclosed in double quotes")
sglQuotedString = Combine(Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*")+"'").setName("string enclosed in single quotes")
# either of the two forms above ('+' binds tighter than '|')
quotedString = Combine(Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*')+'"'|
                       Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*")+"'").setName("quotedString using single or double quotes")
# a quoted string immediately preceded by the letter 'u'
unicodeString = Combine(_L('u') + quotedString.copy()).setName("unicode string literal")
5156
def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString.copy()):
    """
    Helper method for defining nested lists enclosed in opening and closing
    delimiters ("(" and ")" are the default).

    Parameters:
     - opener - opening character for a nested list (default=C{"("}); can also be a pyparsing expression
     - closer - closing character for a nested list (default=C{")"}); can also be a pyparsing expression
     - content - expression for items within the nested lists (default=C{None})
     - ignoreExpr - expression for ignoring opening and closing delimiters (default=C{quotedString})

    If an expression is not provided for the content argument, the nested
    expression will capture all whitespace-delimited content between delimiters
    as a list of separate values. In that case opener and closer must both be
    strings, otherwise C{ValueError} is raised. C{ValueError} is also raised
    when opener and closer are equal.

    Use the C{ignoreExpr} argument to define expressions that may contain
    opening or closing characters that should not be treated as opening
    or closing characters for nesting, such as quotedString or a comment
    expression.  Specify multiple expressions using an C{L{Or}} or C{L{MatchFirst}}.
    The default is L{quotedString}, but if no expressions are to be ignored,
    then pass C{None} for this argument.

    Example::
        data_type = oneOf("void int short long char float double")
        decl_data_type = Combine(data_type + Optional(Word('*')))
        ident = Word(alphas+'_', alphanums+'_')
        number = pyparsing_common.number
        arg = Group(decl_data_type + ident)
        LPAR,RPAR = map(Suppress, "()")

        code_body = nestedExpr('{', '}', ignoreExpr=(quotedString | cStyleComment))

        c_function = (decl_data_type("type")
                      + ident("name")
                      + LPAR + Optional(delimitedList(arg), [])("args") + RPAR
                      + code_body("body"))
        c_function.ignore(cStyleComment)

        source_code = '''
            int is_odd(int x) {
                return (x%2);
            }

            int dec_to_hex(char hchar) {
                if (hchar >= '0' && hchar <= '9') {
                    return (ord(hchar)-ord('0'));
                } else {
                    return (10+ord(hchar)-ord('A'));
                }
            }
        '''
        for func in c_function.searchString(source_code):
            print("%(name)s (%(type)s) args: %(args)s" % func)

    prints::
        is_odd (int) args: [['int', 'x']]
        dec_to_hex (int) args: [['char', 'hchar']]
    """
    if opener == closer:
        raise ValueError("opening and closing strings cannot be the same")

    if content is None:
        # a default content expression can only be derived from string delimiters
        if not (isinstance(opener, basestring) and isinstance(closer, basestring)):
            raise ValueError("opening and closing arguments must be strings if no content expression is given")

        stripToken = lambda t: t[0].strip()
        whitespace = ParserElement.DEFAULT_WHITE_CHARS
        if len(opener) == 1 and len(closer) == 1:
            # single-character delimiters can simply be excluded from the content
            excluded = opener + closer + whitespace
            if ignoreExpr is not None:
                content = Combine(OneOrMore(~ignoreExpr +
                                  CharsNotIn(excluded, exact=1))
                                  ).setParseAction(stripToken)
            else:
                content = empty.copy() + CharsNotIn(excluded).setParseAction(stripToken)
        else:
            # multi-character delimiters need a negative lookahead per character
            if ignoreExpr is not None:
                content = Combine(OneOrMore(~ignoreExpr +
                                  ~Literal(opener) + ~Literal(closer) +
                                  CharsNotIn(whitespace, exact=1))
                                  ).setParseAction(stripToken)
            else:
                content = Combine(OneOrMore(~Literal(opener) + ~Literal(closer) +
                                  CharsNotIn(whitespace, exact=1))
                                  ).setParseAction(stripToken)

    ret = Forward()
    if ignoreExpr is not None:
        inner = ignoreExpr | ret | content
    else:
        inner = ret | content
    ret <<= Group( Suppress(opener) + ZeroOrMore(inner) + Suppress(closer) )
    ret.setName('nested %s%s expression' % (opener,closer))
    return ret
5246
def indentedBlock(blockStatementExpr, indentStack, indent=True):
    """
    Helper method for defining space-delimited indentation blocks, such as
    those used to define block statements in Python source code.

    Parameters:
     - blockStatementExpr - expression defining syntax of statement that
            is repeated within the indented block
     - indentStack - list created by caller to manage indentation stack
            (multiple statementWithIndentedBlock expressions within a single grammar
            should share a common indentStack)
     - indent - boolean indicating whether block must be indented beyond the
            the current level; set to False for block of left-most statements
            (default=C{True})

    A valid block must contain at least one C{blockStatement}.

    Example::
        data = '''
        def A(z):
          A1
          B = 100
          G = A2
          A2
          A3
        B
        def BB(a,b,c):
          BB1
          def BBA():
            bba1
            bba2
            bba3
        C
        D
        def spam(x,y):
             def eggs(z):
                 pass
        '''


        indentStack = [1]
        stmt = Forward()

        identifier = Word(alphas, alphanums)
        funcDecl = ("def" + identifier + Group( "(" + Optional( delimitedList(identifier) ) + ")" ) + ":")
        func_body = indentedBlock(stmt, indentStack)
        funcDef = Group( funcDecl + func_body )

        rvalue = Forward()
        funcCall = Group(identifier + "(" + Optional(delimitedList(rvalue)) + ")")
        rvalue << (funcCall | identifier | Word(nums))
        assignment = Group(identifier + "=" + rvalue)
        stmt << ( funcDef | assignment | identifier )

        module_body = OneOrMore(stmt)

        parseTree = module_body.parseString(data)
        parseTree.pprint()
    prints::
        [['def',
          'A',
          ['(', 'z', ')'],
          ':',
          [['A1'], [['B', '=', '100']], [['G', '=', 'A2']], ['A2'], ['A3']]],
         'B',
         ['def',
          'BB',
          ['(', 'a', 'b', 'c', ')'],
          ':',
          [['BB1'], [['def', 'BBA', ['(', ')'], ':', [['bba1'], ['bba2'], ['bba3']]]]]],
         'C',
         'D',
         ['def',
          'spam',
          ['(', 'x', 'y', ')'],
          ':',
          [[['def', 'eggs', ['(', 'z', ')'], ':', [['pass']]]]]]]
    """
    def checkPeerIndent(s,l,t):
        # a statement must start in the column currently on top of the stack
        if l >= len(s):
            return
        curCol = col(l,s)
        if curCol == indentStack[-1]:
            return
        if curCol > indentStack[-1]:
            raise ParseFatalException(s,l,"illegal nesting")
        raise ParseException(s,l,"not a peer entry")

    def checkSubIndent(s,l,t):
        # entering a block: the column must be deeper than the current level
        curCol = col(l,s)
        if curCol <= indentStack[-1]:
            raise ParseException(s,l,"not a subentry")
        indentStack.append( curCol )

    def checkUnindent(s,l,t):
        # leaving a block: the column must be back at (or left of) the enclosing level
        if l >= len(s):
            return
        curCol = col(l,s)
        isUnindent = indentStack and curCol < indentStack[-1] and curCol <= indentStack[-2]
        if not isUnindent:
            raise ParseException(s,l,"not an unindent")
        indentStack.pop()

    NL = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress())
    PEER = Empty().setParseAction(checkPeerIndent).setName('')
    statements = OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )
    if indent:
        INDENT = (Empty() + Empty().setParseAction(checkSubIndent)).setName('INDENT')
        UNDENT = Empty().setParseAction(checkUnindent).setName('UNINDENT')
        smExpr = Group( Optional(NL) + INDENT + statements + UNDENT )
    else:
        smExpr = Group( Optional(NL) + statements )
    # a backslash at the end of a line is ignored inside block statements
    blockStatementExpr.ignore(_bslash + LineEnd())
    return smExpr.setName('indented block')
5360
# Character sets built by srange() from 8-bit ranges written with \0xNN escapes.
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")

# Open/close tag expressions for a tag name made of letters followed by
# letters, digits, '_' or ':'.
anyOpenTag,anyCloseTag = makeHTMLTags(Word(alphas,alphanums+"_:").setName('any tag'))
# Entity name -> replacement character; zip() pairs the six names positionally
# with the six characters of the second string (gt, lt, amp, nbsp, quot, apos).
_htmlEntityMap = dict(zip("gt lt amp nbsp quot apos".split(),'><& "\''))
# Matches '&<name>;' for any name in _htmlEntityMap; the name is captured in
# the regex group called 'entity'.
commonHTMLEntity = Regex('&(?P<entity>' + '|'.join(_htmlEntityMap.keys()) +");").setName("common HTML entity")
def replaceHTMLEntity(t):
    """
    Helper parser action to replace common HTML entities with their special characters.

    Reads the C{entity} attribute of C{t} and looks it up in the module's entity
    map; the result of that lookup (C{None} for an unknown name) is returned.
    """
    entity_name = t.entity
    replacement = _htmlEntityMap.get(entity_name)
    return replacement
5370
# it's easy to get these comment structures wrong - they're very common, so may as well make them available
# The regex covers '/*' plus the comment body up to (not including) the
# closing '*/', which is added as a separate literal.
cStyleComment = Combine(Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/').setName("C style comment")
"Comment of the form C{/* ... */}"

# Non-greedy match so the comment ends at the first '-->'.
htmlComment = Regex(r"<!--[\s\S]*?-->").setName("HTML comment")
"Comment of the form C{<!-- ... -->}"

# '.' does not match a newline by default, so this stops at the end of the line.
restOfLine = Regex(r".*").leaveWhitespace().setName("rest of line")
# The pattern accepts a backslash-newline pair, so a trailing backslash
# continues the comment onto the next line.
dblSlashComment = Regex(r"//(?:\\\n|[^\n])*").setName("// comment")
"Comment of the form C{// ... (to end of line)}"

# '+' binds tighter than '|': this is (block-comment regex + '*/') | dblSlashComment.
cppStyleComment = Combine(Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/'| dblSlashComment).setName("C++ style comment")
"Comment of either form C{L{cStyleComment}} or C{L{dblSlashComment}}"

# Alias: the very same expression object as cppStyleComment, not a copy.
javaStyleComment = cppStyleComment
"Same as C{L{cppStyleComment}}"

pythonStyleComment = Regex(r"#.*").setName("Python style comment")
"Comment of the form C{# ... (to end of line)}"

# One item of a comma-separated list: runs of printable characters other than
# ',', each optionally followed by spaces/tabs provided that whitespace is not
# directly followed by a ',' or the end of the line.
_commasepitem = Combine(OneOrMore(Word(printables, excludeChars=',') +
                                  Optional( Word(" \t") +
                                            ~Literal(",") + ~LineEnd() ) ) ).streamline().setName("commaItem")
# Each list element is optional and defaults to "", so an empty position
# between commas yields an empty string.
commaSeparatedList = delimitedList( Optional( quotedString.copy() | _commasepitem, default="") ).setName("commaSeparatedList")
"""(Deprecated) Predefined expression of 1 or more printable words or quoted strings, separated by commas.
   This expression is deprecated in favor of L{pyparsing_common.comma_separated_list}."""
5397
5398 # some other useful expressions - using lower-case class name since we are really using this as a namespace
class pyparsing_common:
    """
    Here are some common low-level expressions that may be useful in jump-starting parser development:
     - numeric forms (L{integers<integer>}, L{reals<real>}, L{scientific notation<sci_real>})
     - common L{programming identifiers<identifier>}
     - network addresses (L{MAC<mac_address>}, L{IPv4<ipv4_address>}, L{IPv6<ipv6_address>})
     - ISO8601 L{dates<iso8601_date>} and L{datetime<iso8601_datetime>}
     - L{UUID<uuid>}
     - L{comma-separated list<comma_separated_list>}
    Parse actions:
     - C{L{convertToInteger}}
     - C{L{convertToFloat}}
     - C{L{convertToDate}}
     - C{L{convertToDatetime}}
     - C{L{stripHTMLTags}}
     - C{L{upcaseTokens}}
     - C{L{downcaseTokens}}

    Example::
        pyparsing_common.number.runTests('''
            # any int or real number, returned as the appropriate type
            100
            -100
            +100
            3.14159
            6.02e23
            1e-12
            ''')

        pyparsing_common.fnumber.runTests('''
            # any int or real number, returned as float
            100
            -100
            +100
            3.14159
            6.02e23
            1e-12
            ''')

        pyparsing_common.hex_integer.runTests('''
            # hex numbers
            100
            FF
            ''')

        pyparsing_common.fraction.runTests('''
            # fractions
            1/2
            -3/4
            ''')

        pyparsing_common.mixed_integer.runTests('''
            # mixed fractions
            1
            1/2
            -3/4
            1-3/4
            ''')

        import uuid
        pyparsing_common.uuid.setParseAction(tokenMap(uuid.UUID))
        pyparsing_common.uuid.runTests('''
            # uuid
            12345678-1234-5678-1234-567812345678
            ''')
    prints::
        # any int or real number, returned as the appropriate type
        100
        [100]

        -100
        [-100]

        +100
        [100]

        3.14159
        [3.14159]

        6.02e23
        [6.02e+23]

        1e-12
        [1e-12]

        # any int or real number, returned as float
        100
        [100.0]

        -100
        [-100.0]

        +100
        [100.0]

        3.14159
        [3.14159]

        6.02e23
        [6.02e+23]

        1e-12
        [1e-12]

        # hex numbers
        100
        [256]

        FF
        [255]

        # fractions
        1/2
        [0.5]

        -3/4
        [-0.75]

        # mixed fractions
        1
        [1]

        1/2
        [0.5]

        -3/4
        [-0.75]

        1-3/4
        [1.75]

        # uuid
        12345678-1234-5678-1234-567812345678
        [UUID('12345678-1234-5678-1234-567812345678')]
    """

    convertToInteger = tokenMap(int)
    """
    Parse action for converting parsed integers to Python int
    """

    convertToFloat = tokenMap(float)
    """
    Parse action for converting parsed numbers to Python float
    """

    integer = Word(nums).setName("integer").setParseAction(convertToInteger)
    """expression that parses an unsigned integer, returns an int"""

    # The extra 16 is handed to tokenMap along with int; per the class example
    # above, '100' parses to 256 (base-16 conversion).
    hex_integer = Word(hexnums).setName("hex integer").setParseAction(tokenMap(int,16))
    """expression that parses a hexadecimal integer, returns an int"""

    signed_integer = Regex(r'[+-]?\d+').setName("signed integer").setParseAction(convertToInteger)
    """expression that parses an integer with optional leading sign, returns an int"""

    # signed_integer() is called to obtain the operand expressions, which then
    # get convertToFloat as their parse action, so numerator and denominator
    # arrive at the division below as floats.
    fraction = (signed_integer().setParseAction(convertToFloat) + '/' + signed_integer().setParseAction(convertToFloat)).setName("fraction")
    """fractional expression of an integer divided by an integer, returns a float"""
    # Tokens are [numerator, '/', denominator]: divide the first by the last.
    # NOTE(review): a zero denominator makes this lambda raise ZeroDivisionError;
    # confirm how callers expect that to surface.
    fraction.addParseAction(lambda t: t[0]/t[-1])

    # The '-' between the integer and fraction parts is suppressed, so the
    # remaining tokens are purely numeric and can be added up.
    mixed_integer = (fraction | signed_integer + Optional(Optional('-').suppress() + fraction)).setName("fraction or mixed integer-fraction")
    """mixed integer of the form 'integer - fraction', with optional leading integer, returns float"""
    mixed_integer.addParseAction(sum)

    # Requires a decimal point; digits after the point are optional.
    real = Regex(r'[+-]?\d+\.\d*').setName("real number").setParseAction(convertToFloat)
    """expression that parses a floating point number and returns a float"""

    # Requires either an exponent or a decimal point (optionally followed by an
    # exponent), so a bare integer does not match.
    sci_real = Regex(r'[+-]?\d+([eE][+-]?\d+|\.\d*([eE][+-]?\d+)?)').setName("real number with scientific notation").setParseAction(convertToFloat)
    """expression that parses a floating point number with optional scientific notation and returns a float"""

    # streamlining this expression makes the docs nicer-looking
    number = (sci_real | real | signed_integer).streamline()
    """any numeric expression, returns the corresponding Python type"""

    # Single regex covering integers, reals and scientific notation; always
    # converted with float.
    fnumber = Regex(r'[+-]?\d+\.?\d*([eE][+-]?\d+)?').setName("fnumber").setParseAction(convertToFloat)
    """any int or real number, returned as float"""

    identifier = Word(alphas+'_', alphanums+'_').setName("identifier")
    """typical code identifier (leading alpha or '_', followed by 0 or more alphas, nums, or '_')"""

    # Four dot-separated octets, each alternative limited to 0-255 by the pattern.
    ipv4_address = Regex(r'(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})(\.(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})){3}').setName("IPv4 address")
    "IPv4 address (C{0.0.0.0 - 255.255.255.255})"

    # One 1-4 digit hex group of an IPv6 address.
    _ipv6_part = Regex(r'[0-9a-fA-F]{1,4}').setName("hex_integer")
    # Full form: exactly eight groups separated by ':'.
    _full_ipv6_address = (_ipv6_part + (':' + _ipv6_part)*7).setName("full IPv6 address")
    # Short form: optional groups on either side of a '::'.
    _short_ipv6_address = (Optional(_ipv6_part + (':' + _ipv6_part)*(0,6)) + "::" + Optional(_ipv6_part + (':' + _ipv6_part)*(0,6))).setName("short IPv6 address")
    # Accept the short form only when fewer than 8 of its tokens are hex groups.
    _short_ipv6_address.addCondition(lambda t: sum(1 for tt in t if pyparsing_common._ipv6_part.matches(tt)) < 8)
    # Mixed form: the literal '::ffff:' prefix followed by an IPv4 address.
    _mixed_ipv6_address = ("::ffff:" + ipv4_address).setName("mixed IPv6 address")
    ipv6_address = Combine((_full_ipv6_address | _mixed_ipv6_address | _short_ipv6_address).setName("IPv6 address")).setName("IPv6 address")
    "IPv6 address (long, short, or mixed form)"

    # The first delimiter is captured and reused through the \1 backreference,
    # so all delimiters must be the same character.
    # NOTE: the pattern matches six hex pairs (1 + 1 + 4 repeats), although the
    # description string below shows only five 'xx' groups.
    mac_address = Regex(r'[0-9a-fA-F]{2}([:.-])[0-9a-fA-F]{2}(?:\1[0-9a-fA-F]{2}){4}').setName("MAC address")
    "MAC address xx:xx:xx:xx:xx (may also have '-' or '.' delimiters)"

    @staticmethod
    def convertToDate(fmt="%Y-%m-%d"):
        """
        Helper to create a parse action for converting parsed date string to Python datetime.date

        Params -
         - fmt - format to be passed to datetime.strptime (default=C{"%Y-%m-%d"})

        Returns a parse action taking C{(s, l, t)}; it raises C{ParseException}
        when the first token does not match C{fmt}.

        Example::
            date_expr = pyparsing_common.iso8601_date.copy()
            date_expr.setParseAction(pyparsing_common.convertToDate())
            print(date_expr.parseString("1999-12-31"))
        prints::
            [datetime.date(1999, 12, 31)]
        """
        def cvt_fn(s,l,t):
            try:
                return datetime.strptime(t[0], fmt).date()
            except ValueError as ve:
                # Re-raise the strptime failure as a ParseException at location l.
                raise ParseException(s, l, str(ve))
        return cvt_fn

    @staticmethod
    def convertToDatetime(fmt="%Y-%m-%dT%H:%M:%S.%f"):
        """
        Helper to create a parse action for converting parsed datetime string to Python datetime.datetime

        Params -
         - fmt - format to be passed to datetime.strptime (default=C{"%Y-%m-%dT%H:%M:%S.%f"})

        Returns a parse action taking C{(s, l, t)}; it raises C{ParseException}
        when the first token does not match C{fmt}.

        Example::
            dt_expr = pyparsing_common.iso8601_datetime.copy()
            dt_expr.setParseAction(pyparsing_common.convertToDatetime())
            print(dt_expr.parseString("1999-12-31T23:59:59.999"))
        prints::
            [datetime.datetime(1999, 12, 31, 23, 59, 59, 999000)]
        """
        def cvt_fn(s,l,t):
            try:
                return datetime.strptime(t[0], fmt)
            except ValueError as ve:
                # Re-raise the strptime failure as a ParseException at location l.
                raise ParseException(s, l, str(ve))
        return cvt_fn

    # Month and day are optional in the pattern: 'yyyy', 'yyyy-mm' and
    # 'yyyy-mm-dd' all match.
    iso8601_date = Regex(r'(?P<year>\d{4})(?:-(?P<month>\d\d)(?:-(?P<day>\d\d))?)?').setName("ISO8601 date")
    "ISO8601 date (C{yyyy-mm-dd})"

    iso8601_datetime = Regex(r'(?P<year>\d{4})-(?P<month>\d\d)-(?P<day>\d\d)[T ](?P<hour>\d\d):(?P<minute>\d\d)(:(?P<second>\d\d(\.\d*)?)?)?(?P<tz>Z|[+-]\d\d:?\d\d)?').setName("ISO8601 datetime")
    "ISO8601 datetime (C{yyyy-mm-ddThh:mm:ss.s(Z|+-00:00)}) - trailing seconds, milliseconds, and timezone optional; accepts separating C{'T'} or C{' '}"

    # Hex digit groups of 8-4-4-4-12, separated by '-'.
    uuid = Regex(r'[0-9a-fA-F]{8}(-[0-9a-fA-F]{4}){3}-[0-9a-fA-F]{12}').setName("UUID")
    "UUID (C{xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx})"

    # Matches any opening or closing tag and suppresses it; used by
    # stripHTMLTags below.
    _html_stripper = anyOpenTag.suppress() | anyCloseTag.suppress()
    @staticmethod
    def stripHTMLTags(s, l, tokens):
        """
        Parse action to remove HTML tags from web page HTML source

        Only the first token is processed: it is passed through
        C{_html_stripper.transformString} and the result is returned.

        Example::
            # strip HTML links from normal text
            text = '<td>More info at the <a href="http://pyparsing.wikispaces.com">pyparsing</a> wiki page</td>'
            td,td_end = makeHTMLTags("TD")
            table_text = td + SkipTo(td_end).setParseAction(pyparsing_common.stripHTMLTags)("body") + td_end

            print(table_text.parseString(text).body) # -> 'More info at the pyparsing wiki page'
        """
        return pyparsing_common._html_stripper.transformString(tokens[0])

    # One list item: runs of printable characters other than ',', each
    # optionally followed by spaces/tabs, stopping at a ',' or the end of the line.
    _commasepitem = Combine(OneOrMore(~Literal(",") + ~LineEnd() + Word(printables, excludeChars=',')
                                        + Optional( White(" \t") ) ) ).streamline().setName("commaItem")
    # Each element is optional and defaults to "", so an empty position
    # between commas yields an empty string.
    comma_separated_list = delimitedList( Optional( quotedString.copy() | _commasepitem, default="") ).setName("comma separated list")
    """Predefined expression of 1 or more printable words or quoted strings, separated by commas."""

    # Wrapped in staticmethod so that access through the class or an instance
    # yields the plain function instead of a bound method.
    upcaseTokens = staticmethod(tokenMap(lambda t: _ustr(t).upper()))
    """Parse action to convert tokens to upper case."""

    downcaseTokens = staticmethod(tokenMap(lambda t: _ustr(t).lower()))
    """Parse action to convert tokens to lower case."""
5671
5672
if __name__ == "__main__":

    # ---- demo 1: a tiny "select <columns> from <tables>" grammar ----
    selectToken = CaselessLiteral("select")
    fromToken = CaselessLiteral("from")

    ident = Word(alphas, alphanums + "_$")

    # Dotted names are combined into one token and upper-cased.
    columnName = (
        delimitedList(ident, ".", combine=True)
        .setParseAction(upcaseTokens)
    )
    columnNameList = Group(delimitedList(columnName)).setName("columns")
    columnSpec = ('*' | columnNameList)

    tableName = (
        delimitedList(ident, ".", combine=True)
        .setParseAction(upcaseTokens)
    )
    tableNameList = Group(delimitedList(tableName)).setName("tables")

    simpleSQL = (
        selectToken("command")
        + columnSpec("columns")
        + fromToken
        + tableNameList("tables")
    )

    # demo runTests method, including embedded comments in test string
    simpleSQL.runTests("""
        # '*' as column list and dotted table name
        select * from SYS.XYZZY

        # caseless match on "SELECT", and casts back to "select"
        SELECT * from XYZZY, ABC

        # list of column names, and mixed case SELECT keyword
        Select AA,BB,CC from Sys.dual

        # multiple tables
        Select A, B, C from Sys.dual, Table2

        # invalid SELECT keyword - should fail
        Xelect A, B, C from Sys.dual

        # incomplete command - should fail
        Select

        # invalid column name - should fail
        Select ^^^ frox Sys.dual

        """)

    # ---- demo 2: numeric expressions ----
    # The same inputs are run through `number` (result keeps the natural
    # Python type) and then `fnumber` (any int or real number, returned as float).
    numeric_demo = """
        100
        -100
        +100
        3.14159
        6.02e23
        1e-12
        """
    for demo_expr in (pyparsing_common.number, pyparsing_common.fnumber):
        demo_expr.runTests(numeric_demo)

    pyparsing_common.hex_integer.runTests("""
        100
        FF
        """)

    # ---- demo 3: UUIDs converted to uuid.UUID objects ----
    import uuid
    pyparsing_common.uuid.setParseAction(tokenMap(uuid.UUID))
    pyparsing_common.uuid.runTests("""
        12345678-1234-5678-1234-567812345678
        """)