12 _escape_regex_range_chars
,
22 int_expr
: typing
.Optional
[ParserElement
] = None,
24 intExpr
: typing
.Optional
[ParserElement
] = None,
26 """Helper to define a counted list of expressions.
28 This helper defines a pattern of the form::
30 integer expr expr expr...
32 where the leading integer tells how many expr expressions follow.
33 The matched tokens returns the array of expr tokens as a list - the
34 leading count token is suppressed.
36 If ``int_expr`` is specified, it should be a pyparsing expression
37 that produces an integer value.
41 counted_array(Word(alphas)).parse_string('2 ab cd ef') # -> ['ab', 'cd']
43 # in this parser, the leading integer value is given in binary,
44 # '10' indicating that 2 values are in the array
45 binary_constant = Word('01').set_parse_action(lambda t: int(t[0], 2))
46 counted_array(Word(alphas), int_expr=binary_constant).parse_string('10 ab cd ef') # -> ['ab', 'cd']
48 # if other fields must be parsed after the count but before the
49 # list items, give the fields results names and they will
50 # be preserved in the returned ParseResults:
51 count_with_metadata = integer + Word(alphas)("type")
52 typed_array = counted_array(Word(alphanums), int_expr=count_with_metadata)("items")
53 result = typed_array.parse_string("3 bool True True False")
57 # ['True', 'True', 'False']
58 # - items: ['True', 'True', 'False']
61 intExpr
= intExpr
or int_expr
62 array_expr
= Forward()
64 def count_field_parse_action(s
, l
, t
):
67 array_expr
<<= (expr
* n
) if n
else Empty()
68 # clear list contents, but keep any named results
72 intExpr
= Word(nums
).set_parse_action(lambda t
: int(t
[0]))
74 intExpr
= intExpr
.copy()
75 intExpr
.set_name("arrayLen")
76 intExpr
.add_parse_action(count_field_parse_action
, call_during_try
=True)
77 return (intExpr
+ array_expr
).set_name("(len) " + str(expr
) + "...")
80 def match_previous_literal(expr
: ParserElement
) -> ParserElement
:
81 """Helper to define an expression that is indirectly defined from
82 the tokens matched in a previous expression, that is, it looks for
83 a 'repeat' of a previous expression. For example::
86 second = match_previous_literal(first)
87 match_expr = first + ":" + second
89 will match ``"1:1"``, but not ``"1:2"``. Because this
90 matches a previous literal, will also match the leading
91 ``"1:1"`` in ``"1:10"``. If this is not desired, use
92 :class:`match_previous_expr`. Do *not* use with packrat parsing
97 def copy_token_to_repeater(s
, l
, t
):
103 tflat
= _flatten(t
.as_list())
104 rep
<< And(Literal(tt
) for tt
in tflat
)
108 expr
.add_parse_action(copy_token_to_repeater
, callDuringTry
=True)
109 rep
.set_name("(prev) " + str(expr
))
113 def match_previous_expr(expr
: ParserElement
) -> ParserElement
:
114 """Helper to define an expression that is indirectly defined from
115 the tokens matched in a previous expression, that is, it looks for
116 a 'repeat' of a previous expression. For example::
119 second = match_previous_expr(first)
120 match_expr = first + ":" + second
122 will match ``"1:1"``, but not ``"1:2"``. Because this
123 matches by expressions, will *not* match the leading ``"1:1"``
124 in ``"1:10"``; the expressions are evaluated first, and then
125 compared, so ``"1"`` is compared with ``"10"``. Do *not* use
126 with packrat parsing enabled.
132 def copy_token_to_repeater(s
, l
, t
):
133 matchTokens
= _flatten(t
.as_list())
135 def must_match_these_tokens(s
, l
, t
):
136 theseTokens
= _flatten(t
.as_list())
137 if theseTokens
!= matchTokens
:
138 raise ParseException(
139 s
, l
, f
"Expected {matchTokens}, found{theseTokens}"
142 rep
.set_parse_action(must_match_these_tokens
, callDuringTry
=True)
144 expr
.add_parse_action(copy_token_to_repeater
, callDuringTry
=True)
145 rep
.set_name("(prev) " + str(expr
))
150 strs
: Union
[typing
.Iterable
[str], str],
151 caseless
: bool = False,
152 use_regex
: bool = True,
153 as_keyword
: bool = False,
155 useRegex
: bool = True,
156 asKeyword
: bool = False,
158 """Helper to quickly define a set of alternative :class:`Literal` s,
159 and makes sure to do longest-first testing when there is a conflict,
160 regardless of the input order, but returns
161 a :class:`MatchFirst` for best performance.
165 - ``strs`` - a string of space-delimited literals, or a collection of
167 - ``caseless`` - treat all literals as caseless - (default= ``False``)
168 - ``use_regex`` - as an optimization, will
169 generate a :class:`Regex` object; otherwise, will generate
170 a :class:`MatchFirst` object (if ``caseless=True`` or ``as_keyword=True``, or if
171 creating a :class:`Regex` raises an exception) - (default= ``True``)
172 - ``as_keyword`` - enforce :class:`Keyword`-style matching on the
173 generated expressions - (default= ``False``)
174 - ``asKeyword`` and ``useRegex`` are retained for pre-PEP8 compatibility,
175 but will be removed in a future release
179 comp_oper = one_of("< = > <= >= !=")
183 comparison_expr = term + comp_oper + term
184 print(comparison_expr.search_string("B = 12 AA=23 B<=AA AA>12"))
188 [['B', '=', '12'], ['AA', '=', '23'], ['B', '<=', 'AA'], ['AA', '>', '12']]
190 asKeyword
= asKeyword
or as_keyword
191 useRegex
= useRegex
and use_regex
194 isinstance(caseless
, str_type
)
195 and __diag__
.warn_on_multiple_string_args_to_oneof
198 "More than one string argument passed to one_of, pass"
199 " choices as a list or space-delimited string",
204 isequal
= lambda a
, b
: a
.upper() == b
.upper()
205 masks
= lambda a
, b
: b
.upper().startswith(a
.upper())
206 parseElementClass
= CaselessKeyword
if asKeyword
else CaselessLiteral
208 isequal
= lambda a
, b
: a
== b
209 masks
= lambda a
, b
: b
.startswith(a
)
210 parseElementClass
= Keyword
if asKeyword
else Literal
212 symbols
: List
[str] = []
213 if isinstance(strs
, str_type
):
214 strs
= typing
.cast(str, strs
)
215 symbols
= strs
.split()
216 elif isinstance(strs
, Iterable
):
219 raise TypeError("Invalid argument to one_of, expected string or iterable")
223 # reorder given symbols to take care to avoid masking longer choices with shorter ones
224 # (but only if the given symbols are not just single characters)
225 if any(len(sym
) > 1 for sym
in symbols
):
227 while i
< len(symbols
) - 1:
229 for j
, other
in enumerate(symbols
[i
+ 1 :]):
230 if isequal(other
, cur
):
231 del symbols
[i
+ j
+ 1]
233 elif masks(cur
, other
):
234 del symbols
[i
+ j
+ 1]
235 symbols
.insert(i
, other
)
241 re_flags
: int = re
.IGNORECASE
if caseless
else 0
244 if all(len(sym
) == 1 for sym
in symbols
):
245 # symbols are just single characters, create range regex pattern
246 patt
= f
"[{''.join(_escape_regex_range_chars(sym) for sym in symbols)}]"
248 patt
= "|".join(re
.escape(sym
) for sym
in symbols
)
250 # wrap with \b word break markers if defining as keywords
252 patt
= rf
"\b(?:{patt})\b"
254 ret
= Regex(patt
, flags
=re_flags
).set_name(" | ".join(symbols
))
257 # add parse action to return symbols as specified, not in random
258 # casing as found in input string
259 symbol_map
= {sym.lower(): sym for sym in symbols}
260 ret
.add_parse_action(lambda s
, l
, t
: symbol_map
[t
[0].lower()])
266 "Exception creating Regex for one_of, building MatchFirst", stacklevel
=2
269 # last resort, just use MatchFirst
270 return MatchFirst(parseElementClass(sym
) for sym
in symbols
).set_name(
def dict_of(key: ParserElement, value: ParserElement) -> ParserElement:
    """Helper to easily and clearly define a dictionary by specifying
    the respective patterns for the key and value.  Takes care of
    defining the :class:`Dict`, :class:`ZeroOrMore`, and
    :class:`Group` tokens in the proper order.  The key pattern
    can include delimiting markers or punctuation, as long as they are
    suppressed, thereby leaving the significant key text.  The value
    pattern can include named results, so that the :class:`Dict` results
    can include named token fields.

    Example::

        text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
        attr_expr = (label + Suppress(':') + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join))
        print(attr_expr[1, ...].parse_string(text).dump())

        attr_value = Suppress(':') + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join)

        # similar to Dict, but simpler call format
        result = dict_of(attr_label, attr_value).parse_string(text)

        print(result['shape'])
        print(result.shape)  # object attribute access works too
        print(result.as_dict())

    prints::

        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
        {'color': 'light blue', 'shape': 'SQUARE', 'posn': 'upper left', 'texture': 'burlap'}
    """
    # Each key/value pair is grouped so Dict can map key -> value tokens.
    return Dict(OneOrMore(Group(key + value)))
315 def original_text_for(
316 expr
: ParserElement
, as_string
: bool = True, *, asString
: bool = True
318 """Helper to return the original, untokenized text for a given
319 expression. Useful to restore the parsed fields of an HTML start
320 tag into the raw tag text itself, or to revert separate tokens with
321 intervening whitespace back to the original matching input text. By
322 default, returns a string containing the original parsed text.
324 If the optional ``as_string`` argument is passed as
325 ``False``, then the return value is
326 a :class:`ParseResults` containing any results names that
327 were originally matched, and a single token containing the original
328 matched text from the input string. So if the expression passed to
329 :class:`original_text_for` contains expressions with defined
330 results names, you must set ``as_string`` to ``False`` if you
331 want to preserve those results name values.
333 The ``asString`` pre-PEP8 argument is retained for compatibility,
334 but will be removed in a future release.
338 src = "this is test <b> bold <i>text</i> </b> normal text "
339 for tag in ("b", "i"):
340 opener, closer = make_html_tags(tag)
341 patt = original_text_for(opener + ... + closer)
342 print(patt.search_string(src)[0])
346 ['<b> bold <i>text</i> </b>']
349 asString
= asString
and as_string
351 locMarker
= Empty().set_parse_action(lambda s
, loc
, t
: loc
)
352 endlocMarker
= locMarker
.copy()
353 endlocMarker
.callPreparse
= False
354 matchExpr
= locMarker("_original_start") + expr
+ endlocMarker("_original_end")
356 extractText
= lambda s
, l
, t
: s
[t
._original
_start
: t
._original
_end
]
359 def extractText(s
, l
, t
):
360 t
[:] = [s
[t
.pop("_original_start") : t
.pop("_original_end")]]
362 matchExpr
.set_parse_action(extractText
)
363 matchExpr
.ignoreExprs
= expr
.ignoreExprs
364 matchExpr
.suppress_warning(Diagnostics
.warn_ungrouped_named_tokens_in_collection
)
def ungroup(expr: ParserElement) -> ParserElement:
    """Helper to undo pyparsing's default grouping of And expressions,
    even if all but one are non-empty.
    """

    def _unwrap(t):
        # replace the grouped result with its first token
        return t[0]

    return TokenConverter(expr).add_parse_action(_unwrap)
375 def locatedExpr(expr
: ParserElement
) -> ParserElement
:
377 (DEPRECATED - future code should use the :class:`Located` class)
378 Helper to decorate a returned token with its starting and ending
379 locations in the input string.
381 This helper adds the following results names:
383 - ``locn_start`` - location where matched expression begins
384 - ``locn_end`` - location where matched expression ends
385 - ``value`` - the actual parsed results
387 Be careful if the input text contains ``<TAB>`` characters, you
388 may want to call :class:`ParserElement.parse_with_tabs`
393 for match in locatedExpr(wd).search_string("ljsdf123lksdjjf123lkkjj1222"):
402 locator
= Empty().set_parse_action(lambda ss
, ll
, tt
: ll
)
404 locator("locn_start")
406 + locator
.copy().leaveWhitespace()("locn_end")
411 opener
: Union
[str, ParserElement
] = "(",
412 closer
: Union
[str, ParserElement
] = ")",
413 content
: typing
.Optional
[ParserElement
] = None,
414 ignore_expr
: ParserElement
= quoted_string(),
416 ignoreExpr
: ParserElement
= quoted_string(),
418 """Helper method for defining nested lists enclosed in opening and
419 closing delimiters (``"("`` and ``")"`` are the default).
423 - ``opener`` - opening character for a nested list
424 (default= ``"("``); can also be a pyparsing expression
425 - ``closer`` - closing character for a nested list
426 (default= ``")"``); can also be a pyparsing expression
427 - ``content`` - expression for items within the nested lists
429 - ``ignore_expr`` - expression for ignoring opening and closing delimiters
430 (default= :class:`quoted_string`)
431 - ``ignoreExpr`` - this pre-PEP8 argument is retained for compatibility
432 but will be removed in a future release
434 If an expression is not provided for the content argument, the
435 nested expression will capture all whitespace-delimited content
436 between delimiters as a list of separate values.
438 Use the ``ignore_expr`` argument to define expressions that may
439 contain opening or closing characters that should not be treated as
440 opening or closing characters for nesting, such as quoted_string or
441 a comment expression. Specify multiple expressions using an
442 :class:`Or` or :class:`MatchFirst`. The default is
443 :class:`quoted_string`, but if no expressions are to be ignored, then
444 pass ``None`` for this argument.
448 data_type = one_of("void int short long char float double")
449 decl_data_type = Combine(data_type + Opt(Word('*')))
450 ident = Word(alphas+'_', alphanums+'_')
451 number = pyparsing_common.number
452 arg = Group(decl_data_type + ident)
453 LPAR, RPAR = map(Suppress, "()")
455 code_body = nested_expr('{', '}', ignore_expr=(quoted_string | c_style_comment))
457 c_function = (decl_data_type("type")
459 + LPAR + Opt(DelimitedList(arg), [])("args") + RPAR
461 c_function.ignore(c_style_comment)
468 int dec_to_hex(char hchar) {
469 if (hchar >= '0' && hchar <= '9') {
470 return (ord(hchar)-ord('0'));
472 return (10+ord(hchar)-ord('A'));
476 for func in c_function.search_string(source_code):
477 print("%(name)s (%(type)s) args: %(args)s" % func)
482 is_odd (int) args: [['int', 'x']]
483 dec_to_hex (int) args: [['char', 'hchar']]
485 if ignoreExpr
!= ignore_expr
:
486 ignoreExpr
= ignore_expr
if ignoreExpr
== quoted_string() else ignoreExpr
488 raise ValueError("opening and closing strings cannot be the same")
490 if isinstance(opener
, str_type
) and isinstance(closer
, str_type
):
491 opener
= typing
.cast(str, opener
)
492 closer
= typing
.cast(str, closer
)
493 if len(opener
) == 1 and len(closer
) == 1:
494 if ignoreExpr
is not None:
499 opener
+ closer
+ ParserElement
.DEFAULT_WHITE_CHARS
,
503 ).set_parse_action(lambda t
: t
[0].strip())
505 content
= empty
.copy() + CharsNotIn(
506 opener
+ closer
+ ParserElement
.DEFAULT_WHITE_CHARS
507 ).set_parse_action(lambda t
: t
[0].strip())
509 if ignoreExpr
is not None:
515 + CharsNotIn(ParserElement
.DEFAULT_WHITE_CHARS
, exact
=1)
517 ).set_parse_action(lambda t
: t
[0].strip())
523 + CharsNotIn(ParserElement
.DEFAULT_WHITE_CHARS
, exact
=1)
525 ).set_parse_action(lambda t
: t
[0].strip())
528 "opening and closing arguments must be strings if no content expression is given"
531 if ignoreExpr
is not None:
533 Suppress(opener
) + ZeroOrMore(ignoreExpr | ret | content
) + Suppress(closer
)
536 ret
<<= Group(Suppress(opener
) + ZeroOrMore(ret | content
) + Suppress(closer
))
537 ret
.set_name("nested %s%s expression" % (opener
, closer
))
541 def _makeTags(tagStr
, xml
, suppress_LT
=Suppress("<"), suppress_GT
=Suppress(">")):
542 """Internal helper to construct opening and closing tag expressions, given a tag name"""
543 if isinstance(tagStr
, str_type
):
545 tagStr
= Keyword(tagStr
, caseless
=not xml
)
547 resname
= tagStr
.name
549 tagAttrName
= Word(alphas
, alphanums
+ "_-:")
551 tagAttrValue
= dbl_quoted_string
.copy().set_parse_action(remove_quotes
)
555 + Dict(ZeroOrMore(Group(tagAttrName
+ Suppress("=") + tagAttrValue
)))
556 + Opt("/", default
=[False])("empty").set_parse_action(
557 lambda s
, l
, t
: t
[0] == "/"
562 tagAttrValue
= quoted_string
.copy().set_parse_action(remove_quotes
) |
Word(
563 printables
, exclude_chars
=">"
571 tagAttrName
.set_parse_action(lambda t
: t
[0].lower())
572 + Opt(Suppress("=") + tagAttrValue
)
576 + Opt("/", default
=[False])("empty").set_parse_action(
577 lambda s
, l
, t
: t
[0] == "/"
581 closeTag
= Combine(Literal("</") + tagStr
+ ">", adjacent
=False)
583 openTag
.set_name("<%s>" % resname
)
584 # add start<tagname> results name in parse action now that ungrouped names are not reported at two levels
585 openTag
.add_parse_action(
586 lambda t
: t
.__setitem
__(
587 "start" + "".join(resname
.replace(":", " ").title().split()), t
.copy()
591 "end" + "".join(resname
.replace(":", " ").title().split())
592 ).set_name("</%s>" % resname
)
593 openTag
.tag
= resname
594 closeTag
.tag
= resname
595 openTag
.tag_body
= SkipTo(closeTag())
596 return openTag
, closeTag
600 tag_str
: Union
[str, ParserElement
]
601 ) -> Tuple
[ParserElement
, ParserElement
]:
602 """Helper to construct opening and closing tag expressions for HTML,
603 given a tag name. Matches tags in either upper or lower case,
604 attributes with namespaces and with quoted or unquoted values.
608 text = '<td>More info at the <a href="https://github.com/pyparsing/pyparsing/wiki">pyparsing</a> wiki page</td>'
609 # make_html_tags returns pyparsing expressions for the opening and
610 # closing tags as a 2-tuple
611 a, a_end = make_html_tags("A")
612 link_expr = a + SkipTo(a_end)("link_text") + a_end
614 for link in link_expr.search_string(text):
615 # attributes in the <A> tag (like "href" shown here) are
616 # also accessible as named results
617 print(link.link_text, '->', link.href)
621 pyparsing -> https://github.com/pyparsing/pyparsing/wiki
623 return _makeTags(tag_str
, False)
627 tag_str
: Union
[str, ParserElement
]
628 ) -> Tuple
[ParserElement
, ParserElement
]:
629 """Helper to construct opening and closing tag expressions for XML,
630 given a tag name. Matches tags only in the given upper/lower case.
632 Example: similar to :class:`make_html_tags`
634 return _makeTags(tag_str
, True)
637 any_open_tag
: ParserElement
638 any_close_tag
: ParserElement
639 any_open_tag
, any_close_tag
= make_html_tags(
640 Word(alphas
, alphanums
+ "_:").set_name("any tag")
643 _htmlEntityMap
= {k.rstrip(";"): v for k, v in html.entities.html5.items()}
644 common_html_entity
= Regex("&(?P<entity>" + "|".join(_htmlEntityMap
) + ");").set_name(
def replace_html_entity(s, l, t):
    """Helper parser action to replace common HTML entities with their special characters"""
    # The matching expression captures the entity name (without '&'/';')
    # under the results name "entity"; look up its replacement character.
    # Unknown entities yield None — presumably leaving the token unchanged
    # (pyparsing ignores a None parse-action result); verify against caller.
    return _htmlEntityMap.get(t.entity)
655 """Enumeration of operator associativity
656 - used in constructing InfixNotationOperatorSpec for :class:`infix_notation`"""
662 InfixNotationOperatorArgType
= Union
[
663 ParserElement
, str, Tuple
[Union
[ParserElement
, str], Union
[ParserElement
, str]]
665 InfixNotationOperatorSpec
= Union
[
667 InfixNotationOperatorArgType
,
670 typing
.Optional
[ParseAction
],
673 InfixNotationOperatorArgType
,
681 base_expr
: ParserElement
,
682 op_list
: List
[InfixNotationOperatorSpec
],
683 lpar
: Union
[str, ParserElement
] = Suppress("("),
684 rpar
: Union
[str, ParserElement
] = Suppress(")"),
686 """Helper method for constructing grammars of expressions made up of
687 operators working in a precedence hierarchy. Operators may be unary
688 or binary, left- or right-associative. Parse actions can also be
689 attached to operator expressions. The generated parser will also
690 recognize the use of parentheses to override operator precedences
693 Note: if you define a deep operator list, you may see performance
694 issues when using infix_notation. See
695 :class:`ParserElement.enable_packrat` for a mechanism to potentially
696 improve your parser performance.
700 - ``base_expr`` - expression representing the most basic operand to
701 be used in the expression
702 - ``op_list`` - list of tuples, one for each operator precedence level
703 in the expression grammar; each tuple is of the form ``(op_expr,
704 num_operands, right_left_assoc, (optional)parse_action)``, where:
706 - ``op_expr`` is the pyparsing expression for the operator; may also
707 be a string, which will be converted to a Literal; if ``num_operands``
708 is 3, ``op_expr`` is a tuple of two expressions, for the two
709 operators separating the 3 terms
710 - ``num_operands`` is the number of terms for this operator (must be 1,
712 - ``right_left_assoc`` is the indicator whether the operator is right
713 or left associative, using the pyparsing-defined constants
714 ``OpAssoc.RIGHT`` and ``OpAssoc.LEFT``.
715 - ``parse_action`` is the parse action to be associated with
716 expressions matching this operator expression (the parse action
717 tuple member may be omitted); if the parse action is passed
718 a tuple or list of functions, this is equivalent to calling
719 ``set_parse_action(*fn)``
720 (:class:`ParserElement.set_parse_action`)
721 - ``lpar`` - expression for matching left-parentheses; if passed as a
722 str, then will be parsed as ``Suppress(lpar)``. If lpar is passed as
723 an expression (such as ``Literal('(')``), then it will be kept in
724 the parsed results, and grouped with them. (default= ``Suppress('(')``)
725 - ``rpar`` - expression for matching right-parentheses; if passed as a
726 str, then will be parsed as ``Suppress(rpar)``. If rpar is passed as
727 an expression (such as ``Literal(')')``), then it will be kept in
728 the parsed results, and grouped with them. (default= ``Suppress(')')``)
732 # simple example of four-function arithmetic with ints and
734 integer = pyparsing_common.signed_integer
735 varname = pyparsing_common.identifier
737 arith_expr = infix_notation(integer | varname,
739 ('-', 1, OpAssoc.RIGHT),
740 (one_of('* /'), 2, OpAssoc.LEFT),
741 (one_of('+ -'), 2, OpAssoc.LEFT),
744 arith_expr.run_tests('''
748 ''', full_dump=False)
753 [[5, '+', [3, '*', 6]]]
756 [[[5, '+', 3], '*', 6]]
759 [[[5, '+', 'x'], '*', 'y']]
762 [[['-', 2], '-', ['-', 11]]]
765 # captive version of FollowedBy that does not do parse actions or capture results names
766 class _FB(FollowedBy
):
767 def parseImpl(self
, instring
, loc
, doActions
=True):
768 self
.expr
.try_parse(instring
, loc
)
771 _FB
.__name
__ = "FollowedBy>"
774 if isinstance(lpar
, str):
775 lpar
= Suppress(lpar
)
776 if isinstance(rpar
, str):
777 rpar
= Suppress(rpar
)
779 # if lpar and rpar are not suppressed, wrap in group
780 if not (isinstance(rpar
, Suppress
) and isinstance(rpar
, Suppress
)):
781 lastExpr
= base_expr |
Group(lpar
+ ret
+ rpar
)
783 lastExpr
= base_expr |
(lpar
+ ret
+ rpar
)
786 rightLeftAssoc
: opAssoc
787 pa
: typing
.Optional
[ParseAction
]
788 opExpr1
: ParserElement
789 opExpr2
: ParserElement
790 for i
, operDef
in enumerate(op_list
):
791 opExpr
, arity
, rightLeftAssoc
, pa
= (operDef
+ (None,))[:4] # type: ignore[assignment]
792 if isinstance(opExpr
, str_type
):
793 opExpr
= ParserElement
._literalStringClass
(opExpr
)
794 opExpr
= typing
.cast(ParserElement
, opExpr
)
796 if not isinstance(opExpr
, (tuple, list)) or len(opExpr
) != 2:
798 "if numterms=3, opExpr must be a tuple or list of two expressions"
800 opExpr1
, opExpr2
= opExpr
801 term_name
= f
"{opExpr1}{opExpr2} term"
803 term_name
= f
"{opExpr} term"
805 if not 1 <= arity
<= 3:
806 raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
808 if rightLeftAssoc
not in (OpAssoc
.LEFT
, OpAssoc
.RIGHT
):
809 raise ValueError("operator must indicate right or left associativity")
811 thisExpr
: ParserElement
= Forward().set_name(term_name
)
812 thisExpr
= typing
.cast(Forward
, thisExpr
)
813 if rightLeftAssoc
is OpAssoc
.LEFT
:
815 matchExpr
= _FB(lastExpr
+ opExpr
) + Group(lastExpr
+ opExpr
[1, ...])
817 if opExpr
is not None:
818 matchExpr
= _FB(lastExpr
+ opExpr
+ lastExpr
) + Group(
819 lastExpr
+ (opExpr
+ lastExpr
)[1, ...]
822 matchExpr
= _FB(lastExpr
+ lastExpr
) + Group(lastExpr
[2, ...])
825 lastExpr
+ opExpr1
+ lastExpr
+ opExpr2
+ lastExpr
826 ) + Group(lastExpr
+ OneOrMore(opExpr1
+ lastExpr
+ opExpr2
+ lastExpr
))
827 elif rightLeftAssoc
is OpAssoc
.RIGHT
:
829 # try to avoid LR with this extra test
830 if not isinstance(opExpr
, Opt
):
832 matchExpr
= _FB(opExpr
.expr
+ thisExpr
) + Group(opExpr
+ thisExpr
)
834 if opExpr
is not None:
835 matchExpr
= _FB(lastExpr
+ opExpr
+ thisExpr
) + Group(
836 lastExpr
+ (opExpr
+ thisExpr
)[1, ...]
839 matchExpr
= _FB(lastExpr
+ thisExpr
) + Group(
840 lastExpr
+ thisExpr
[1, ...]
844 lastExpr
+ opExpr1
+ thisExpr
+ opExpr2
+ thisExpr
845 ) + Group(lastExpr
+ opExpr1
+ thisExpr
+ opExpr2
+ thisExpr
)
847 if isinstance(pa
, (tuple, list)):
848 matchExpr
.set_parse_action(*pa
)
850 matchExpr
.set_parse_action(pa
)
851 thisExpr
<<= (matchExpr | lastExpr
).setName(term_name
)
857 def indentedBlock(blockStatementExpr
, indentStack
, indent
=True, backup_stacks
=[]):
859 (DEPRECATED - use :class:`IndentedBlock` class instead)
860 Helper method for defining space-delimited indentation blocks,
861 such as those used to define block statements in Python source code.
865 - ``blockStatementExpr`` - expression defining syntax of statement that
866 is repeated within the indented block
867 - ``indentStack`` - list created by caller to manage indentation stack
868 (multiple ``statementWithIndentedBlock`` expressions within a single
869 grammar should share a common ``indentStack``)
870 - ``indent`` - boolean indicating whether block must be indented beyond
871 the current level; set to ``False`` for block of left-most statements
874 A valid block must contain at least one ``blockStatement``.
876 (Note that indentedBlock uses internal parse actions which make it
877 incompatible with packrat parsing.)
906 identifier = Word(alphas, alphanums)
907 funcDecl = ("def" + identifier + Group("(" + Opt(delimitedList(identifier)) + ")") + ":")
908 func_body = indentedBlock(stmt, indentStack)
909 funcDef = Group(funcDecl + func_body)
912 funcCall = Group(identifier + "(" + Opt(delimitedList(rvalue)) + ")")
913 rvalue << (funcCall | identifier | Word(nums))
914 assignment = Group(identifier + "=" + rvalue)
915 stmt << (funcDef | assignment | identifier)
917 module_body = stmt[1, ...]
919 parseTree = module_body.parseString(data)
928 [['A1'], [['B', '=', '100']], [['G', '=', 'A2']], ['A2'], ['A3']]],
932 ['(', 'a', 'b', 'c', ')'],
934 [['BB1'], [['def', 'BBA', ['(', ')'], ':', [['bba1'], ['bba2'], ['bba3']]]]]],
939 ['(', 'x', 'y', ')'],
941 [[['def', 'eggs', ['(', 'z', ')'], ':', [['pass']]]]]]]
943 backup_stacks
.append(indentStack
[:])
946 indentStack
[:] = backup_stacks
[-1]
948 def checkPeerIndent(s
, l
, t
):
952 if curCol
!= indentStack
[-1]:
953 if curCol
> indentStack
[-1]:
954 raise ParseException(s
, l
, "illegal nesting")
955 raise ParseException(s
, l
, "not a peer entry")
957 def checkSubIndent(s
, l
, t
):
959 if curCol
> indentStack
[-1]:
960 indentStack
.append(curCol
)
962 raise ParseException(s
, l
, "not a subentry")
964 def checkUnindent(s
, l
, t
):
968 if not (indentStack
and curCol
in indentStack
):
969 raise ParseException(s
, l
, "not an unindent")
970 if curCol
< indentStack
[-1]:
973 NL
= OneOrMore(LineEnd().set_whitespace_chars("\t ").suppress())
974 INDENT
= (Empty() + Empty().set_parse_action(checkSubIndent
)).set_name("INDENT")
975 PEER
= Empty().set_parse_action(checkPeerIndent
).set_name("")
976 UNDENT
= Empty().set_parse_action(checkUnindent
).set_name("UNINDENT")
981 + OneOrMore(PEER
+ Group(blockStatementExpr
) + Opt(NL
))
987 + OneOrMore(PEER
+ Group(blockStatementExpr
) + Opt(NL
))
991 # add a parse action to remove backup_stack from list of backups
992 smExpr
.add_parse_action(
993 lambda: backup_stacks
.pop(-1) and None if backup_stacks
else None
995 smExpr
.set_fail_action(lambda a
, b
, c
, d
: reset_stack())
996 blockStatementExpr
.ignore(_bslash
+ LineEnd())
997 return smExpr
.set_name("indented block")
1000 # it's easy to get these comment structures wrong - they're very common, so may as well make them available
1001 c_style_comment
= Combine(Regex(r
"/\*(?:[^*]|\*(?!/))*") + "*/").set_name(
1004 "Comment of the form ``/* ... */``"
# Pre-built comment expressions for common language comment styles.
html_comment = Regex(r"<!--[\s\S]*?-->").set_name("HTML comment")
"Comment of the form ``<!-- ... -->``"

# Matches the remainder of the current line without consuming leading whitespace.
rest_of_line = Regex(r".*").leave_whitespace().set_name("rest of line")

dbl_slash_comment = Regex(r"//(?:\\\n|[^\n])*").set_name("// comment")
"Comment of the form ``// ... (to end of line)``"

cpp_style_comment = Combine(
    Regex(r"/\*(?:[^*]|\*(?!/))*") + "*/" | dbl_slash_comment
).set_name("C++ style comment")
"Comment of either form :class:`c_style_comment` or :class:`dbl_slash_comment`"

java_style_comment = cpp_style_comment
"Same as :class:`cpp_style_comment`"

python_style_comment = Regex(r"#.*").set_name("Python style comment")
"Comment of the form ``# ... (to end of line)``"
1025 # build list of built-in expressions, for future reference if a global default value
1027 _builtin_exprs
: List
[ParserElement
] = [
1028 v
for v
in vars().values() if isinstance(v
, ParserElement
)
1032 # compatibility function, superseded by DelimitedList class
1034 expr
: Union
[str, ParserElement
],
1035 delim
: Union
[str, ParserElement
] = ",",
1036 combine
: bool = False,
1037 min: typing
.Optional
[int] = None,
1038 max: typing
.Optional
[int] = None,
1040 allow_trailing_delim
: bool = False,
1042 """(DEPRECATED - use :class:`DelimitedList` class)"""
1043 return DelimitedList(
1044 expr
, delim
, combine
, min, max, allow_trailing_delim
=allow_trailing_delim
# pre-PEP8 compatible names
# (retained for backward compatibility; see module docs — these may be
# removed in a future release)
anyOpenTag = any_open_tag
anyCloseTag = any_close_tag
commonHTMLEntity = common_html_entity
cStyleComment = c_style_comment
htmlComment = html_comment
restOfLine = rest_of_line
dblSlashComment = dbl_slash_comment
cppStyleComment = cpp_style_comment
javaStyleComment = java_style_comment
pythonStyleComment = python_style_comment
# Pre-PEP8 compatibility stubs: the bodies are intentionally empty (`...`);
# the @replaced_by_pep8 decorator supplies the actual implementation by
# delegating to the replacement shown in each decorator argument.
@replaced_by_pep8(DelimitedList)
def delimitedList(): ...

@replaced_by_pep8(DelimitedList)
def delimited_list(): ...

@replaced_by_pep8(counted_array)
def countedArray(): ...

@replaced_by_pep8(match_previous_literal)
def matchPreviousLiteral(): ...

@replaced_by_pep8(match_previous_expr)
def matchPreviousExpr(): ...
1077 @replaced_by_pep8(one_of
)
1080 @replaced_by_pep8(dict_of
)
# Pre-PEP8 compatibility stubs (continued): empty bodies, behavior comes
# from the @replaced_by_pep8 decorator's target.
@replaced_by_pep8(original_text_for)
def originalTextFor(): ...

@replaced_by_pep8(nested_expr)
def nestedExpr(): ...

@replaced_by_pep8(make_html_tags)
def makeHTMLTags(): ...

@replaced_by_pep8(make_xml_tags)
def makeXMLTags(): ...

@replaced_by_pep8(replace_html_entity)
def replaceHTMLEntity(): ...

@replaced_by_pep8(infix_notation)
def infixNotation(): ...