# helpers.py -- from pip/_vendor/pyparsing (vendored copy)
# (retrieved from jfr.im git, repo dlqueue.git, commit "init: venv and flask",
#  path venv/lib/python3.11/site-packages/pip/_vendor/pyparsing/helpers.py)
1 # helpers.py
2 import html.entities
3 import re
4 import sys
5 import typing
6
7 from . import __diag__
8 from .core import *
9 from .util import (
10 _bslash,
11 _flatten,
12 _escape_regex_range_chars,
13 replaced_by_pep8,
14 )
15
16
17 #
18 # global helpers
19 #
def counted_array(
    expr: ParserElement,
    int_expr: typing.Optional[ParserElement] = None,
    *,
    intExpr: typing.Optional[ParserElement] = None,
) -> ParserElement:
    """Helper to define a counted list of expressions.

    This helper defines a pattern of the form::

        integer expr expr expr...

    where the leading integer tells how many expr expressions follow.
    The matched tokens returns the array of expr tokens as a list - the
    leading count token is suppressed.

    If ``int_expr`` is specified, it should be a pyparsing expression
    that produces an integer value.

    Example::

        counted_array(Word(alphas)).parse_string('2 ab cd ef')  # -> ['ab', 'cd']

        # in this parser, the leading integer value is given in binary,
        # '10' indicating that 2 values are in the array
        binary_constant = Word('01').set_parse_action(lambda t: int(t[0], 2))
        counted_array(Word(alphas), int_expr=binary_constant).parse_string('10 ab cd ef')  # -> ['ab', 'cd']

        # if other fields must be parsed after the count but before the
        # list items, give the fields results names and they will
        # be preserved in the returned ParseResults:
        count_with_metadata = integer + Word(alphas)("type")
        typed_array = counted_array(Word(alphanums), int_expr=count_with_metadata)("items")
        result = typed_array.parse_string("3 bool True True False")
        print(result.dump())

        # prints
        # ['True', 'True', 'False']
        # - items: ['True', 'True', 'False']
        # - type: 'bool'
    """
    # reconcile pre-PEP8 keyword argument with the new-style argument
    intExpr = intExpr or int_expr
    # the array body is a Forward, redefined on every parse once the count is known
    array_expr = Forward()

    def count_field_parse_action(s, l, t):
        # after the count is parsed, rewire the Forward to match exactly
        # that many occurrences of ``expr`` (Empty for a zero count)
        nonlocal array_expr
        n = t[0]
        array_expr <<= (expr * n) if n else Empty()
        # clear list contents, but keep any named results
        del t[:]

    if intExpr is None:
        # default count expression: a decimal integer
        intExpr = Word(nums).set_parse_action(lambda t: int(t[0]))
    else:
        # copy so the caller's expression is not mutated by our parse action
        intExpr = intExpr.copy()
    intExpr.set_name("arrayLen")
    # call_during_try=True so the Forward is rewired even inside lookaheads
    intExpr.add_parse_action(count_field_parse_action, call_during_try=True)
    return (intExpr + array_expr).set_name("(len) " + str(expr) + "...")
78
79
def match_previous_literal(expr: ParserElement) -> ParserElement:
    """Helper to define an expression that is indirectly defined from
    the tokens matched in a previous expression, that is, it looks for
    a 'repeat' of a previous expression. For example::

        first = Word(nums)
        second = match_previous_literal(first)
        match_expr = first + ":" + second

    will match ``"1:1"``, but not ``"1:2"``. Because this
    matches a previous literal, will also match the leading
    ``"1:1"`` in ``"1:10"``. If this is not desired, use
    :class:`match_previous_expr`. Do *not* use with packrat parsing
    enabled.
    """
    # the repeater is a Forward, redefined each time ``expr`` matches
    rep = Forward()

    def copy_token_to_repeater(s, l, t):
        if t:
            if len(t) == 1:
                # single token - repeat it as a plain literal
                rep << t[0]
            else:
                # flatten t tokens
                tflat = _flatten(t.as_list())
                rep << And(Literal(tt) for tt in tflat)
        else:
            # expr matched no tokens - the repeater matches the empty string
            rep << Empty()

    # callDuringTry=True so the repeater tracks matches made inside lookaheads
    expr.add_parse_action(copy_token_to_repeater, callDuringTry=True)
    rep.set_name("(prev) " + str(expr))
    return rep
111
112
def match_previous_expr(expr: ParserElement) -> ParserElement:
    """Helper to define an expression that is indirectly defined from
    the tokens matched in a previous expression, that is, it looks for
    a 'repeat' of a previous expression. For example::

        first = Word(nums)
        second = match_previous_expr(first)
        match_expr = first + ":" + second

    will match ``"1:1"``, but not ``"1:2"``. Because this
    matches by expressions, will *not* match the leading ``"1:1"``
    in ``"1:10"``; the expressions are evaluated first, and then
    compared, so ``"1"`` is compared with ``"10"``. Do *not* use
    with packrat parsing enabled.
    """
    rep = Forward()
    # the repeater re-parses with a copy of expr, then a parse action
    # verifies the new tokens against those of the previous match
    e2 = expr.copy()
    rep <<= e2

    def copy_token_to_repeater(s, l, t):
        # capture the tokens of the most recent match of ``expr``
        matchTokens = _flatten(t.as_list())

        def must_match_these_tokens(s, l, t):
            # reject the repeat unless it produced the identical token list
            theseTokens = _flatten(t.as_list())
            if theseTokens != matchTokens:
                # fix: add the missing space between "found" and the token list
                raise ParseException(
                    s, l, f"Expected {matchTokens}, found {theseTokens}"
                )

        rep.set_parse_action(must_match_these_tokens, callDuringTry=True)

    expr.add_parse_action(copy_token_to_repeater, callDuringTry=True)
    rep.set_name("(prev) " + str(expr))
    return rep
147
148
def one_of(
    strs: Union[typing.Iterable[str], str],
    caseless: bool = False,
    use_regex: bool = True,
    as_keyword: bool = False,
    *,
    useRegex: bool = True,
    asKeyword: bool = False,
) -> ParserElement:
    """Helper to quickly define a set of alternative :class:`Literal` s,
    and makes sure to do longest-first testing when there is a conflict,
    regardless of the input order, but returns
    a :class:`MatchFirst` for best performance.

    Parameters:

    - ``strs`` - a string of space-delimited literals, or a collection of
      string literals
    - ``caseless`` - treat all literals as caseless - (default= ``False``)
    - ``use_regex`` - as an optimization, will
      generate a :class:`Regex` object; otherwise, will generate
      a :class:`MatchFirst` object (if ``caseless=True`` or ``as_keyword=True``, or if
      creating a :class:`Regex` raises an exception) - (default= ``True``)
    - ``as_keyword`` - enforce :class:`Keyword`-style matching on the
      generated expressions - (default= ``False``)
    - ``asKeyword`` and ``useRegex`` are retained for pre-PEP8 compatibility,
      but will be removed in a future release

    Example::

        comp_oper = one_of("< = > <= >= !=")
        var = Word(alphas)
        number = Word(nums)
        term = var | number
        comparison_expr = term + comp_oper + term
        print(comparison_expr.search_string("B = 12  AA=23 B<=AA AA>12"))

    prints::

        [['B', '=', '12'], ['AA', '=', '23'], ['B', '<=', 'AA'], ['AA', '>', '12']]
    """
    # reconcile pre-PEP8 keyword arguments with the new-style arguments
    asKeyword = asKeyword or as_keyword
    useRegex = useRegex and use_regex

    # a second string positional argument usually means the caller meant
    # one_of("a b c"), not one_of("a", "b") - warn when diagnostics request it
    if (
        isinstance(caseless, str_type)
        and __diag__.warn_on_multiple_string_args_to_oneof
    ):
        warnings.warn(
            "More than one string argument passed to one_of, pass"
            " choices as a list or space-delimited string",
            stacklevel=2,
        )

    # choose comparison helpers and the element class for the MatchFirst fallback
    if caseless:
        isequal = lambda a, b: a.upper() == b.upper()
        masks = lambda a, b: b.upper().startswith(a.upper())
        parseElementClass = CaselessKeyword if asKeyword else CaselessLiteral
    else:
        isequal = lambda a, b: a == b
        masks = lambda a, b: b.startswith(a)
        parseElementClass = Keyword if asKeyword else Literal

    symbols: List[str] = []
    if isinstance(strs, str_type):
        strs = typing.cast(str, strs)
        symbols = strs.split()
    elif isinstance(strs, Iterable):
        symbols = list(strs)
    else:
        raise TypeError("Invalid argument to one_of, expected string or iterable")
    if not symbols:
        return NoMatch()

    # reorder given symbols to take care to avoid masking longer choices with shorter ones
    # (but only if the given symbols are not just single characters)
    if any(len(sym) > 1 for sym in symbols):
        i = 0
        while i < len(symbols) - 1:
            cur = symbols[i]
            for j, other in enumerate(symbols[i + 1 :]):
                if isequal(other, cur):
                    # drop exact duplicate of the current symbol
                    del symbols[i + j + 1]
                    break
                elif masks(cur, other):
                    # ``other`` starts with ``cur`` and would never match;
                    # move it ahead so longest-first ordering holds
                    del symbols[i + j + 1]
                    symbols.insert(i, other)
                    break
            else:
                # no change made at position i - advance
                i += 1

    if useRegex:
        re_flags: int = re.IGNORECASE if caseless else 0

        try:
            if all(len(sym) == 1 for sym in symbols):
                # symbols are just single characters, create range regex pattern
                patt = f"[{''.join(_escape_regex_range_chars(sym) for sym in symbols)}]"
            else:
                patt = "|".join(re.escape(sym) for sym in symbols)

            # wrap with \b word break markers if defining as keywords
            if asKeyword:
                patt = rf"\b(?:{patt})\b"

            ret = Regex(patt, flags=re_flags).set_name(" | ".join(symbols))

            if caseless:
                # add parse action to return symbols as specified, not in random
                # casing as found in input string
                symbol_map = {sym.lower(): sym for sym in symbols}
                ret.add_parse_action(lambda s, l, t: symbol_map[t[0].lower()])

            return ret

        except re.error:
            # fall through to the MatchFirst fallback below
            warnings.warn(
                "Exception creating Regex for one_of, building MatchFirst", stacklevel=2
            )

    # last resort, just use MatchFirst
    return MatchFirst(parseElementClass(sym) for sym in symbols).set_name(
        " | ".join(symbols)
    )
273
274
def dict_of(key: ParserElement, value: ParserElement) -> ParserElement:
    """Helper to easily and clearly define a dictionary by specifying
    the respective patterns for the key and value. Takes care of
    defining the :class:`Dict`, :class:`ZeroOrMore`, and
    :class:`Group` tokens in the proper order. The key pattern
    can include delimiting markers or punctuation, as long as they are
    suppressed, thereby leaving the significant key text. The value
    pattern can include named results, so that the :class:`Dict` results
    can include named token fields.

    Example::

        text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
        attr_label = label
        attr_value = Suppress(':') + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join)

        # similar to Dict, but simpler call format
        result = dict_of(attr_label, attr_value).parse_string(text)
        print(result.dump())
        print(result['shape'])
        print(result.shape)  # object attribute access works too
        print(result.as_dict())

    prints::

        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
        - color: 'light blue'
        - posn: 'upper left'
        - shape: 'SQUARE'
        - texture: 'burlap'
        SQUARE
        SQUARE
        {'color': 'light blue', 'shape': 'SQUARE', 'posn': 'upper left', 'texture': 'burlap'}
    """
    # one key/value pair, grouped so Dict sees [key, value...] sublists
    entry = Group(key + value)
    # one or more entries, collected into a Dict for named-result access
    return Dict(OneOrMore(entry))
313
314
def original_text_for(
    expr: ParserElement, as_string: bool = True, *, asString: bool = True
) -> ParserElement:
    """Helper to return the original, untokenized text for a given
    expression.  Useful to restore the parsed fields of an HTML start
    tag into the raw tag text itself, or to revert separate tokens with
    intervening whitespace back to the original matching input text. By
    default, returns a string containing the original parsed text.

    If the optional ``as_string`` argument is passed as
    ``False``, then the return value is
    a :class:`ParseResults` containing any results names that
    were originally matched, and a single token containing the original
    matched text from the input string.  So if the expression passed to
    :class:`original_text_for` contains expressions with defined
    results names, you must set ``as_string`` to ``False`` if you
    want to preserve those results name values.

    The ``asString`` pre-PEP8 argument is retained for compatibility,
    but will be removed in a future release.

    Example::

        src = "this is test <b> bold <i>text</i> </b> normal text "
        for tag in ("b", "i"):
            opener, closer = make_html_tags(tag)
            patt = original_text_for(opener + ... + closer)
            print(patt.search_string(src)[0])

    prints::

        ['<b> bold <i>text</i> </b>']
        ['<i>text</i>']
    """
    # reconcile pre-PEP8 keyword argument with the new-style argument
    asString = asString and as_string

    # zero-width markers that record the current parse location
    locMarker = Empty().set_parse_action(lambda s, loc, t: loc)
    endlocMarker = locMarker.copy()
    # don't skip leading whitespace before recording the end location,
    # so trailing whitespace is excluded from the captured slice
    endlocMarker.callPreparse = False
    matchExpr = locMarker("_original_start") + expr + endlocMarker("_original_end")
    if asString:
        # return only the raw slice of the input string
        extractText = lambda s, l, t: s[t._original_start : t._original_end]
    else:

        def extractText(s, l, t):
            # replace token list with the raw slice; pop() removes the
            # internal marker names while keeping other named results
            t[:] = [s[t.pop("_original_start") : t.pop("_original_end")]]

    matchExpr.set_parse_action(extractText)
    # propagate ignore expressions so skipped content matches ``expr``'s behavior
    matchExpr.ignoreExprs = expr.ignoreExprs
    matchExpr.suppress_warning(Diagnostics.warn_ungrouped_named_tokens_in_collection)
    return matchExpr
366
367
def ungroup(expr: ParserElement) -> ParserElement:
    """Helper to undo pyparsing's default grouping of And expressions,
    even if all but one are non-empty.
    """
    # strip one level of grouping by returning only the first token
    def _first_token(t):
        return t[0]

    return TokenConverter(expr).add_parse_action(_first_token)
373
374
def locatedExpr(expr: ParserElement) -> ParserElement:
    """
    (DEPRECATED - future code should use the :class:`Located` class)
    Helper to decorate a returned token with its starting and ending
    locations in the input string.

    This helper adds the following results names:

    - ``locn_start`` - location where matched expression begins
    - ``locn_end`` - location where matched expression ends
    - ``value`` - the actual parsed results

    Be careful if the input text contains ``<TAB>`` characters, you
    may want to call :class:`ParserElement.parse_with_tabs`

    Example::

        wd = Word(alphas)
        for match in locatedExpr(wd).search_string("ljsdf123lksdjjf123lkkjj1222"):
            print(match)

    prints::

        [[0, 'ljsdf', 5]]
        [[8, 'lksdjjf', 15]]
        [[18, 'lkkjj', 23]]
    """
    # zero-width marker that yields the current parse location as a token
    start_marker = Empty().set_parse_action(lambda ss, ll, tt: ll)
    # the end marker must not skip whitespace, or the reported end
    # would include trailing whitespace
    end_marker = start_marker.copy().leaveWhitespace()
    return Group(
        start_marker("locn_start") + expr("value") + end_marker("locn_end")
    )
408
409
def nested_expr(
    opener: Union[str, ParserElement] = "(",
    closer: Union[str, ParserElement] = ")",
    content: typing.Optional[ParserElement] = None,
    ignore_expr: ParserElement = quoted_string(),
    *,
    ignoreExpr: ParserElement = quoted_string(),
) -> ParserElement:
    """Helper method for defining nested lists enclosed in opening and
    closing delimiters (``"("`` and ``")"`` are the default).

    Parameters:

    - ``opener`` - opening character for a nested list
      (default= ``"("``); can also be a pyparsing expression
    - ``closer`` - closing character for a nested list
      (default= ``")"``); can also be a pyparsing expression
    - ``content`` - expression for items within the nested lists
      (default= ``None``)
    - ``ignore_expr`` - expression for ignoring opening and closing delimiters
      (default= :class:`quoted_string`)
    - ``ignoreExpr`` - this pre-PEP8 argument is retained for compatibility
      but will be removed in a future release

    If an expression is not provided for the content argument, the
    nested expression will capture all whitespace-delimited content
    between delimiters as a list of separate values.

    Use the ``ignore_expr`` argument to define expressions that may
    contain opening or closing characters that should not be treated as
    opening or closing characters for nesting, such as quoted_string or
    a comment expression.  Specify multiple expressions using an
    :class:`Or` or :class:`MatchFirst`. The default is
    :class:`quoted_string`, but if no expressions are to be ignored, then
    pass ``None`` for this argument.

    Example::

        data_type = one_of("void int short long char float double")
        decl_data_type = Combine(data_type + Opt(Word('*')))
        ident = Word(alphas+'_', alphanums+'_')
        number = pyparsing_common.number
        arg = Group(decl_data_type + ident)
        LPAR, RPAR = map(Suppress, "()")

        code_body = nested_expr('{', '}', ignore_expr=(quoted_string | c_style_comment))

        c_function = (decl_data_type("type")
                      + ident("name")
                      + LPAR + Opt(DelimitedList(arg), [])("args") + RPAR
                      + code_body("body"))
        c_function.ignore(c_style_comment)

        source_code = '''
            int is_odd(int x) {
                return (x%2);
            }

            int dec_to_hex(char hchar) {
                if (hchar >= '0' && hchar <= '9') {
                    return (ord(hchar)-ord('0'));
                } else {
                    return (10+ord(hchar)-ord('A'));
                }
            }
        '''
        for func in c_function.search_string(source_code):
            print("%(name)s (%(type)s) args: %(args)s" % func)


    prints::

        is_odd (int) args: [['int', 'x']]
        dec_to_hex (int) args: [['char', 'hchar']]
    """
    # reconcile pre-PEP8 ignoreExpr with ignore_expr: prefer whichever
    # argument was explicitly changed from the quoted_string() default
    if ignoreExpr != ignore_expr:
        ignoreExpr = ignore_expr if ignoreExpr == quoted_string() else ignoreExpr
    if opener == closer:
        raise ValueError("opening and closing strings cannot be the same")
    if content is None:
        # build a default content expression from the delimiter strings
        if isinstance(opener, str_type) and isinstance(closer, str_type):
            opener = typing.cast(str, opener)
            closer = typing.cast(str, closer)
            if len(opener) == 1 and len(closer) == 1:
                # single-character delimiters: content is any run of characters
                # that are not delimiters or whitespace
                if ignoreExpr is not None:
                    content = Combine(
                        OneOrMore(
                            ~ignoreExpr
                            + CharsNotIn(
                                opener + closer + ParserElement.DEFAULT_WHITE_CHARS,
                                exact=1,
                            )
                        )
                    ).set_parse_action(lambda t: t[0].strip())
                else:
                    content = empty.copy() + CharsNotIn(
                        opener + closer + ParserElement.DEFAULT_WHITE_CHARS
                    ).set_parse_action(lambda t: t[0].strip())
            else:
                # multi-character delimiters: use negative lookahead on the
                # delimiter literals, consuming one character at a time
                if ignoreExpr is not None:
                    content = Combine(
                        OneOrMore(
                            ~ignoreExpr
                            + ~Literal(opener)
                            + ~Literal(closer)
                            + CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS, exact=1)
                        )
                    ).set_parse_action(lambda t: t[0].strip())
                else:
                    content = Combine(
                        OneOrMore(
                            ~Literal(opener)
                            + ~Literal(closer)
                            + CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS, exact=1)
                        )
                    ).set_parse_action(lambda t: t[0].strip())
        else:
            raise ValueError(
                "opening and closing arguments must be strings if no content expression is given"
            )
    # recursive definition: a nested list may contain further nested lists
    ret = Forward()
    if ignoreExpr is not None:
        ret <<= Group(
            Suppress(opener) + ZeroOrMore(ignoreExpr | ret | content) + Suppress(closer)
        )
    else:
        ret <<= Group(Suppress(opener) + ZeroOrMore(ret | content) + Suppress(closer))
    ret.set_name("nested %s%s expression" % (opener, closer))
    return ret
539
540
def _makeTags(tagStr, xml, suppress_LT=Suppress("<"), suppress_GT=Suppress(">")):
    """Internal helper to construct opening and closing tag expressions, given a tag name.

    ``tagStr`` may be a string (converted to a Keyword, caseless for HTML)
    or an existing ParserElement. ``xml`` selects strict XML matching
    (case-sensitive tags, double-quoted attribute values only) versus the
    looser HTML rules. Returns an (openTag, closeTag) pair.
    """
    if isinstance(tagStr, str_type):
        resname = tagStr
        # HTML tag names are caseless; XML tag names are case-sensitive
        tagStr = Keyword(tagStr, caseless=not xml)
    else:
        resname = tagStr.name

    tagAttrName = Word(alphas, alphanums + "_-:")
    if xml:
        # XML: attribute values must be double-quoted strings
        tagAttrValue = dbl_quoted_string.copy().set_parse_action(remove_quotes)
        openTag = (
            suppress_LT
            + tagStr("tag")
            + Dict(ZeroOrMore(Group(tagAttrName + Suppress("=") + tagAttrValue)))
            # "empty" records whether this is a self-closing tag (<tag/>)
            + Opt("/", default=[False])("empty").set_parse_action(
                lambda s, l, t: t[0] == "/"
            )
            + suppress_GT
        )
    else:
        # HTML: allow quoted or unquoted attribute values, and value-less attributes
        tagAttrValue = quoted_string.copy().set_parse_action(remove_quotes) | Word(
            printables, exclude_chars=">"
        )
        openTag = (
            suppress_LT
            + tagStr("tag")
            + Dict(
                ZeroOrMore(
                    Group(
                        # HTML attribute names are normalized to lowercase
                        tagAttrName.set_parse_action(lambda t: t[0].lower())
                        + Opt(Suppress("=") + tagAttrValue)
                    )
                )
            )
            + Opt("/", default=[False])("empty").set_parse_action(
                lambda s, l, t: t[0] == "/"
            )
            + suppress_GT
        )
    closeTag = Combine(Literal("</") + tagStr + ">", adjacent=False)

    openTag.set_name("<%s>" % resname)
    # add start<tagname> results name in parse action now that ungrouped names are not reported at two levels
    openTag.add_parse_action(
        lambda t: t.__setitem__(
            "start" + "".join(resname.replace(":", " ").title().split()), t.copy()
        )
    )
    closeTag = closeTag(
        "end" + "".join(resname.replace(":", " ").title().split())
    ).set_name("</%s>" % resname)
    # expose the tag name and a convenience expression for the tag body
    openTag.tag = resname
    closeTag.tag = resname
    openTag.tag_body = SkipTo(closeTag())
    return openTag, closeTag
597
598
def make_html_tags(
    tag_str: Union[str, ParserElement]
) -> Tuple[ParserElement, ParserElement]:
    """Helper to construct opening and closing tag expressions for HTML,
    given a tag name. Matches tags in either upper or lower case,
    attributes with namespaces and with quoted or unquoted values.

    Example::

        text = '<td>More info at the <a href="https://github.com/pyparsing/pyparsing/wiki">pyparsing</a> wiki page</td>'
        # make_html_tags returns pyparsing expressions for the opening and
        # closing tags as a 2-tuple
        a, a_end = make_html_tags("A")
        link_expr = a + SkipTo(a_end)("link_text") + a_end

        for link in link_expr.search_string(text):
            # attributes in the <A> tag (like "href" shown here) are
            # also accessible as named results
            print(link.link_text, '->', link.href)

    prints::

        pyparsing -> https://github.com/pyparsing/pyparsing/wiki
    """
    # xml=False selects the looser, case-insensitive HTML matching rules
    return _makeTags(tag_str, xml=False)
625
def make_xml_tags(
    tag_str: Union[str, ParserElement]
) -> Tuple[ParserElement, ParserElement]:
    """Helper to construct opening and closing tag expressions for XML,
    given a tag name. Matches tags only in the given upper/lower case.

    Example: similar to :class:`make_html_tags`
    """
    # xml=True selects strict, case-sensitive XML matching rules
    return _makeTags(tag_str, xml=True)
635
636
# pre-built expressions that match any HTML open/close tag (case-insensitive)
any_open_tag: ParserElement
any_close_tag: ParserElement
any_open_tag, any_close_tag = make_html_tags(
    Word(alphas, alphanums + "_:").set_name("any tag")
)

# map of HTML5 entity names (without the trailing ';') to their replacement text
_htmlEntityMap = {k.rstrip(";"): v for k, v in html.entities.html5.items()}
# matches any known entity, capturing its name in the "entity" results name
common_html_entity = Regex("&(?P<entity>" + "|".join(_htmlEntityMap) + ");").set_name(
    "common HTML entity"
)
647
648
def replace_html_entity(s, l, t):
    """Helper parser action to replace common HTML entities with their special characters"""
    # the entity name was captured by common_html_entity's named regex group
    entity_name = t.entity
    return _htmlEntityMap.get(entity_name)
652
653
class OpAssoc(Enum):
    """Enumeration of operator associativity
    - used in constructing InfixNotationOperatorSpec for :class:`infix_notation`"""

    # operators at this level group left-to-right
    LEFT = 1
    # operators at this level group right-to-left
    RIGHT = 2
660
661
# a single operator expression: a ParserElement or literal string; for
# ternary operators, a (left_op, right_op) pair of expressions/strings
InfixNotationOperatorArgType = Union[
    ParserElement, str, Tuple[Union[ParserElement, str], Union[ParserElement, str]]
]
# one precedence level for infix_notation:
# (op_expr, num_operands, associativity[, parse_action])
InfixNotationOperatorSpec = Union[
    Tuple[
        InfixNotationOperatorArgType,
        int,
        OpAssoc,
        typing.Optional[ParseAction],
    ],
    Tuple[
        InfixNotationOperatorArgType,
        int,
        OpAssoc,
    ],
]
678
679
def infix_notation(
    base_expr: ParserElement,
    op_list: List[InfixNotationOperatorSpec],
    lpar: Union[str, ParserElement] = Suppress("("),
    rpar: Union[str, ParserElement] = Suppress(")"),
) -> ParserElement:
    """Helper method for constructing grammars of expressions made up of
    operators working in a precedence hierarchy.  Operators may be unary
    or binary, left- or right-associative.  Parse actions can also be
    attached to operator expressions. The generated parser will also
    recognize the use of parentheses to override operator precedences
    (see example below).

    Note: if you define a deep operator list, you may see performance
    issues when using infix_notation. See
    :class:`ParserElement.enable_packrat` for a mechanism to potentially
    improve your parser performance.

    Parameters:

    - ``base_expr`` - expression representing the most basic operand to
      be used in the expression
    - ``op_list`` - list of tuples, one for each operator precedence level
      in the expression grammar; each tuple is of the form ``(op_expr,
      num_operands, right_left_assoc, (optional)parse_action)``, where:

      - ``op_expr`` is the pyparsing expression for the operator; may also
        be a string, which will be converted to a Literal; if ``num_operands``
        is 3, ``op_expr`` is a tuple of two expressions, for the two
        operators separating the 3 terms
      - ``num_operands`` is the number of terms for this operator (must be 1,
        2, or 3)
      - ``right_left_assoc`` is the indicator whether the operator is right
        or left associative, using the pyparsing-defined constants
        ``OpAssoc.RIGHT`` and ``OpAssoc.LEFT``.
      - ``parse_action`` is the parse action to be associated with
        expressions matching this operator expression (the parse action
        tuple member may be omitted); if the parse action is passed
        a tuple or list of functions, this is equivalent to calling
        ``set_parse_action(*fn)``
        (:class:`ParserElement.set_parse_action`)
    - ``lpar`` - expression for matching left-parentheses; if passed as a
      str, then will be parsed as ``Suppress(lpar)``. If lpar is passed as
      an expression (such as ``Literal('(')``), then it will be kept in
      the parsed results, and grouped with them. (default= ``Suppress('(')``)
    - ``rpar`` - expression for matching right-parentheses; if passed as a
      str, then will be parsed as ``Suppress(rpar)``. If rpar is passed as
      an expression (such as ``Literal(')')``), then it will be kept in
      the parsed results, and grouped with them. (default= ``Suppress(')')``)

    Example::

        # simple example of four-function arithmetic with ints and
        # variable names
        integer = pyparsing_common.signed_integer
        varname = pyparsing_common.identifier

        arith_expr = infix_notation(integer | varname,
            [
            ('-', 1, OpAssoc.RIGHT),
            (one_of('* /'), 2, OpAssoc.LEFT),
            (one_of('+ -'), 2, OpAssoc.LEFT),
            ])

        arith_expr.run_tests('''
            5+3*6
            (5+3)*6
            -2--11
            ''', full_dump=False)

    prints::

        5+3*6
        [[5, '+', [3, '*', 6]]]

        (5+3)*6
        [[[5, '+', 3], '*', 6]]

        (5+x)*y
        [[[5, '+', 'x'], '*', 'y']]

        -2--11
        [[['-', 2], '-', ['-', 11]]]
    """

    # captive version of FollowedBy that does not do parse actions or capture results names
    class _FB(FollowedBy):
        def parseImpl(self, instring, loc, doActions=True):
            self.expr.try_parse(instring, loc)
            return loc, []

    _FB.__name__ = "FollowedBy>"

    ret = Forward()
    # string parens are suppressed; ParserElement parens are kept in the results
    if isinstance(lpar, str):
        lpar = Suppress(lpar)
    if isinstance(rpar, str):
        rpar = Suppress(rpar)

    # if lpar and rpar are not suppressed, wrap in group
    # (bug fix: the original tested ``rpar`` twice, so a caller-supplied
    # non-Suppress ``lpar`` did not trigger grouping as documented)
    if not (isinstance(lpar, Suppress) and isinstance(rpar, Suppress)):
        lastExpr = base_expr | Group(lpar + ret + rpar)
    else:
        lastExpr = base_expr | (lpar + ret + rpar)

    arity: int
    rightLeftAssoc: OpAssoc
    pa: typing.Optional[ParseAction]
    opExpr1: ParserElement
    opExpr2: ParserElement
    # build one precedence level at a time, from tightest-binding to loosest;
    # each level's expression falls back to the previous level (lastExpr)
    for i, operDef in enumerate(op_list):
        # pad with None so a 3-tuple spec yields pa=None
        opExpr, arity, rightLeftAssoc, pa = (operDef + (None,))[:4]  # type: ignore[assignment]
        if isinstance(opExpr, str_type):
            opExpr = ParserElement._literalStringClass(opExpr)
        opExpr = typing.cast(ParserElement, opExpr)
        if arity == 3:
            if not isinstance(opExpr, (tuple, list)) or len(opExpr) != 2:
                raise ValueError(
                    "if numterms=3, opExpr must be a tuple or list of two expressions"
                )
            opExpr1, opExpr2 = opExpr
            term_name = f"{opExpr1}{opExpr2} term"
        else:
            term_name = f"{opExpr} term"

        if not 1 <= arity <= 3:
            raise ValueError("operator must be unary (1), binary (2), or ternary (3)")

        if rightLeftAssoc not in (OpAssoc.LEFT, OpAssoc.RIGHT):
            raise ValueError("operator must indicate right or left associativity")

        thisExpr: ParserElement = Forward().set_name(term_name)
        thisExpr = typing.cast(Forward, thisExpr)
        if rightLeftAssoc is OpAssoc.LEFT:
            if arity == 1:
                # postfix unary: operand followed by one or more operators
                matchExpr = _FB(lastExpr + opExpr) + Group(lastExpr + opExpr[1, ...])
            elif arity == 2:
                if opExpr is not None:
                    matchExpr = _FB(lastExpr + opExpr + lastExpr) + Group(
                        lastExpr + (opExpr + lastExpr)[1, ...]
                    )
                else:
                    # implicit operator (juxtaposition): two or more operands
                    matchExpr = _FB(lastExpr + lastExpr) + Group(lastExpr[2, ...])
            elif arity == 3:
                matchExpr = _FB(
                    lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr
                ) + Group(lastExpr + OneOrMore(opExpr1 + lastExpr + opExpr2 + lastExpr))
        elif rightLeftAssoc is OpAssoc.RIGHT:
            if arity == 1:
                # prefix unary; try to avoid LR with this extra test
                if not isinstance(opExpr, Opt):
                    opExpr = Opt(opExpr)
                matchExpr = _FB(opExpr.expr + thisExpr) + Group(opExpr + thisExpr)
            elif arity == 2:
                if opExpr is not None:
                    matchExpr = _FB(lastExpr + opExpr + thisExpr) + Group(
                        lastExpr + (opExpr + thisExpr)[1, ...]
                    )
                else:
                    matchExpr = _FB(lastExpr + thisExpr) + Group(
                        lastExpr + thisExpr[1, ...]
                    )
            elif arity == 3:
                matchExpr = _FB(
                    lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr
                ) + Group(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr)
        if pa:
            if isinstance(pa, (tuple, list)):
                matchExpr.set_parse_action(*pa)
            else:
                matchExpr.set_parse_action(pa)
        thisExpr <<= (matchExpr | lastExpr).set_name(term_name)
        lastExpr = thisExpr
    ret <<= lastExpr
    return ret
855
856
def indentedBlock(blockStatementExpr, indentStack, indent=True, backup_stacks=[]):
    """
    (DEPRECATED - use :class:`IndentedBlock` class instead)
    Helper method for defining space-delimited indentation blocks,
    such as those used to define block statements in Python source code.

    Parameters:

    - ``blockStatementExpr`` - expression defining syntax of statement that
      is repeated within the indented block
    - ``indentStack`` - list created by caller to manage indentation stack
      (multiple ``statementWithIndentedBlock`` expressions within a single
      grammar should share a common ``indentStack``)
    - ``indent`` - boolean indicating whether block must be indented beyond
      the current level; set to ``False`` for block of left-most statements
      (default= ``True``)

    A valid block must contain at least one ``blockStatement``.

    (Note that indentedBlock uses internal parse actions which make it
    incompatible with packrat parsing.)

    Example::

        data = '''
        def A(z):
          A1
          B = 100
          G = A2
          A2
          A3
        B
        def BB(a,b,c):
          BB1
          def BBA():
            bba1
            bba2
            bba3
        C
        D
        def spam(x,y):
             def eggs(z):
                 pass
        '''


        indentStack = [1]
        stmt = Forward()

        identifier = Word(alphas, alphanums)
        funcDecl = ("def" + identifier + Group("(" + Opt(delimitedList(identifier)) + ")") + ":")
        func_body = indentedBlock(stmt, indentStack)
        funcDef = Group(funcDecl + func_body)

        rvalue = Forward()
        funcCall = Group(identifier + "(" + Opt(delimitedList(rvalue)) + ")")
        rvalue << (funcCall | identifier | Word(nums))
        assignment = Group(identifier + "=" + rvalue)
        stmt << (funcDef | assignment | identifier)

        module_body = stmt[1, ...]

        parseTree = module_body.parseString(data)
        parseTree.pprint()

    prints::

        [['def',
          'A',
          ['(', 'z', ')'],
          ':',
          [['A1'], [['B', '=', '100']], [['G', '=', 'A2']], ['A2'], ['A3']]],
         'B',
         ['def',
          'BB',
          ['(', 'a', 'b', 'c', ')'],
          ':',
          [['BB1'], [['def', 'BBA', ['(', ')'], ':', [['bba1'], ['bba2'], ['bba3']]]]]],
         'C',
         'D',
         ['def',
          'spam',
          ['(', 'x', 'y', ')'],
          ':',
          [[['def', 'eggs', ['(', 'z', ')'], ':', [['pass']]]]]]]
    """
    # NOTE(review): the mutable default ``backup_stacks=[]`` appears deliberate -
    # it persists saved copies of indentStack across calls so a failed parse can
    # restore the stack; confirm before "fixing" it.
    backup_stacks.append(indentStack[:])

    def reset_stack():
        # restore the indent stack from the most recent backup (on parse failure)
        indentStack[:] = backup_stacks[-1]

    def checkPeerIndent(s, l, t):
        if l >= len(s):
            return
        curCol = col(l, s)
        if curCol != indentStack[-1]:
            if curCol > indentStack[-1]:
                raise ParseException(s, l, "illegal nesting")
            raise ParseException(s, l, "not a peer entry")

    def checkSubIndent(s, l, t):
        # a sub-block must be indented strictly deeper than the current level
        curCol = col(l, s)
        if curCol > indentStack[-1]:
            indentStack.append(curCol)
        else:
            raise ParseException(s, l, "not a subentry")

    def checkUnindent(s, l, t):
        if l >= len(s):
            return
        curCol = col(l, s)
        # an unindent must return to some previously seen indent level
        if not (indentStack and curCol in indentStack):
            raise ParseException(s, l, "not an unindent")
        if curCol < indentStack[-1]:
            indentStack.pop()

    # newline handling: consume line ends (and blank lines) between statements
    NL = OneOrMore(LineEnd().set_whitespace_chars("\t ").suppress())
    INDENT = (Empty() + Empty().set_parse_action(checkSubIndent)).set_name("INDENT")
    PEER = Empty().set_parse_action(checkPeerIndent).set_name("")
    UNDENT = Empty().set_parse_action(checkUnindent).set_name("UNINDENT")
    if indent:
        smExpr = Group(
            Opt(NL)
            + INDENT
            + OneOrMore(PEER + Group(blockStatementExpr) + Opt(NL))
            + UNDENT
        )
    else:
        smExpr = Group(
            Opt(NL)
            + OneOrMore(PEER + Group(blockStatementExpr) + Opt(NL))
            + Opt(UNDENT)
        )

    # add a parse action to remove backup_stack from list of backups
    smExpr.add_parse_action(
        lambda: backup_stacks.pop(-1) and None if backup_stacks else None
    )
    smExpr.set_fail_action(lambda a, b, c, d: reset_stack())
    # allow line continuations (backslash-newline) inside block statements
    blockStatementExpr.ignore(_bslash + LineEnd())
    return smExpr.set_name("indented block")
998
999
# it's easy to get these comment structures wrong - they're very common, so may as well make them available
# The body pattern matches any non-'*' character, or a '*' not followed by '/',
# so the match runs up to (and Combine appends) the first '*/' terminator.
c_style_comment = Combine(Regex(r"/\*(?:[^*]|\*(?!/))*") + "*/").set_name(
    "C style comment"
)
"Comment of the form ``/* ... */``"

# Non-greedy match; [\s\S] matches any character including newlines, so the
# comment may span multiple lines.
html_comment = Regex(r"<!--[\s\S]*?-->").set_name("HTML comment")
"Comment of the form ``<!-- ... -->``"

# Everything up to (not including) the next newline; leave_whitespace() keeps
# leading whitespace from being skipped before matching.
rest_of_line = Regex(r".*").leave_whitespace().set_name("rest of line")
# A '//' comment may be continued onto the next line with a backslash-newline.
dbl_slash_comment = Regex(r"//(?:\\\n|[^\n])*").set_name("// comment")
"Comment of the form ``// ... (to end of line)``"

# Either a /* ... */ block comment (same regex as c_style_comment) or a
# dbl_slash_comment line comment.
cpp_style_comment = Combine(
    Regex(r"/\*(?:[^*]|\*(?!/))*") + "*/" | dbl_slash_comment
).set_name("C++ style comment")
"Comment of either form :class:`c_style_comment` or :class:`dbl_slash_comment`"

java_style_comment = cpp_style_comment
"Same as :class:`cpp_style_comment`"

python_style_comment = Regex(r"#.*").set_name("Python style comment")
"Comment of the form ``# ... (to end of line)``"
1023
1024
# Snapshot every ParserElement defined at module level so far, so the package
# can revisit these built-ins later if a global default value gets updated.
_builtin_exprs: List[ParserElement] = [
    candidate for candidate in vars().values() if isinstance(candidate, ParserElement)
]
1030
1031
# compatibility function, superseded by DelimitedList class
def delimited_list(
    expr: Union[str, ParserElement],
    delim: Union[str, ParserElement] = ",",
    combine: bool = False,
    min: typing.Optional[int] = None,
    max: typing.Optional[int] = None,
    *,
    allow_trailing_delim: bool = False,
) -> ParserElement:
    """(DEPRECATED - use :class:`DelimitedList` class)

    Thin compatibility shim: forwards all arguments unchanged to
    :class:`DelimitedList` and returns the resulting expression.
    """
    parser = DelimitedList(
        expr,
        delim,
        combine,
        min,
        max,
        allow_trailing_delim=allow_trailing_delim,
    )
    return parser
1046
1047
# pre-PEP8 compatible names
# (camelCase bindings that alias the snake_case definitions above, kept so
# code written against older pyparsing releases continues to work)
# fmt: off
opAssoc = OpAssoc
anyOpenTag = any_open_tag
anyCloseTag = any_close_tag
commonHTMLEntity = common_html_entity
cStyleComment = c_style_comment
htmlComment = html_comment
restOfLine = rest_of_line
dblSlashComment = dbl_slash_comment
cppStyleComment = cpp_style_comment
javaStyleComment = java_style_comment
pythonStyleComment = python_style_comment
1061
# Deprecated camelCase synonyms: each `...` stub body is thrown away — the
# @replaced_by_pep8 decorator (from .util) presumably returns a wrapper that
# forwards to the PEP8-named object passed as its argument.
@replaced_by_pep8(DelimitedList)
def delimitedList(): ...

# NOTE(review): this rebinds the `delimited_list` compatibility function
# defined earlier in this module, so calls go through the decorator's wrapper
# instead — looks intentional, but confirm.
@replaced_by_pep8(DelimitedList)
def delimited_list(): ...

@replaced_by_pep8(counted_array)
def countedArray(): ...

@replaced_by_pep8(match_previous_literal)
def matchPreviousLiteral(): ...

@replaced_by_pep8(match_previous_expr)
def matchPreviousExpr(): ...

@replaced_by_pep8(one_of)
def oneOf(): ...

@replaced_by_pep8(dict_of)
def dictOf(): ...

@replaced_by_pep8(original_text_for)
def originalTextFor(): ...

@replaced_by_pep8(nested_expr)
def nestedExpr(): ...

@replaced_by_pep8(make_html_tags)
def makeHTMLTags(): ...

@replaced_by_pep8(make_xml_tags)
def makeXMLTags(): ...

@replaced_by_pep8(replace_html_entity)
def replaceHTMLEntity(): ...

@replaced_by_pep8(infix_notation)
def infixNotation(): ...
# fmt: on