# helpers.py -- from pip/_vendor/pyparsing (vendored copy)
# (retrieved from jfr.im git, repo dlqueue.git, commit "init: venv and flask",
#  path venv/lib/python3.11/site-packages/pip/_vendor/pyparsing/helpers.py)
1 # helpers.py
2 import html.entities
3 import re
4 import sys
5 import typing
6
7 from . import __diag__
8 from .core import *
9 from .util import (
10 _bslash,
11 _flatten,
12 _escape_regex_range_chars,
13 replaced_by_pep8,
14 )
15
16
17 #
18 # global helpers
19 #
def counted_array(
    expr: ParserElement,
    int_expr: typing.Optional[ParserElement] = None,
    *,
    intExpr: typing.Optional[ParserElement] = None,
) -> ParserElement:
    """Helper to define a counted list of expressions.

    This helper defines a pattern of the form::

        integer expr expr expr...

    where the leading integer tells how many expr expressions follow.
    The matched tokens returns the array of expr tokens as a list - the
    leading count token is suppressed.

    If ``int_expr`` is specified, it should be a pyparsing expression
    that produces an integer value.

    Example::

        counted_array(Word(alphas)).parse_string('2 ab cd ef')  # -> ['ab', 'cd']

        # in this parser, the leading integer value is given in binary,
        # '10' indicating that 2 values are in the array
        binary_constant = Word('01').set_parse_action(lambda t: int(t[0], 2))
        counted_array(Word(alphas), int_expr=binary_constant).parse_string('10 ab cd ef')  # -> ['ab', 'cd']

        # if other fields must be parsed after the count but before the
        # list items, give the fields results names and they will
        # be preserved in the returned ParseResults:
        count_with_metadata = integer + Word(alphas)("type")
        typed_array = counted_array(Word(alphanums), int_expr=count_with_metadata)("items")
        result = typed_array.parse_string("3 bool True True False")
        print(result.dump())

        # prints
        # ['True', 'True', 'False']
        # - items: ['True', 'True', 'False']
        # - type: 'bool'
    """
    # reconcile pre-PEP8 keyword argument with the new-style argument
    intExpr = intExpr or int_expr
    # the array body is a Forward, redefined on every parse once the count is known
    array_expr = Forward()

    def count_field_parse_action(s, l, t):
        # after the count is parsed, rewire the Forward to match exactly
        # that many occurrences of ``expr`` (Empty for a zero count)
        nonlocal array_expr
        n = t[0]
        array_expr <<= (expr * n) if n else Empty()
        # clear list contents, but keep any named results
        del t[:]

    if intExpr is None:
        # default count expression: a decimal integer
        intExpr = Word(nums).set_parse_action(lambda t: int(t[0]))
    else:
        # copy so the caller's expression is not mutated by our parse action
        intExpr = intExpr.copy()
    intExpr.set_name("arrayLen")
    # call_during_try=True so the Forward is rewired even inside lookaheads
    intExpr.add_parse_action(count_field_parse_action, call_during_try=True)
    return (intExpr + array_expr).set_name("(len) " + str(expr) + "...")
78
79
def match_previous_literal(expr: ParserElement) -> ParserElement:
    """Helper to define an expression that is indirectly defined from
    the tokens matched in a previous expression, that is, it looks for
    a 'repeat' of a previous expression. For example::

        first = Word(nums)
        second = match_previous_literal(first)
        match_expr = first + ":" + second

    will match ``"1:1"``, but not ``"1:2"``. Because this
    matches a previous literal, will also match the leading
    ``"1:1"`` in ``"1:10"``. If this is not desired, use
    :class:`match_previous_expr`. Do *not* use with packrat parsing
    enabled.
    """
    # the repeater is a Forward, redefined each time ``expr`` matches
    rep = Forward()

    def copy_token_to_repeater(s, l, t):
        if t:
            if len(t) == 1:
                # single token - repeat it as a plain literal
                rep << t[0]
            else:
                # flatten t tokens
                tflat = _flatten(t.as_list())
                rep << And(Literal(tt) for tt in tflat)
        else:
            # expr matched no tokens - the repeater matches the empty string
            rep << Empty()

    # callDuringTry=True so the repeater tracks matches made inside lookaheads
    expr.add_parse_action(copy_token_to_repeater, callDuringTry=True)
    rep.set_name("(prev) " + str(expr))
    return rep
111
112
def match_previous_expr(expr: ParserElement) -> ParserElement:
    """Helper to define an expression that is indirectly defined from
    the tokens matched in a previous expression, that is, it looks for
    a 'repeat' of a previous expression. For example::

        first = Word(nums)
        second = match_previous_expr(first)
        match_expr = first + ":" + second

    will match ``"1:1"``, but not ``"1:2"``. Because this
    matches by expressions, will *not* match the leading ``"1:1"``
    in ``"1:10"``; the expressions are evaluated first, and then
    compared, so ``"1"`` is compared with ``"10"``. Do *not* use
    with packrat parsing enabled.
    """
    rep = Forward()
    # the repeater re-parses with a copy of expr, then a parse action
    # verifies the new tokens against those of the previous match
    e2 = expr.copy()
    rep <<= e2

    def copy_token_to_repeater(s, l, t):
        # capture the tokens of the most recent match of ``expr``
        matchTokens = _flatten(t.as_list())

        def must_match_these_tokens(s, l, t):
            # reject the repeat unless it produced the identical token list
            theseTokens = _flatten(t.as_list())
            if theseTokens != matchTokens:
                # fix: add the missing space between "found" and the token list
                raise ParseException(
                    s, l, f"Expected {matchTokens}, found {theseTokens}"
                )

        rep.set_parse_action(must_match_these_tokens, callDuringTry=True)

    expr.add_parse_action(copy_token_to_repeater, callDuringTry=True)
    rep.set_name("(prev) " + str(expr))
    return rep
147
148
def one_of(
    strs: Union[typing.Iterable[str], str],
    caseless: bool = False,
    use_regex: bool = True,
    as_keyword: bool = False,
    *,
    useRegex: bool = True,
    asKeyword: bool = False,
) -> ParserElement:
    """Helper to quickly define a set of alternative :class:`Literal` s,
    and makes sure to do longest-first testing when there is a conflict,
    regardless of the input order, but returns
    a :class:`MatchFirst` for best performance.

    Parameters:

    - ``strs`` - a string of space-delimited literals, or a collection of
      string literals
    - ``caseless`` - treat all literals as caseless - (default= ``False``)
    - ``use_regex`` - as an optimization, will
      generate a :class:`Regex` object; otherwise, will generate
      a :class:`MatchFirst` object (if ``caseless=True`` or ``as_keyword=True``, or if
      creating a :class:`Regex` raises an exception) - (default= ``True``)
    - ``as_keyword`` - enforce :class:`Keyword`-style matching on the
      generated expressions - (default= ``False``)
    - ``asKeyword`` and ``useRegex`` are retained for pre-PEP8 compatibility,
      but will be removed in a future release

    Example::

        comp_oper = one_of("< = > <= >= !=")
        var = Word(alphas)
        number = Word(nums)
        term = var | number
        comparison_expr = term + comp_oper + term
        print(comparison_expr.search_string("B = 12  AA=23 B<=AA AA>12"))

    prints::

        [['B', '=', '12'], ['AA', '=', '23'], ['B', '<=', 'AA'], ['AA', '>', '12']]
    """
    # reconcile pre-PEP8 keyword arguments with the new-style arguments
    asKeyword = asKeyword or as_keyword
    useRegex = useRegex and use_regex

    # a second string positional argument usually means the caller meant
    # one_of("a b c"), not one_of("a", "b") - warn when diagnostics request it
    if (
        isinstance(caseless, str_type)
        and __diag__.warn_on_multiple_string_args_to_oneof
    ):
        warnings.warn(
            "More than one string argument passed to one_of, pass"
            " choices as a list or space-delimited string",
            stacklevel=2,
        )

    # choose comparison helpers and the element class for the MatchFirst fallback
    if caseless:
        isequal = lambda a, b: a.upper() == b.upper()
        masks = lambda a, b: b.upper().startswith(a.upper())
        parseElementClass = CaselessKeyword if asKeyword else CaselessLiteral
    else:
        isequal = lambda a, b: a == b
        masks = lambda a, b: b.startswith(a)
        parseElementClass = Keyword if asKeyword else Literal

    symbols: List[str] = []
    if isinstance(strs, str_type):
        strs = typing.cast(str, strs)
        symbols = strs.split()
    elif isinstance(strs, Iterable):
        symbols = list(strs)
    else:
        raise TypeError("Invalid argument to one_of, expected string or iterable")
    if not symbols:
        return NoMatch()

    # reorder given symbols to take care to avoid masking longer choices with shorter ones
    # (but only if the given symbols are not just single characters)
    if any(len(sym) > 1 for sym in symbols):
        i = 0
        while i < len(symbols) - 1:
            cur = symbols[i]
            for j, other in enumerate(symbols[i + 1 :]):
                if isequal(other, cur):
                    # drop exact duplicate of the current symbol
                    del symbols[i + j + 1]
                    break
                elif masks(cur, other):
                    # ``other`` starts with ``cur`` and would never match;
                    # move it ahead so longest-first ordering holds
                    del symbols[i + j + 1]
                    symbols.insert(i, other)
                    break
            else:
                # no change made at position i - advance
                i += 1

    if useRegex:
        re_flags: int = re.IGNORECASE if caseless else 0

        try:
            if all(len(sym) == 1 for sym in symbols):
                # symbols are just single characters, create range regex pattern
                patt = f"[{''.join(_escape_regex_range_chars(sym) for sym in symbols)}]"
            else:
                patt = "|".join(re.escape(sym) for sym in symbols)

            # wrap with \b word break markers if defining as keywords
            if asKeyword:
                patt = rf"\b(?:{patt})\b"

            ret = Regex(patt, flags=re_flags).set_name(" | ".join(symbols))

            if caseless:
                # add parse action to return symbols as specified, not in random
                # casing as found in input string
                symbol_map = {sym.lower(): sym for sym in symbols}
                ret.add_parse_action(lambda s, l, t: symbol_map[t[0].lower()])

            return ret

        except re.error:
            # fall through to the MatchFirst fallback below
            warnings.warn(
                "Exception creating Regex for one_of, building MatchFirst", stacklevel=2
            )

    # last resort, just use MatchFirst
    return MatchFirst(parseElementClass(sym) for sym in symbols).set_name(
        " | ".join(symbols)
    )
273
274
def dict_of(key: ParserElement, value: ParserElement) -> ParserElement:
    """Helper to easily and clearly define a dictionary by specifying
    the respective patterns for the key and value. Takes care of
    defining the :class:`Dict`, :class:`ZeroOrMore`, and
    :class:`Group` tokens in the proper order. The key pattern
    can include delimiting markers or punctuation, as long as they are
    suppressed, thereby leaving the significant key text. The value
    pattern can include named results, so that the :class:`Dict` results
    can include named token fields.

    Example::

        text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
        attr_label = label
        attr_value = Suppress(':') + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join)

        # similar to Dict, but simpler call format
        result = dict_of(attr_label, attr_value).parse_string(text)
        print(result.dump())
        print(result['shape'])
        print(result.shape)  # object attribute access works too
        print(result.as_dict())

    prints::

        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
        - color: 'light blue'
        - posn: 'upper left'
        - shape: 'SQUARE'
        - texture: 'burlap'
        SQUARE
        SQUARE
        {'color': 'light blue', 'shape': 'SQUARE', 'posn': 'upper left', 'texture': 'burlap'}
    """
    # one key/value pair, grouped so Dict sees [key, value...] sublists
    entry = Group(key + value)
    # one or more entries, collected into a Dict for named-result access
    return Dict(OneOrMore(entry))
313
314
def original_text_for(
    expr: ParserElement, as_string: bool = True, *, asString: bool = True
) -> ParserElement:
    """Helper to return the original, untokenized text for a given
    expression.  Useful to restore the parsed fields of an HTML start
    tag into the raw tag text itself, or to revert separate tokens with
    intervening whitespace back to the original matching input text. By
    default, returns a string containing the original parsed text.

    If the optional ``as_string`` argument is passed as
    ``False``, then the return value is
    a :class:`ParseResults` containing any results names that
    were originally matched, and a single token containing the original
    matched text from the input string.  So if the expression passed to
    :class:`original_text_for` contains expressions with defined
    results names, you must set ``as_string`` to ``False`` if you
    want to preserve those results name values.

    The ``asString`` pre-PEP8 argument is retained for compatibility,
    but will be removed in a future release.

    Example::

        src = "this is test <b> bold <i>text</i> </b> normal text "
        for tag in ("b", "i"):
            opener, closer = make_html_tags(tag)
            patt = original_text_for(opener + ... + closer)
            print(patt.search_string(src)[0])

    prints::

        ['<b> bold <i>text</i> </b>']
        ['<i>text</i>']
    """
    # reconcile pre-PEP8 keyword argument with the new-style argument
    asString = asString and as_string

    # zero-width markers that record the current parse location
    locMarker = Empty().set_parse_action(lambda s, loc, t: loc)
    endlocMarker = locMarker.copy()
    # don't skip leading whitespace before recording the end location,
    # so trailing whitespace is excluded from the captured slice
    endlocMarker.callPreparse = False
    matchExpr = locMarker("_original_start") + expr + endlocMarker("_original_end")
    if asString:
        # return only the raw slice of the input string
        extractText = lambda s, l, t: s[t._original_start : t._original_end]
    else:

        def extractText(s, l, t):
            # replace token list with the raw slice; pop() removes the
            # internal marker names while keeping other named results
            t[:] = [s[t.pop("_original_start") : t.pop("_original_end")]]

    matchExpr.set_parse_action(extractText)
    # propagate ignore expressions so skipped content matches ``expr``'s behavior
    matchExpr.ignoreExprs = expr.ignoreExprs
    matchExpr.suppress_warning(Diagnostics.warn_ungrouped_named_tokens_in_collection)
    return matchExpr
366
367
def ungroup(expr: ParserElement) -> ParserElement:
    """Helper to undo pyparsing's default grouping of And expressions,
    even if all but one are non-empty.
    """
    # strip one level of grouping by returning only the first token
    def _first_token(t):
        return t[0]

    return TokenConverter(expr).add_parse_action(_first_token)
373
374
def locatedExpr(expr: ParserElement) -> ParserElement:
    """
    (DEPRECATED - future code should use the :class:`Located` class)
    Helper to decorate a returned token with its starting and ending
    locations in the input string.

    This helper adds the following results names:

    - ``locn_start`` - location where matched expression begins
    - ``locn_end`` - location where matched expression ends
    - ``value`` - the actual parsed results

    Be careful if the input text contains ``<TAB>`` characters, you
    may want to call :class:`ParserElement.parse_with_tabs`

    Example::

        wd = Word(alphas)
        for match in locatedExpr(wd).search_string("ljsdf123lksdjjf123lkkjj1222"):
            print(match)

    prints::

        [[0, 'ljsdf', 5]]
        [[8, 'lksdjjf', 15]]
        [[18, 'lkkjj', 23]]
    """
    # zero-width marker that yields the current parse location as a token
    start_marker = Empty().set_parse_action(lambda ss, ll, tt: ll)
    # the end marker must not skip whitespace, or the reported end
    # would include trailing whitespace
    end_marker = start_marker.copy().leaveWhitespace()
    return Group(
        start_marker("locn_start") + expr("value") + end_marker("locn_end")
    )
408
409
def nested_expr(
    opener: Union[str, ParserElement] = "(",
    closer: Union[str, ParserElement] = ")",
    content: typing.Optional[ParserElement] = None,
    ignore_expr: ParserElement = quoted_string(),
    *,
    ignoreExpr: ParserElement = quoted_string(),
) -> ParserElement:
    """Helper method for defining nested lists enclosed in opening and
    closing delimiters (``"("`` and ``")"`` are the default).

    Parameters:

    - ``opener`` - opening character for a nested list
      (default= ``"("``); can also be a pyparsing expression
    - ``closer`` - closing character for a nested list
      (default= ``")"``); can also be a pyparsing expression
    - ``content`` - expression for items within the nested lists
      (default= ``None``)
    - ``ignore_expr`` - expression for ignoring opening and closing delimiters
      (default= :class:`quoted_string`)
    - ``ignoreExpr`` - this pre-PEP8 argument is retained for compatibility
      but will be removed in a future release

    If an expression is not provided for the content argument, the
    nested expression will capture all whitespace-delimited content
    between delimiters as a list of separate values.

    Use the ``ignore_expr`` argument to define expressions that may
    contain opening or closing characters that should not be treated as
    opening or closing characters for nesting, such as quoted_string or
    a comment expression.  Specify multiple expressions using an
    :class:`Or` or :class:`MatchFirst`. The default is
    :class:`quoted_string`, but if no expressions are to be ignored, then
    pass ``None`` for this argument.

    Example::

        data_type = one_of("void int short long char float double")
        decl_data_type = Combine(data_type + Opt(Word('*')))
        ident = Word(alphas+'_', alphanums+'_')
        number = pyparsing_common.number
        arg = Group(decl_data_type + ident)
        LPAR, RPAR = map(Suppress, "()")

        code_body = nested_expr('{', '}', ignore_expr=(quoted_string | c_style_comment))

        c_function = (decl_data_type("type")
                      + ident("name")
                      + LPAR + Opt(DelimitedList(arg), [])("args") + RPAR
                      + code_body("body"))
        c_function.ignore(c_style_comment)

        source_code = '''
            int is_odd(int x) {
                return (x%2);
            }

            int dec_to_hex(char hchar) {
                if (hchar >= '0' && hchar <= '9') {
                    return (ord(hchar)-ord('0'));
                } else {
                    return (10+ord(hchar)-ord('A'));
                }
            }
        '''
        for func in c_function.search_string(source_code):
            print("%(name)s (%(type)s) args: %(args)s" % func)


    prints::

        is_odd (int) args: [['int', 'x']]
        dec_to_hex (int) args: [['char', 'hchar']]
    """
    # reconcile pre-PEP8 ignoreExpr with ignore_expr: prefer whichever
    # argument was explicitly changed from the quoted_string() default
    if ignoreExpr != ignore_expr:
        ignoreExpr = ignore_expr if ignoreExpr == quoted_string() else ignoreExpr
    if opener == closer:
        raise ValueError("opening and closing strings cannot be the same")
    if content is None:
        # build a default content expression from the delimiter strings
        if isinstance(opener, str_type) and isinstance(closer, str_type):
            opener = typing.cast(str, opener)
            closer = typing.cast(str, closer)
            if len(opener) == 1 and len(closer) == 1:
                # single-character delimiters: content is any run of characters
                # that are not delimiters or whitespace
                if ignoreExpr is not None:
                    content = Combine(
                        OneOrMore(
                            ~ignoreExpr
                            + CharsNotIn(
                                opener + closer + ParserElement.DEFAULT_WHITE_CHARS,
                                exact=1,
                            )
                        )
                    ).set_parse_action(lambda t: t[0].strip())
                else:
                    content = empty.copy() + CharsNotIn(
                        opener + closer + ParserElement.DEFAULT_WHITE_CHARS
                    ).set_parse_action(lambda t: t[0].strip())
            else:
                # multi-character delimiters: use negative lookahead on the
                # delimiter literals, consuming one character at a time
                if ignoreExpr is not None:
                    content = Combine(
                        OneOrMore(
                            ~ignoreExpr
                            + ~Literal(opener)
                            + ~Literal(closer)
                            + CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS, exact=1)
                        )
                    ).set_parse_action(lambda t: t[0].strip())
                else:
                    content = Combine(
                        OneOrMore(
                            ~Literal(opener)
                            + ~Literal(closer)
                            + CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS, exact=1)
                        )
                    ).set_parse_action(lambda t: t[0].strip())
        else:
            raise ValueError(
                "opening and closing arguments must be strings if no content expression is given"
            )
    # recursive definition: a nested list may contain further nested lists
    ret = Forward()
    if ignoreExpr is not None:
        ret <<= Group(
            Suppress(opener) + ZeroOrMore(ignoreExpr | ret | content) + Suppress(closer)
        )
    else:
        ret <<= Group(Suppress(opener) + ZeroOrMore(ret | content) + Suppress(closer))
    ret.set_name("nested %s%s expression" % (opener, closer))
    return ret
539
540
def _makeTags(tagStr, xml, suppress_LT=Suppress("<"), suppress_GT=Suppress(">")):
    """Internal helper to construct opening and closing tag expressions, given a tag name.

    ``tagStr`` may be a string (converted to a Keyword, caseless for HTML)
    or an existing ParserElement. ``xml`` selects strict XML matching
    (case-sensitive tags, double-quoted attribute values only) versus the
    looser HTML rules. Returns an (openTag, closeTag) pair.
    """
    if isinstance(tagStr, str_type):
        resname = tagStr
        # HTML tag names are caseless; XML tag names are case-sensitive
        tagStr = Keyword(tagStr, caseless=not xml)
    else:
        resname = tagStr.name

    tagAttrName = Word(alphas, alphanums + "_-:")
    if xml:
        # XML: attribute values must be double-quoted strings
        tagAttrValue = dbl_quoted_string.copy().set_parse_action(remove_quotes)
        openTag = (
            suppress_LT
            + tagStr("tag")
            + Dict(ZeroOrMore(Group(tagAttrName + Suppress("=") + tagAttrValue)))
            # "empty" records whether this is a self-closing tag (<tag/>)
            + Opt("/", default=[False])("empty").set_parse_action(
                lambda s, l, t: t[0] == "/"
            )
            + suppress_GT
        )
    else:
        # HTML: allow quoted or unquoted attribute values, and value-less attributes
        tagAttrValue = quoted_string.copy().set_parse_action(remove_quotes) | Word(
            printables, exclude_chars=">"
        )
        openTag = (
            suppress_LT
            + tagStr("tag")
            + Dict(
                ZeroOrMore(
                    Group(
                        # HTML attribute names are normalized to lowercase
                        tagAttrName.set_parse_action(lambda t: t[0].lower())
                        + Opt(Suppress("=") + tagAttrValue)
                    )
                )
            )
            + Opt("/", default=[False])("empty").set_parse_action(
                lambda s, l, t: t[0] == "/"
            )
            + suppress_GT
        )
    closeTag = Combine(Literal("</") + tagStr + ">", adjacent=False)

    openTag.set_name("<%s>" % resname)
    # add start<tagname> results name in parse action now that ungrouped names are not reported at two levels
    openTag.add_parse_action(
        lambda t: t.__setitem__(
            "start" + "".join(resname.replace(":", " ").title().split()), t.copy()
        )
    )
    closeTag = closeTag(
        "end" + "".join(resname.replace(":", " ").title().split())
    ).set_name("</%s>" % resname)
    # expose the tag name and a convenience expression for the tag body
    openTag.tag = resname
    closeTag.tag = resname
    openTag.tag_body = SkipTo(closeTag())
    return openTag, closeTag
597
598
def make_html_tags(
    tag_str: Union[str, ParserElement]
) -> Tuple[ParserElement, ParserElement]:
    """Helper to construct opening and closing tag expressions for HTML,
    given a tag name. Matches tags in either upper or lower case,
    attributes with namespaces and with quoted or unquoted values.

    Example::

        text = '<td>More info at the <a href="https://github.com/pyparsing/pyparsing/wiki">pyparsing</a> wiki page</td>'
        # make_html_tags returns pyparsing expressions for the opening and
        # closing tags as a 2-tuple
        a, a_end = make_html_tags("A")
        link_expr = a + SkipTo(a_end)("link_text") + a_end

        for link in link_expr.search_string(text):
            # attributes in the <A> tag (like "href" shown here) are
            # also accessible as named results
            print(link.link_text, '->', link.href)

    prints::

        pyparsing -> https://github.com/pyparsing/pyparsing/wiki
    """
    # xml=False selects the looser, case-insensitive HTML matching rules
    return _makeTags(tag_str, xml=False)
625
def make_xml_tags(
    tag_str: Union[str, ParserElement]
) -> Tuple[ParserElement, ParserElement]:
    """Helper to construct opening and closing tag expressions for XML,
    given a tag name. Matches tags only in the given upper/lower case.

    Example: similar to :class:`make_html_tags`
    """
    # xml=True selects strict, case-sensitive XML matching rules
    return _makeTags(tag_str, xml=True)
635
636
# pre-built expressions that match any HTML open/close tag (case-insensitive)
any_open_tag: ParserElement
any_close_tag: ParserElement
any_open_tag, any_close_tag = make_html_tags(
    Word(alphas, alphanums + "_:").set_name("any tag")
)

# map of HTML5 entity names (without the trailing ';') to their replacement text
_htmlEntityMap = {k.rstrip(";"): v for k, v in html.entities.html5.items()}
# matches any known entity, capturing its name in the "entity" results name
common_html_entity = Regex("&(?P<entity>" + "|".join(_htmlEntityMap) + ");").set_name(
    "common HTML entity"
)
647
648
def replace_html_entity(s, l, t):
    """Helper parser action to replace common HTML entities with their special characters"""
    # the entity name was captured by common_html_entity's named regex group
    entity_name = t.entity
    return _htmlEntityMap.get(entity_name)
652
653
class OpAssoc(Enum):
    """Enumeration of operator associativity
    - used in constructing InfixNotationOperatorSpec for :class:`infix_notation`"""

    # operators at this level group left-to-right
    LEFT = 1
    # operators at this level group right-to-left
    RIGHT = 2
660
661
# a single operator expression: a ParserElement or literal string; for
# ternary operators, a (left_op, right_op) pair of expressions/strings
InfixNotationOperatorArgType = Union[
    ParserElement, str, Tuple[Union[ParserElement, str], Union[ParserElement, str]]
]
# one precedence level for infix_notation:
# (op_expr, num_operands, associativity[, parse_action])
InfixNotationOperatorSpec = Union[
    Tuple[
        InfixNotationOperatorArgType,
        int,
        OpAssoc,
        typing.Optional[ParseAction],
    ],
    Tuple[
        InfixNotationOperatorArgType,
        int,
        OpAssoc,
    ],
]
678
679
def infix_notation(
    base_expr: ParserElement,
    op_list: List[InfixNotationOperatorSpec],
    lpar: Union[str, ParserElement] = Suppress("("),
    rpar: Union[str, ParserElement] = Suppress(")"),
) -> ParserElement:
    """Helper method for constructing grammars of expressions made up of
    operators working in a precedence hierarchy.  Operators may be unary
    or binary, left- or right-associative.  Parse actions can also be
    attached to operator expressions. The generated parser will also
    recognize the use of parentheses to override operator precedences
    (see example below).

    Note: if you define a deep operator list, you may see performance
    issues when using infix_notation. See
    :class:`ParserElement.enable_packrat` for a mechanism to potentially
    improve your parser performance.

    Parameters:

    - ``base_expr`` - expression representing the most basic operand to
      be used in the expression
    - ``op_list`` - list of tuples, one for each operator precedence level
      in the expression grammar; each tuple is of the form ``(op_expr,
      num_operands, right_left_assoc, (optional)parse_action)``, where:

      - ``op_expr`` is the pyparsing expression for the operator; may also
        be a string, which will be converted to a Literal; if ``num_operands``
        is 3, ``op_expr`` is a tuple of two expressions, for the two
        operators separating the 3 terms
      - ``num_operands`` is the number of terms for this operator (must be 1,
        2, or 3)
      - ``right_left_assoc`` is the indicator whether the operator is right
        or left associative, using the pyparsing-defined constants
        ``OpAssoc.RIGHT`` and ``OpAssoc.LEFT``.
      - ``parse_action`` is the parse action to be associated with
        expressions matching this operator expression (the parse action
        tuple member may be omitted); if the parse action is passed
        a tuple or list of functions, this is equivalent to calling
        ``set_parse_action(*fn)``
        (:class:`ParserElement.set_parse_action`)
    - ``lpar`` - expression for matching left-parentheses; if passed as a
      str, then will be parsed as ``Suppress(lpar)``. If lpar is passed as
      an expression (such as ``Literal('(')``), then it will be kept in
      the parsed results, and grouped with them. (default= ``Suppress('(')``)
    - ``rpar`` - expression for matching right-parentheses; if passed as a
      str, then will be parsed as ``Suppress(rpar)``. If rpar is passed as
      an expression (such as ``Literal(')')``), then it will be kept in
      the parsed results, and grouped with them. (default= ``Suppress(')')``)

    Example::

        # simple example of four-function arithmetic with ints and
        # variable names
        integer = pyparsing_common.signed_integer
        varname = pyparsing_common.identifier

        arith_expr = infix_notation(integer | varname,
            [
            ('-', 1, OpAssoc.RIGHT),
            (one_of('* /'), 2, OpAssoc.LEFT),
            (one_of('+ -'), 2, OpAssoc.LEFT),
            ])

        arith_expr.run_tests('''
            5+3*6
            (5+3)*6
            -2--11
            ''', full_dump=False)

    prints::

        5+3*6
        [[5, '+', [3, '*', 6]]]

        (5+3)*6
        [[[5, '+', 3], '*', 6]]

        (5+x)*y
        [[[5, '+', 'x'], '*', 'y']]

        -2--11
        [[['-', 2], '-', ['-', 11]]]
    """

    # captive version of FollowedBy that does not do parse actions or capture results names
    class _FB(FollowedBy):
        def parseImpl(self, instring, loc, doActions=True):
            self.expr.try_parse(instring, loc)
            return loc, []

    _FB.__name__ = "FollowedBy>"

    ret = Forward()
    # string parens are suppressed; ParserElement parens are kept in the results
    if isinstance(lpar, str):
        lpar = Suppress(lpar)
    if isinstance(rpar, str):
        rpar = Suppress(rpar)

    # if lpar and rpar are not suppressed, wrap in group
    # (bug fix: the original tested ``rpar`` twice, so a caller-supplied
    # non-Suppress ``lpar`` did not trigger grouping as documented)
    if not (isinstance(lpar, Suppress) and isinstance(rpar, Suppress)):
        lastExpr = base_expr | Group(lpar + ret + rpar)
    else:
        lastExpr = base_expr | (lpar + ret + rpar)

    arity: int
    rightLeftAssoc: OpAssoc
    pa: typing.Optional[ParseAction]
    opExpr1: ParserElement
    opExpr2: ParserElement
    # build one precedence level at a time, from tightest-binding to loosest;
    # each level's expression falls back to the previous level (lastExpr)
    for i, operDef in enumerate(op_list):
        # pad with None so a 3-tuple spec yields pa=None
        opExpr, arity, rightLeftAssoc, pa = (operDef + (None,))[:4]  # type: ignore[assignment]
        if isinstance(opExpr, str_type):
            opExpr = ParserElement._literalStringClass(opExpr)
        opExpr = typing.cast(ParserElement, opExpr)
        if arity == 3:
            if not isinstance(opExpr, (tuple, list)) or len(opExpr) != 2:
                raise ValueError(
                    "if numterms=3, opExpr must be a tuple or list of two expressions"
                )
            opExpr1, opExpr2 = opExpr
            term_name = f"{opExpr1}{opExpr2} term"
        else:
            term_name = f"{opExpr} term"

        if not 1 <= arity <= 3:
            raise ValueError("operator must be unary (1), binary (2), or ternary (3)")

        if rightLeftAssoc not in (OpAssoc.LEFT, OpAssoc.RIGHT):
            raise ValueError("operator must indicate right or left associativity")

        thisExpr: ParserElement = Forward().set_name(term_name)
        thisExpr = typing.cast(Forward, thisExpr)
        if rightLeftAssoc is OpAssoc.LEFT:
            if arity == 1:
                # postfix unary: operand followed by one or more operators
                matchExpr = _FB(lastExpr + opExpr) + Group(lastExpr + opExpr[1, ...])
            elif arity == 2:
                if opExpr is not None:
                    matchExpr = _FB(lastExpr + opExpr + lastExpr) + Group(
                        lastExpr + (opExpr + lastExpr)[1, ...]
                    )
                else:
                    # implicit operator (juxtaposition): two or more operands
                    matchExpr = _FB(lastExpr + lastExpr) + Group(lastExpr[2, ...])
            elif arity == 3:
                matchExpr = _FB(
                    lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr
                ) + Group(lastExpr + OneOrMore(opExpr1 + lastExpr + opExpr2 + lastExpr))
        elif rightLeftAssoc is OpAssoc.RIGHT:
            if arity == 1:
                # prefix unary; try to avoid LR with this extra test
                if not isinstance(opExpr, Opt):
                    opExpr = Opt(opExpr)
                matchExpr = _FB(opExpr.expr + thisExpr) + Group(opExpr + thisExpr)
            elif arity == 2:
                if opExpr is not None:
                    matchExpr = _FB(lastExpr + opExpr + thisExpr) + Group(
                        lastExpr + (opExpr + thisExpr)[1, ...]
                    )
                else:
                    matchExpr = _FB(lastExpr + thisExpr) + Group(
                        lastExpr + thisExpr[1, ...]
                    )
            elif arity == 3:
                matchExpr = _FB(
                    lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr
                ) + Group(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr)
        if pa:
            if isinstance(pa, (tuple, list)):
                matchExpr.set_parse_action(*pa)
            else:
                matchExpr.set_parse_action(pa)
        thisExpr <<= (matchExpr | lastExpr).set_name(term_name)
        lastExpr = thisExpr
    ret <<= lastExpr
    return ret
855
856
def indentedBlock(blockStatementExpr, indentStack, indent=True, backup_stacks=[]):
    """
    (DEPRECATED - use :class:`IndentedBlock` class instead)
    Helper method for defining space-delimited indentation blocks,
    such as those used to define block statements in Python source code.

    Parameters:

    - ``blockStatementExpr`` - expression defining syntax of statement that
      is repeated within the indented block
    - ``indentStack`` - list created by caller to manage indentation stack
      (multiple ``statementWithIndentedBlock`` expressions within a single
      grammar should share a common ``indentStack``)
    - ``indent`` - boolean indicating whether block must be indented beyond
      the current level; set to ``False`` for block of left-most statements
      (default= ``True``)

    A valid block must contain at least one ``blockStatement``.

    (Note that indentedBlock uses internal parse actions which make it
    incompatible with packrat parsing.)

    Example::

        data = '''
        def A(z):
          A1
          B = 100
          G = A2
          A2
          A3
        B
        def BB(a,b,c):
          BB1
          def BBA():
            bba1
            bba2
            bba3
        C
        D
        def spam(x,y):
             def eggs(z):
                 pass
        '''


        indentStack = [1]
        stmt = Forward()

        identifier = Word(alphas, alphanums)
        funcDecl = ("def" + identifier + Group("(" + Opt(delimitedList(identifier)) + ")") + ":")
        func_body = indentedBlock(stmt, indentStack)
        funcDef = Group(funcDecl + func_body)

        rvalue = Forward()
        funcCall = Group(identifier + "(" + Opt(delimitedList(rvalue)) + ")")
        rvalue << (funcCall | identifier | Word(nums))
        assignment = Group(identifier + "=" + rvalue)
        stmt << (funcDef | assignment | identifier)

        module_body = stmt[1, ...]

        parseTree = module_body.parseString(data)
        parseTree.pprint()

    prints::

        [['def',
          'A',
          ['(', 'z', ')'],
          ':',
          [['A1'], [['B', '=', '100']], [['G', '=', 'A2']], ['A2'], ['A3']]],
         'B',
         ['def',
          'BB',
          ['(', 'a', 'b', 'c', ')'],
          ':',
          [['BB1'], [['def', 'BBA', ['(', ')'], ':', [['bba1'], ['bba2'], ['bba3']]]]]],
         'C',
         'D',
         ['def',
          'spam',
          ['(', 'x', 'y', ')'],
          ':',
          [[['def', 'eggs', ['(', 'z', ')'], ':', [['pass']]]]]]]
    """
    # NOTE(review): the mutable default ``backup_stacks=[]`` appears deliberate -
    # it persists saved copies of indentStack across calls so a failed parse can
    # restore the stack; confirm before "fixing" it.
    backup_stacks.append(indentStack[:])

    def reset_stack():
        # restore the indent stack from the most recent backup (on parse failure)
        indentStack[:] = backup_stacks[-1]

    def checkPeerIndent(s, l, t):
        if l >= len(s):
            return
        curCol = col(l, s)
        if curCol != indentStack[-1]:
            if curCol > indentStack[-1]:
                raise ParseException(s, l, "illegal nesting")
            raise ParseException(s, l, "not a peer entry")

    def checkSubIndent(s, l, t):
        # a sub-block must be indented strictly deeper than the current level
        curCol = col(l, s)
        if curCol > indentStack[-1]:
            indentStack.append(curCol)
        else:
            raise ParseException(s, l, "not a subentry")

    def checkUnindent(s, l, t):
        if l >= len(s):
            return
        curCol = col(l, s)
        # an unindent must return to some previously seen indent level
        if not (indentStack and curCol in indentStack):
            raise ParseException(s, l, "not an unindent")
        if curCol < indentStack[-1]:
            indentStack.pop()

    # newline handling: consume line ends (and blank lines) between statements
    NL = OneOrMore(LineEnd().set_whitespace_chars("\t ").suppress())
    INDENT = (Empty() + Empty().set_parse_action(checkSubIndent)).set_name("INDENT")
    PEER = Empty().set_parse_action(checkPeerIndent).set_name("")
    UNDENT = Empty().set_parse_action(checkUnindent).set_name("UNINDENT")
    if indent:
        smExpr = Group(
            Opt(NL)
            + INDENT
            + OneOrMore(PEER + Group(blockStatementExpr) + Opt(NL))
            + UNDENT
        )
    else:
        smExpr = Group(
            Opt(NL)
            + OneOrMore(PEER + Group(blockStatementExpr) + Opt(NL))
            + Opt(UNDENT)
        )

    # add a parse action to remove backup_stack from list of backups
    smExpr.add_parse_action(
        lambda: backup_stacks.pop(-1) and None if backup_stacks else None
    )
    smExpr.set_fail_action(lambda a, b, c, d: reset_stack())
    # allow line continuations (backslash-newline) inside block statements
    blockStatementExpr.ignore(_bslash + LineEnd())
    return smExpr.set_name("indented block")
998
999
# it's easy to get these comment structures wrong - they're very common, so may as well make them available
# The body pattern matches any non-'*' character, or a '*' not followed by '/',
# so the match runs up to (and Combine appends) the first '*/' terminator.
c_style_comment = Combine(Regex(r"/\*(?:[^*]|\*(?!/))*") + "*/").set_name(
    "C style comment"
)
"Comment of the form ``/* ... */``"

# Non-greedy match; [\s\S] matches any character including newlines, so the
# comment may span multiple lines.
html_comment = Regex(r"<!--[\s\S]*?-->").set_name("HTML comment")
"Comment of the form ``<!-- ... -->``"

# Everything up to (not including) the next newline; leave_whitespace() keeps
# leading whitespace from being skipped before matching.
rest_of_line = Regex(r".*").leave_whitespace().set_name("rest of line")
# A '//' comment may be continued onto the next line with a backslash-newline.
dbl_slash_comment = Regex(r"//(?:\\\n|[^\n])*").set_name("// comment")
"Comment of the form ``// ... (to end of line)``"

# Either a /* ... */ block comment (same regex as c_style_comment) or a
# dbl_slash_comment line comment.
cpp_style_comment = Combine(
    Regex(r"/\*(?:[^*]|\*(?!/))*") + "*/" | dbl_slash_comment
).set_name("C++ style comment")
"Comment of either form :class:`c_style_comment` or :class:`dbl_slash_comment`"

java_style_comment = cpp_style_comment
"Same as :class:`cpp_style_comment`"

python_style_comment = Regex(r"#.*").set_name("Python style comment")
"Comment of the form ``# ... (to end of line)``"
1023
1024
# Snapshot every ParserElement defined at module level so far, so the package
# can revisit these built-ins later if a global default value gets updated.
_builtin_exprs: List[ParserElement] = [
    candidate for candidate in vars().values() if isinstance(candidate, ParserElement)
]
1030
1031
# compatibility function, superseded by DelimitedList class
def delimited_list(
    expr: Union[str, ParserElement],
    delim: Union[str, ParserElement] = ",",
    combine: bool = False,
    min: typing.Optional[int] = None,
    max: typing.Optional[int] = None,
    *,
    allow_trailing_delim: bool = False,
) -> ParserElement:
    """(DEPRECATED - use :class:`DelimitedList` class)

    Thin compatibility shim: forwards all arguments unchanged to
    :class:`DelimitedList` and returns the resulting expression.
    """
    parser = DelimitedList(
        expr,
        delim,
        combine,
        min,
        max,
        allow_trailing_delim=allow_trailing_delim,
    )
    return parser
1046
1047
# pre-PEP8 compatible names
# (camelCase bindings that alias the snake_case definitions above, kept so
# code written against older pyparsing releases continues to work)
# fmt: off
opAssoc = OpAssoc
anyOpenTag = any_open_tag
anyCloseTag = any_close_tag
commonHTMLEntity = common_html_entity
cStyleComment = c_style_comment
htmlComment = html_comment
restOfLine = rest_of_line
dblSlashComment = dbl_slash_comment
cppStyleComment = cpp_style_comment
javaStyleComment = java_style_comment
pythonStyleComment = python_style_comment
1061
# Deprecated camelCase synonyms: each `...` stub body is thrown away — the
# @replaced_by_pep8 decorator (from .util) presumably returns a wrapper that
# forwards to the PEP8-named object passed as its argument.
@replaced_by_pep8(DelimitedList)
def delimitedList(): ...

# NOTE(review): this rebinds the `delimited_list` compatibility function
# defined earlier in this module, so calls go through the decorator's wrapper
# instead — looks intentional, but confirm.
@replaced_by_pep8(DelimitedList)
def delimited_list(): ...

@replaced_by_pep8(counted_array)
def countedArray(): ...

@replaced_by_pep8(match_previous_literal)
def matchPreviousLiteral(): ...

@replaced_by_pep8(match_previous_expr)
def matchPreviousExpr(): ...

@replaced_by_pep8(one_of)
def oneOf(): ...

@replaced_by_pep8(dict_of)
def dictOf(): ...

@replaced_by_pep8(original_text_for)
def originalTextFor(): ...

@replaced_by_pep8(nested_expr)
def nestedExpr(): ...

@replaced_by_pep8(make_html_tags)
def makeHTMLTags(): ...

@replaced_by_pep8(make_xml_tags)
def makeXMLTags(): ...

@replaced_by_pep8(replace_html_entity)
def replaceHTMLEntity(): ...

@replaced_by_pep8(infix_notation)
def infixNotation(): ...
# fmt: on