]>
Commit | Line | Data |
---|---|---|
e0df8241 JR |
1 | # |
2 | # core.py | |
3 | # | |
4 | ||
5 | from collections import deque | |
6 | import os | |
7 | import typing | |
8 | from typing import ( | |
9 | Any, | |
10 | Callable, | |
11 | Generator, | |
12 | List, | |
13 | NamedTuple, | |
14 | Sequence, | |
15 | Set, | |
16 | TextIO, | |
17 | Tuple, | |
18 | Union, | |
19 | cast, | |
20 | ) | |
21 | from abc import ABC, abstractmethod | |
22 | from enum import Enum | |
23 | import string | |
24 | import copy | |
25 | import warnings | |
26 | import re | |
27 | import sys | |
28 | from collections.abc import Iterable | |
29 | import traceback | |
30 | import types | |
31 | from operator import itemgetter | |
32 | from functools import wraps | |
33 | from threading import RLock | |
34 | from pathlib import Path | |
35 | ||
36 | from .util import ( | |
37 | _FifoCache, | |
38 | _UnboundedCache, | |
39 | __config_flags, | |
40 | _collapse_string_to_ranges, | |
41 | _escape_regex_range_chars, | |
42 | _bslash, | |
43 | _flatten, | |
44 | LRUMemo as _LRUMemo, | |
45 | UnboundedMemo as _UnboundedMemo, | |
46 | replaced_by_pep8, | |
47 | ) | |
48 | from .exceptions import * | |
49 | from .actions import * | |
50 | from .results import ParseResults, _ParseResultsWithOffset | |
51 | from .unicode import pyparsing_unicode | |
52 | ||
# module-level alias for sys.maxsize
_MAX_INT = sys.maxsize
# tuple of the built-in text and bytes types
# NOTE(review): presumably used for isinstance() checks on string-like arguments - confirm
str_type: Tuple[type, ...] = (str, bytes)
55 | ||
56 | # | |
57 | # Copyright (c) 2003-2022 Paul T. McGuire | |
58 | # | |
59 | # Permission is hereby granted, free of charge, to any person obtaining | |
60 | # a copy of this software and associated documentation files (the | |
61 | # "Software"), to deal in the Software without restriction, including | |
62 | # without limitation the rights to use, copy, modify, merge, publish, | |
63 | # distribute, sublicense, and/or sell copies of the Software, and to | |
64 | # permit persons to whom the Software is furnished to do so, subject to | |
65 | # the following conditions: | |
66 | # | |
67 | # The above copyright notice and this permission notice shall be | |
68 | # included in all copies or substantial portions of the Software. | |
69 | # | |
70 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
71 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |
72 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. | |
73 | # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY | |
74 | # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | |
75 | # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | |
76 | # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | |
77 | # | |
78 | ||
79 | ||
if sys.version_info >= (3, 8):
    from functools import cached_property
else:

    class cached_property:
        """
        Minimal backport of :class:`functools.cached_property` for Python < 3.8.

        On first attribute access the wrapped function is called with the instance,
        and its result is stored in the instance ``__dict__`` under the function's
        name, so later accesses find the stored value directly.
        """

        def __init__(self, func):
            self._func = func

        def __get__(self, instance, owner=None):
            # accessed on the class rather than an instance: return the descriptor
            # itself (as functools.cached_property does) instead of failing on
            # ``None.__dict__``
            if instance is None:
                return self
            ret = instance.__dict__[self._func.__name__] = self._func(instance)
            return ret
91 | ||
92 | ||
class __compat__(__config_flags):
    """
    A cross-version compatibility configuration for pyparsing features that will be
    released in a future version. By setting values in this configuration to True,
    those features can be enabled in prior versions for compatibility development
    and testing.

    - ``collect_all_And_tokens`` - flag to enable fix for Issue #63 that fixes erroneous grouping
      of results names when an :class:`And` expression is nested within an :class:`Or` or :class:`MatchFirst`;
      maintained for compatibility, but setting to ``False`` no longer restores pre-2.3.1
      behavior
    """

    # label for this group of flags
    # NOTE(review): presumably consumed by the __config_flags base class for messages - confirm
    _type_desc = "compatibility"

    collect_all_And_tokens = True

    # every non-underscore name defined in the class body up to this point;
    # this relies on locals() being the class namespace, so statement order matters
    _all_names = [__ for __ in locals() if not __.startswith("_")]
    # NOTE(review): presumably the flags that __config_flags refuses to modify - confirm
    _fixed_names = """
        collect_all_And_tokens
        """.split()
114 | ||
115 | ||
class __diag__(__config_flags):
    """
    Holder of the global diagnostic flags; every flag defaults to ``False``.
    The flag names mirror the members of the :class:`Diagnostics` enum.
    """

    # label for this group of flags
    # NOTE(review): presumably consumed by the __config_flags base class for messages - confirm
    _type_desc = "diagnostic"

    warn_multiple_tokens_in_named_alternation = False
    warn_ungrouped_named_tokens_in_collection = False
    warn_name_set_on_empty_Forward = False
    warn_on_parse_using_empty_Forward = False
    warn_on_assignment_to_Forward = False
    warn_on_multiple_string_args_to_oneof = False
    warn_on_match_first_with_lshift_operator = False
    enable_debug_on_named_expressions = False

    # every non-underscore name defined in the class body up to this point;
    # this relies on locals() being the class namespace, so statement order matters
    _all_names = [__ for __ in locals() if not __.startswith("_")]
    # subsets of the flag names, selected by their name prefix
    _warning_names = [name for name in _all_names if name.startswith("warn")]
    _debug_names = [name for name in _all_names if name.startswith("enable_debug")]

    @classmethod
    def enable_all_warnings(cls) -> None:
        """Call ``cls.enable`` for every flag whose name starts with ``warn``."""
        for name in cls._warning_names:
            cls.enable(name)
136 | ||
137 | ||
class Diagnostics(Enum):
    """
    Diagnostic configuration (all default to disabled)

    - ``warn_multiple_tokens_in_named_alternation`` - flag to enable warnings when a results
      name is defined on a :class:`MatchFirst` or :class:`Or` expression with one or more :class:`And` subexpressions
    - ``warn_ungrouped_named_tokens_in_collection`` - flag to enable warnings when a results
      name is defined on a containing expression with ungrouped subexpressions that also
      have results names
    - ``warn_name_set_on_empty_Forward`` - flag to enable warnings when a :class:`Forward` is defined
      with a results name, but has no contents defined
    - ``warn_on_parse_using_empty_Forward`` - flag to enable warnings when a :class:`Forward` is
      defined in a grammar but has never had an expression attached to it
    - ``warn_on_assignment_to_Forward`` - flag to enable warnings when a :class:`Forward` is defined
      but is overwritten by assigning using ``'='`` instead of ``'<<='`` or ``'<<'``
    - ``warn_on_multiple_string_args_to_oneof`` - flag to enable warnings when :class:`one_of` is
      incorrectly called with multiple str arguments
    - ``warn_on_match_first_with_lshift_operator`` - flag to enable warnings about using the
      ``'<<'`` operator together with a :class:`MatchFirst` expression (NOTE(review): the exact
      trigger is not visible in this part of the file - confirm)
    - ``enable_debug_on_named_expressions`` - flag to auto-enable debug on all subsequent
      calls to :class:`ParserElement.set_name`

    Diagnostics are enabled/disabled by calling :class:`enable_diag` and :class:`disable_diag`.
    All warnings can be enabled by calling :class:`enable_all_warnings`.
    """

    # member names must match the flag attribute names on __diag__, since
    # enable_diag/disable_diag look the flag up by ``member.name``
    warn_multiple_tokens_in_named_alternation = 0
    warn_ungrouped_named_tokens_in_collection = 1
    warn_name_set_on_empty_Forward = 2
    warn_on_parse_using_empty_Forward = 3
    warn_on_assignment_to_Forward = 4
    warn_on_multiple_string_args_to_oneof = 5
    warn_on_match_first_with_lshift_operator = 6
    enable_debug_on_named_expressions = 7
170 | ||
171 | ||
def enable_diag(diag_enum: Diagnostics) -> None:
    """
    Turn on the global pyparsing diagnostic flag named by ``diag_enum``
    (a member of :class:`Diagnostics`).
    """
    flag_name = diag_enum.name
    __diag__.enable(flag_name)
177 | ||
178 | ||
def disable_diag(diag_enum: Diagnostics) -> None:
    """
    Turn off the global pyparsing diagnostic flag named by ``diag_enum``
    (a member of :class:`Diagnostics`).
    """
    flag_name = diag_enum.name
    __diag__.disable(flag_name)
184 | ||
185 | ||
def enable_all_warnings() -> None:
    """
    Switch on every global pyparsing diagnostic warning flag (see :class:`Diagnostics`).
    """
    turn_on_warnings = __diag__.enable_all_warnings
    turn_on_warnings()
191 | ||
192 | ||
# hide abstract class: remove the imported ``__config_flags`` base from this
# module's namespace now that the flag classes above have been defined
del __config_flags
195 | ||
196 | ||
197 | def _should_enable_warnings( | |
198 | cmd_line_warn_options: typing.Iterable[str], warn_env_var: typing.Optional[str] | |
199 | ) -> bool: | |
200 | enable = bool(warn_env_var) | |
201 | for warn_opt in cmd_line_warn_options: | |
202 | w_action, w_message, w_category, w_module, w_line = (warn_opt + "::::").split( | |
203 | ":" | |
204 | )[:5] | |
205 | if not w_action.lower().startswith("i") and ( | |
206 | not (w_message or w_category or w_module) or w_module == "pyparsing" | |
207 | ): | |
208 | enable = True | |
209 | elif w_action.lower().startswith("i") and w_module in ("pyparsing", ""): | |
210 | enable = False | |
211 | return enable | |
212 | ||
213 | ||
# At import time, turn on all pyparsing warnings when requested through Python's
# warning options (sys.warnoptions) or the PYPARSINGENABLEALLWARNINGS environment variable.
if _should_enable_warnings(
    sys.warnoptions, os.environ.get("PYPARSINGENABLEALLWARNINGS")
):
    enable_all_warnings()
218 | ||
219 | ||
# set of single-argument builtins that can be used as parse actions;
# _trim_arity calls these with just the parsed tokens
_single_arg_builtins = {
    sum,
    len,
    sorted,
    reversed,
    list,
    tuple,
    set,
    any,
    all,
    min,
    max,
}
234 | ||
_generatorType = types.GeneratorType
# (location, tokens) pair, the shape returned by ParserElement.parseImpl
ParseImplReturnType = Tuple[int, Any]
PostParseReturnType = Union[ParseResults, Sequence[ParseResults]]
# a parse action may accept 0 to 3 of the arguments (instring, loc, tokens),
# dropping arguments from the left
ParseAction = Union[
    Callable[[], Any],
    Callable[[ParseResults], Any],
    Callable[[int, ParseResults], Any],
    Callable[[str, int, ParseResults], Any],
]
# same call signatures as ParseAction, but returning a boolean
ParseCondition = Union[
    Callable[[], bool],
    Callable[[ParseResults], bool],
    Callable[[int, ParseResults], bool],
    Callable[[str, int, ParseResults], bool],
]
# fail action: fn(instring, loc, expr, err)
ParseFailAction = Callable[[str, int, "ParserElement", Exception], None]
# debug callbacks: fn(instring, loc, expr, cache_hit)
DebugStartAction = Callable[[str, int, "ParserElement", bool], None]
# fn(instring, startloc, endloc, expr, toks, cache_hit)
DebugSuccessAction = Callable[
    [str, int, int, "ParserElement", ParseResults, bool], None
]
# fn(instring, loc, expr, exc, cache_hit)
DebugExceptionAction = Callable[[str, int, "ParserElement", Exception, bool], None]
256 | ||
257 | ||
# character-class string constants
alphas = string.ascii_uppercase + string.ascii_lowercase
nums = string.digits
hexnums = nums + "ABCDEFabcdef"
alphanums = alphas + nums
# every printable ASCII character that is not whitespace
printables = "".join(c for c in string.printable if c not in string.whitespace)
identchars = pyparsing_unicode.Latin1.identchars
identbodychars = pyparsing_unicode.Latin1.identbodychars
265 | ||
# cached stack entry for the first execution of the extract_stack() line inside
# _trim_arity; filled in lazily and reused by all later calls
_trim_arity_call_line: traceback.StackSummary = None  # type: ignore[assignment]


def _trim_arity(func, max_limit=3):
    """decorator to trim function calls to match the arity of the target

    Returns a wrapper that calls ``func(*args[limit:])``, starting with
    ``limit = 0`` and dropping one more leading argument (up to ``max_limit``)
    each time the call raises a ``TypeError`` whose traceback ends at the call
    line inside the wrapper itself. Once a call succeeds, later ``TypeError``
    exceptions are re-raised unchanged.

    Members of ``_single_arg_builtins`` are special-cased: the result is a
    3-argument lambda that calls ``func`` with just the tokens.
    """
    global _trim_arity_call_line

    if func in _single_arg_builtins:
        return lambda s, l, t: func(t)

    limit = 0
    found_arity = False

    # synthesize what would be returned by traceback.extract_stack at the call to
    # user's parse action 'func', so that we don't incur call penalty at parse time

    # fmt: off
    LINE_DIFF = 7
    # IF ANY CODE CHANGES, EVEN JUST COMMENTS OR BLANK LINES, BETWEEN THE NEXT LINE AND
    # THE CALL TO FUNC INSIDE WRAPPER, LINE_DIFF MUST BE MODIFIED!!!!
    _trim_arity_call_line = (_trim_arity_call_line or traceback.extract_stack(limit=2)[-1])
    pa_call_line_synth = (_trim_arity_call_line[0], _trim_arity_call_line[1] + LINE_DIFF)

    def wrapper(*args):
        nonlocal found_arity, limit
        while 1:
            try:
                ret = func(*args[limit:])
                found_arity = True
                return ret
            except TypeError as te:
                # re-raise TypeErrors if they did not come from our arity testing
                if found_arity:
                    raise
                else:
                    # compare the first two fields (filename, lineno) of the last
                    # extracted traceback frame with the synthesized call location
                    tb = te.__traceback__
                    frames = traceback.extract_tb(tb, limit=2)
                    frame_summary = frames[-1]
                    trim_arity_type_error = (
                        [frame_summary[:2]][-1][:2] == pa_call_line_synth
                    )
                    # drop the local traceback reference
                    del tb

                    if trim_arity_type_error:
                        # retry with one fewer leading argument
                        if limit < max_limit:
                            limit += 1
                            continue

                    raise
    # fmt: on

    # copy func name to wrapper for sensible debug output
    # (can't use functools.wraps, since that messes with function signature)
    func_name = getattr(func, "__name__", getattr(func, "__class__").__name__)
    wrapper.__name__ = func_name
    wrapper.__doc__ = func.__doc__

    return wrapper
324 | ||
325 | ||
def condition_as_parse_action(
    fn: ParseCondition, message: typing.Optional[str] = None, fatal: bool = False
) -> ParseAction:
    """
    Wrap a simple predicate (a function returning ``True`` or ``False``) so that it
    can be used as a parse action. Useful in places that require a parse action and
    where :class:`ParserElement.add_condition` is not available (such as when adding
    a condition to an operator level in :class:`infix_notation`).

    Optional keyword arguments:

    - ``message`` - custom message to use in the raised exception
    - ``fatal`` - if True, raise :class:`ParseFatalException` to stop parsing immediately;
      otherwise raise :class:`ParseException`

    """
    if message is None:
        msg = "failed user-defined condition"
    else:
        msg = message
    if fatal:
        exc_type = ParseFatalException
    else:
        exc_type = ParseException
    # adapt the predicate so that it can always be called as predicate(s, l, t)
    predicate = _trim_arity(fn)

    @wraps(predicate)
    def pa(s, l, t):
        passed = bool(predicate(s, l, t))
        if not passed:
            raise exc_type(s, l, msg)

    return pa
352 | ||
353 | ||
def _default_start_debug_action(
    instring: str, loc: int, expr: "ParserElement", cache_hit: bool = False
):
    """
    Default "about to try a match" debug action: print the expression, the
    location (with line and column), the source line, and a caret under the column.
    The first line is prefixed with ``*`` when ``cache_hit`` is true.
    """
    marker = "*" if cache_hit else ""
    header = f"{marker}Match {expr} at loc {loc}({lineno(loc, instring)},{col(loc, instring)})"
    source_line = f" {line(loc, instring)}"
    caret_line = f" {' ' * (col(loc, instring) - 1)}^"
    print("\n".join((header, source_line, caret_line)))
365 | ||
366 | ||
def _default_success_debug_action(
    instring: str,
    startloc: int,
    endloc: int,
    expr: "ParserElement",
    toks: ParseResults,
    cache_hit: bool = False,
):
    """
    Default "match succeeded" debug action: print the expression and the matched
    tokens as a list, prefixed with ``*`` when ``cache_hit`` is true.
    """
    if cache_hit:
        marker = "*"
    else:
        marker = ""
    print(f"{marker}Matched {expr} -> {toks.as_list()}")
377 | ||
378 | ||
379 | def _default_exception_debug_action( | |
380 | instring: str, | |
381 | loc: int, | |
382 | expr: "ParserElement", | |
383 | exc: Exception, | |
384 | cache_hit: bool = False, | |
385 | ): | |
386 | cache_hit_str = "*" if cache_hit else "" | |
387 | print(f"{cache_hit_str}Match {expr} failed, {type(exc).__name__} raised: {exc}") | |
388 | ||
389 | ||
def null_debug_action(*args):
    """'Do-nothing' debug action, to suppress debugging output during parsing.

    Accepts any positional arguments, ignores them, and returns ``None``.
    """
    return None
392 | ||
393 | ||
394 | class ParserElement(ABC): | |
395 | """Abstract base level parser element class.""" | |
396 | ||
397 | DEFAULT_WHITE_CHARS: str = " \n\t\r" | |
398 | verbose_stacktrace: bool = False | |
399 | _literalStringClass: type = None # type: ignore[assignment] | |
400 | ||
@staticmethod
def set_default_whitespace_chars(chars: str) -> None:
    r"""
    Replace the default set of whitespace characters skipped before matching.

    The new default is stored on :class:`ParserElement`, and is also applied to
    every expression already defined in this module that still follows the default.

    Example::

        # default whitespace chars are space, newline, <TAB> and carriage return
        Word(alphas)[1, ...].parse_string("abc def\nghi jkl")  # -> ['abc', 'def', 'ghi', 'jkl']

        # change to just treat newline as significant
        ParserElement.set_default_whitespace_chars(" \t")
        Word(alphas)[1, ...].parse_string("abc def\nghi jkl")  # -> ['abc', 'def']
    """
    ParserElement.DEFAULT_WHITE_CHARS = chars

    # refresh the module's predefined expressions that track the default;
    # each one gets its own fresh set object
    for builtin_expr in _builtin_exprs:
        if not builtin_expr.copyDefaultWhiteChars:
            continue
        builtin_expr.whiteChars = set(chars)
421 | ||
422 | @staticmethod | |
423 | def inline_literals_using(cls: type) -> None: | |
424 | """ | |
425 | Set class to be used for inclusion of string literals into a parser. | |
426 | ||
427 | Example:: | |
428 | ||
429 | # default literal class used is Literal | |
430 | integer = Word(nums) | |
431 | date_str = integer("year") + '/' + integer("month") + '/' + integer("day") | |
432 | ||
433 | date_str.parse_string("1999/12/31") # -> ['1999', '/', '12', '/', '31'] | |
434 | ||
435 | ||
436 | # change to Suppress | |
437 | ParserElement.inline_literals_using(Suppress) | |
438 | date_str = integer("year") + '/' + integer("month") + '/' + integer("day") | |
439 | ||
440 | date_str.parse_string("1999/12/31") # -> ['1999', '12', '31'] | |
441 | """ | |
442 | ParserElement._literalStringClass = cls | |
443 | ||
444 | @classmethod | |
445 | def using_each(cls, seq, **class_kwargs): | |
446 | """ | |
447 | Yields a sequence of class(obj, **class_kwargs) for obj in seq. | |
448 | ||
449 | Example:: | |
450 | ||
451 | LPAR, RPAR, LBRACE, RBRACE, SEMI = Suppress.using_each("(){};") | |
452 | ||
453 | """ | |
454 | yield from (cls(obj, **class_kwargs) for obj in seq) | |
455 | ||
class DebugActions(NamedTuple):
    # Optional per-expression debug callbacks; __init__ sets all three to None.
    # called before a match is attempted
    debug_try: typing.Optional[DebugStartAction]
    # NOTE(review): presumably called after a successful match - the call site is
    # not visible in this part of the file
    debug_match: typing.Optional[DebugSuccessAction]
    # called when the match attempt raises an exception
    debug_fail: typing.Optional[DebugExceptionAction]
460 | ||
461 | def __init__(self, savelist: bool = False): | |
462 | self.parseAction: List[ParseAction] = list() | |
463 | self.failAction: typing.Optional[ParseFailAction] = None | |
464 | self.customName: str = None # type: ignore[assignment] | |
465 | self._defaultName: typing.Optional[str] = None | |
466 | self.resultsName: str = None # type: ignore[assignment] | |
467 | self.saveAsList = savelist | |
468 | self.skipWhitespace = True | |
469 | self.whiteChars = set(ParserElement.DEFAULT_WHITE_CHARS) | |
470 | self.copyDefaultWhiteChars = True | |
471 | # used when checking for left-recursion | |
472 | self.mayReturnEmpty = False | |
473 | self.keepTabs = False | |
474 | self.ignoreExprs: List["ParserElement"] = list() | |
475 | self.debug = False | |
476 | self.streamlined = False | |
477 | # optimize exception handling for subclasses that don't advance parse index | |
478 | self.mayIndexError = True | |
479 | self.errmsg = "" | |
480 | # mark results names as modal (report only last) or cumulative (list all) | |
481 | self.modalResults = True | |
482 | # custom debug actions | |
483 | self.debugActions = self.DebugActions(None, None, None) | |
484 | # avoid redundant calls to preParse | |
485 | self.callPreparse = True | |
486 | self.callDuringTry = False | |
487 | self.suppress_warnings_: List[Diagnostics] = [] | |
488 | ||
489 | def suppress_warning(self, warning_type: Diagnostics) -> "ParserElement": | |
490 | """ | |
491 | Suppress warnings emitted for a particular diagnostic on this expression. | |
492 | ||
493 | Example:: | |
494 | ||
495 | base = pp.Forward() | |
496 | base.suppress_warning(Diagnostics.warn_on_parse_using_empty_Forward) | |
497 | ||
498 | # statement would normally raise a warning, but is now suppressed | |
499 | print(base.parse_string("x")) | |
500 | ||
501 | """ | |
502 | self.suppress_warnings_.append(warning_type) | |
503 | return self | |
504 | ||
def visit_all(self):
    """Generator that walks a grammar breadth-first, yielding this expression
    and every sub-expression exactly once. Typically just for internal use.
    """
    visited = set()
    pending = deque([self])
    while pending:
        node = pending.popleft()

        # recursive grammars can reach the same node again; only handle it once
        if node not in visited:
            visited.add(node)
            pending.extend(node.recurse())
            yield node
521 | ||
522 | def copy(self) -> "ParserElement": | |
523 | """ | |
524 | Make a copy of this :class:`ParserElement`. Useful for defining | |
525 | different parse actions for the same parsing pattern, using copies of | |
526 | the original parse element. | |
527 | ||
528 | Example:: | |
529 | ||
530 | integer = Word(nums).set_parse_action(lambda toks: int(toks[0])) | |
531 | integerK = integer.copy().add_parse_action(lambda toks: toks[0] * 1024) + Suppress("K") | |
532 | integerM = integer.copy().add_parse_action(lambda toks: toks[0] * 1024 * 1024) + Suppress("M") | |
533 | ||
534 | print((integerK | integerM | integer)[1, ...].parse_string("5K 100 640K 256M")) | |
535 | ||
536 | prints:: | |
537 | ||
538 | [5120, 100, 655360, 268435456] | |
539 | ||
540 | Equivalent form of ``expr.copy()`` is just ``expr()``:: | |
541 | ||
542 | integerM = integer().add_parse_action(lambda toks: toks[0] * 1024 * 1024) + Suppress("M") | |
543 | """ | |
544 | cpy = copy.copy(self) | |
545 | cpy.parseAction = self.parseAction[:] | |
546 | cpy.ignoreExprs = self.ignoreExprs[:] | |
547 | if self.copyDefaultWhiteChars: | |
548 | cpy.whiteChars = set(ParserElement.DEFAULT_WHITE_CHARS) | |
549 | return cpy | |
550 | ||
551 | def set_results_name( | |
552 | self, name: str, list_all_matches: bool = False, *, listAllMatches: bool = False | |
553 | ) -> "ParserElement": | |
554 | """ | |
555 | Define name for referencing matching tokens as a nested attribute | |
556 | of the returned parse results. | |
557 | ||
558 | Normally, results names are assigned as you would assign keys in a dict: | |
559 | any existing value is overwritten by later values. If it is necessary to | |
560 | keep all values captured for a particular results name, call ``set_results_name`` | |
561 | with ``list_all_matches`` = True. | |
562 | ||
563 | NOTE: ``set_results_name`` returns a *copy* of the original :class:`ParserElement` object; | |
564 | this is so that the client can define a basic element, such as an | |
565 | integer, and reference it in multiple places with different names. | |
566 | ||
567 | You can also set results names using the abbreviated syntax, | |
568 | ``expr("name")`` in place of ``expr.set_results_name("name")`` | |
569 | - see :class:`__call__`. If ``list_all_matches`` is required, use | |
570 | ``expr("name*")``. | |
571 | ||
572 | Example:: | |
573 | ||
574 | date_str = (integer.set_results_name("year") + '/' | |
575 | + integer.set_results_name("month") + '/' | |
576 | + integer.set_results_name("day")) | |
577 | ||
578 | # equivalent form: | |
579 | date_str = integer("year") + '/' + integer("month") + '/' + integer("day") | |
580 | """ | |
581 | listAllMatches = listAllMatches or list_all_matches | |
582 | return self._setResultsName(name, listAllMatches) | |
583 | ||
584 | def _setResultsName(self, name, listAllMatches=False): | |
585 | if name is None: | |
586 | return self | |
587 | newself = self.copy() | |
588 | if name.endswith("*"): | |
589 | name = name[:-1] | |
590 | listAllMatches = True | |
591 | newself.resultsName = name | |
592 | newself.modalResults = not listAllMatches | |
593 | return newself | |
594 | ||
595 | def set_break(self, break_flag: bool = True) -> "ParserElement": | |
596 | """ | |
597 | Method to invoke the Python pdb debugger when this element is | |
598 | about to be parsed. Set ``break_flag`` to ``True`` to enable, ``False`` to | |
599 | disable. | |
600 | """ | |
601 | if break_flag: | |
602 | _parseMethod = self._parse | |
603 | ||
604 | def breaker(instring, loc, doActions=True, callPreParse=True): | |
605 | import pdb | |
606 | ||
607 | # this call to pdb.set_trace() is intentional, not a checkin error | |
608 | pdb.set_trace() | |
609 | return _parseMethod(instring, loc, doActions, callPreParse) | |
610 | ||
611 | breaker._originalParseMethod = _parseMethod # type: ignore [attr-defined] | |
612 | self._parse = breaker # type: ignore [assignment] | |
613 | else: | |
614 | if hasattr(self._parse, "_originalParseMethod"): | |
615 | self._parse = self._parse._originalParseMethod # type: ignore [attr-defined, assignment] | |
616 | return self | |
617 | ||
618 | def set_parse_action(self, *fns: ParseAction, **kwargs) -> "ParserElement": | |
619 | """ | |
620 | Define one or more actions to perform when successfully matching parse element definition. | |
621 | ||
622 | Parse actions can be called to perform data conversions, do extra validation, | |
623 | update external data structures, or enhance or replace the parsed tokens. | |
624 | Each parse action ``fn`` is a callable method with 0-3 arguments, called as | |
625 | ``fn(s, loc, toks)`` , ``fn(loc, toks)`` , ``fn(toks)`` , or just ``fn()`` , where: | |
626 | ||
627 | - ``s`` = the original string being parsed (see note below) | |
628 | - ``loc`` = the location of the matching substring | |
629 | - ``toks`` = a list of the matched tokens, packaged as a :class:`ParseResults` object | |
630 | ||
631 | The parsed tokens are passed to the parse action as ParseResults. They can be | |
632 | modified in place using list-style append, extend, and pop operations to update | |
633 | the parsed list elements; and with dictionary-style item set and del operations | |
634 | to add, update, or remove any named results. If the tokens are modified in place, | |
635 | it is not necessary to return them with a return statement. | |
636 | ||
637 | Parse actions can also completely replace the given tokens, with another ``ParseResults`` | |
638 | object, or with some entirely different object (common for parse actions that perform data | |
639 | conversions). A convenient way to build a new parse result is to define the values | |
640 | using a dict, and then create the return value using :class:`ParseResults.from_dict`. | |
641 | ||
642 | If None is passed as the ``fn`` parse action, all previously added parse actions for this | |
643 | expression are cleared. | |
644 | ||
645 | Optional keyword arguments: | |
646 | ||
647 | - ``call_during_try`` = (default= ``False``) indicate if parse action should be run during | |
648 | lookaheads and alternate testing. For parse actions that have side effects, it is | |
649 | important to only call the parse action once it is determined that it is being | |
650 | called as part of a successful parse. For parse actions that perform additional | |
651 | validation, then call_during_try should be passed as True, so that the validation | |
652 | code is included in the preliminary "try" parses. | |
653 | ||
654 | Note: the default parsing behavior is to expand tabs in the input string | |
655 | before starting the parsing process. See :class:`parse_string` for more | |
656 | information on parsing strings containing ``<TAB>`` s, and suggested | |
657 | methods to maintain a consistent view of the parsed string, the parse | |
658 | location, and line and column positions within the parsed string. | |
659 | ||
660 | Example:: | |
661 | ||
662 | # parse dates in the form YYYY/MM/DD | |
663 | ||
664 | # use parse action to convert toks from str to int at parse time | |
665 | def convert_to_int(toks): | |
666 | return int(toks[0]) | |
667 | ||
668 | # use a parse action to verify that the date is a valid date | |
669 | def is_valid_date(instring, loc, toks): | |
670 | from datetime import date | |
671 | year, month, day = toks[::2] | |
672 | try: | |
673 | date(year, month, day) | |
674 | except ValueError: | |
675 | raise ParseException(instring, loc, "invalid date given") | |
676 | ||
677 | integer = Word(nums) | |
678 | date_str = integer + '/' + integer + '/' + integer | |
679 | ||
680 | # add parse actions | |
681 | integer.set_parse_action(convert_to_int) | |
682 | date_str.set_parse_action(is_valid_date) | |
683 | ||
684 | # note that integer fields are now ints, not strings | |
685 | date_str.run_tests(''' | |
686 | # successful parse - note that integer fields were converted to ints | |
687 | 1999/12/31 | |
688 | ||
689 | # fail - invalid date | |
690 | 1999/13/31 | |
691 | ''') | |
692 | """ | |
693 | if list(fns) == [None]: | |
694 | self.parseAction = [] | |
695 | else: | |
696 | if not all(callable(fn) for fn in fns): | |
697 | raise TypeError("parse actions must be callable") | |
698 | self.parseAction = [_trim_arity(fn) for fn in fns] | |
699 | self.callDuringTry = kwargs.get( | |
700 | "call_during_try", kwargs.get("callDuringTry", False) | |
701 | ) | |
702 | return self | |
703 | ||
704 | def add_parse_action(self, *fns: ParseAction, **kwargs) -> "ParserElement": | |
705 | """ | |
706 | Add one or more parse actions to expression's list of parse actions. See :class:`set_parse_action`. | |
707 | ||
708 | See examples in :class:`copy`. | |
709 | """ | |
710 | self.parseAction += [_trim_arity(fn) for fn in fns] | |
711 | self.callDuringTry = self.callDuringTry or kwargs.get( | |
712 | "call_during_try", kwargs.get("callDuringTry", False) | |
713 | ) | |
714 | return self | |
715 | ||
def add_condition(self, *fns: ParseCondition, **kwargs) -> "ParserElement":
    """Add a boolean predicate function to expression's list of parse actions. See
    :class:`set_parse_action` for function call signatures. Unlike ``set_parse_action``,
    functions passed to ``add_condition`` need to return boolean success/fail of the condition.

    Optional keyword arguments:

    - ``message`` = define a custom message to be used in the raised exception; when
      omitted (or ``None``), the default message of :class:`condition_as_parse_action` is used
    - ``fatal`` = if True, will raise ParseFatalException to stop parsing immediately; otherwise will raise
      ParseException
    - ``call_during_try`` = boolean to indicate if this method should be called during internal tryParse calls,
      default=False

    Returns this expression, so that calls can be chained.

    Example::

        integer = Word(nums).set_parse_action(lambda toks: int(toks[0]))
        year_int = integer.copy()
        year_int.add_condition(lambda toks: toks[0] >= 2000, message="Only support years 2000 and later")
        date_str = year_int + '/' + integer + '/' + integer

        result = date_str.parse_string("1999/12/31")  # -> Exception: Only support years 2000 and later (at char 0),
                                                                             (line:1, col:1)
    """
    # Only stringify a message that was actually given: str(None) would produce
    # the literal text "None" and hide the default failure message.
    message = kwargs.get("message")
    if message is not None:
        message = str(message)
    fatal = bool(kwargs.get("fatal", False))

    for fn in fns:
        self.parseAction.append(
            condition_as_parse_action(fn, message=message, fatal=fatal)
        )

    self.callDuringTry = self.callDuringTry or kwargs.get(
        "call_during_try", kwargs.get("callDuringTry", False)
    )
    return self
752 | ||
753 | def set_fail_action(self, fn: ParseFailAction) -> "ParserElement": | |
754 | """ | |
755 | Define action to perform if parsing fails at this expression. | |
756 | Fail acton fn is a callable function that takes the arguments | |
757 | ``fn(s, loc, expr, err)`` where: | |
758 | ||
759 | - ``s`` = string being parsed | |
760 | - ``loc`` = location where expression match was attempted and failed | |
761 | - ``expr`` = the parse expression that failed | |
762 | - ``err`` = the exception thrown | |
763 | ||
764 | The function returns no value. It may throw :class:`ParseFatalException` | |
765 | if it is desired to stop parsing immediately.""" | |
766 | self.failAction = fn | |
767 | return self | |
768 | ||
769 | def _skipIgnorables(self, instring: str, loc: int) -> int: | |
770 | if not self.ignoreExprs: | |
771 | return loc | |
772 | exprsFound = True | |
773 | ignore_expr_fns = [e._parse for e in self.ignoreExprs] | |
774 | while exprsFound: | |
775 | exprsFound = False | |
776 | for ignore_fn in ignore_expr_fns: | |
777 | try: | |
778 | while 1: | |
779 | loc, dummy = ignore_fn(instring, loc) | |
780 | exprsFound = True | |
781 | except ParseException: | |
782 | pass | |
783 | return loc | |
784 | ||
def preParse(self, instring: str, loc: int) -> int:
    """
    Return the location at which real matching should start.

    Ignorable expressions are skipped first (when any are registered), then
    leading characters found in ``self.whiteChars`` are skipped if
    ``self.skipWhitespace`` is set.
    """
    if self.ignoreExprs:
        loc = self._skipIgnorables(instring, loc)

    if not self.skipWhitespace:
        return loc

    end = len(instring)
    skippable = self.whiteChars
    while loc < end and instring[loc] in skippable:
        loc += 1
    return loc
796 | ||
def parseImpl(self, instring, loc, doActions=True):
    """
    Default matching step: consume nothing and produce no tokens.

    Returns ``(loc, tokens)`` with ``loc`` unchanged and an empty token list.
    """
    no_tokens = []
    return loc, no_tokens
799 | ||
def postParse(self, instring, loc, tokenlist):
    """
    Hook applied to the tokens produced by ``parseImpl``.

    The default implementation hands ``tokenlist`` back untouched.
    """
    return tokenlist
802 | ||
803 | # @profile | |
def _parseNoCache(
    self, instring, loc, doActions=True, callPreParse=True
) -> Tuple[int, ParseResults]:
    """
    Match this expression at ``loc`` in ``instring`` without using the packrat cache.

    The steps are: optionally run ``preParse`` (when both ``callPreParse`` and
    ``self.callPreparse`` are set), call ``parseImpl``, pass the tokens through
    ``postParse``, wrap them in a :class:`ParseResults`, and finally run the
    parse actions when ``doActions`` or ``self.callDuringTry`` is set.

    When ``self.debug`` or ``self.failAction`` is set, the matching step runs
    inside a handler that reports failures to ``debugActions.debug_fail`` and
    to the fail action before re-raising.

    Parameters:

    - ``instring`` - the string being parsed
    - ``loc`` - location at which to attempt the match
    - ``doActions`` - whether parse actions should be run
    - ``callPreParse`` - whether ``preParse`` may be called first

    Returns a tuple of the location following the match and the resulting
    :class:`ParseResults`.

    Raises :class:`ParseException` in place of an ``IndexError`` raised by
    ``parseImpl`` (on the guarded path) or by a parse action; any other
    exception propagates unchanged.
    """
    debugging = self.debug  # and doActions)
    len_instring = len(instring)

    if debugging or self.failAction:
        # Seed tokens_start so the failure hooks below always have a location,
        # even if preParse itself raises before the real value is computed.
        tokens_start = loc
        # print("Match {} at loc {}({}, {})".format(self, loc, lineno(loc, instring), col(loc, instring)))
        try:
            if callPreParse and self.callPreparse:
                pre_loc = self.preParse(instring, loc)
            else:
                pre_loc = loc
            tokens_start = pre_loc
            if self.debugActions.debug_try:
                self.debugActions.debug_try(instring, tokens_start, self, False)
            if self.mayIndexError or pre_loc >= len_instring:
                # running off the end of the input is reported as a parse failure
                try:
                    loc, tokens = self.parseImpl(instring, pre_loc, doActions)
                except IndexError:
                    raise ParseException(instring, len_instring, self.errmsg, self)
            else:
                loc, tokens = self.parseImpl(instring, pre_loc, doActions)
        except Exception as err:
            # print("Exception raised:", err)
            if self.debugActions.debug_fail:
                self.debugActions.debug_fail(
                    instring, tokens_start, self, err, False
                )
            if self.failAction:
                self.failAction(instring, tokens_start, self, err)
            raise
    else:
        if callPreParse and self.callPreparse:
            pre_loc = self.preParse(instring, loc)
        else:
            pre_loc = loc
        tokens_start = pre_loc
        if self.mayIndexError or pre_loc >= len_instring:
            try:
                loc, tokens = self.parseImpl(instring, pre_loc, doActions)
            except IndexError:
                raise ParseException(instring, len_instring, self.errmsg, self)
        else:
            loc, tokens = self.parseImpl(instring, pre_loc, doActions)

    tokens = self.postParse(instring, loc, tokens)

    ret_tokens = ParseResults(
        tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults
    )
    if self.parseAction and (doActions or self.callDuringTry):
        if debugging:
            try:
                for fn in self.parseAction:
                    try:
                        tokens = fn(instring, tokens_start, ret_tokens)  # type: ignore [call-arg, arg-type]
                    except IndexError as parse_action_exc:
                        exc = ParseException("exception raised in parse action")
                        raise exc from parse_action_exc

                    # a parse action may replace the results by returning a new value
                    if tokens is not None and tokens is not ret_tokens:
                        ret_tokens = ParseResults(
                            tokens,
                            self.resultsName,
                            asList=self.saveAsList
                            and isinstance(tokens, (ParseResults, list)),
                            modal=self.modalResults,
                        )
            except Exception as err:
                # print "Exception raised in user parse action:", err
                if self.debugActions.debug_fail:
                    self.debugActions.debug_fail(
                        instring, tokens_start, self, err, False
                    )
                raise
        else:
            for fn in self.parseAction:
                try:
                    tokens = fn(instring, tokens_start, ret_tokens)  # type: ignore [call-arg, arg-type]
                except IndexError as parse_action_exc:
                    exc = ParseException("exception raised in parse action")
                    raise exc from parse_action_exc

                # a parse action may replace the results by returning a new value
                if tokens is not None and tokens is not ret_tokens:
                    ret_tokens = ParseResults(
                        tokens,
                        self.resultsName,
                        asList=self.saveAsList
                        and isinstance(tokens, (ParseResults, list)),
                        modal=self.modalResults,
                    )
    if debugging:
        # print("Matched", self, "->", ret_tokens.as_list())
        if self.debugActions.debug_match:
            self.debugActions.debug_match(
                instring, tokens_start, loc, self, ret_tokens, False
            )

    return loc, ret_tokens
905 | ||
def try_parse(
    self,
    instring: str,
    loc: int,
    *,
    raise_fatal: bool = False,
    do_actions: bool = False,
) -> int:
    """
    Attempt a match at ``loc`` and return only the location following it.

    Parse actions are run only when ``do_actions`` is set. A
    :class:`ParseFatalException` is re-raised as-is when ``raise_fatal`` is
    set; otherwise it is replaced by a plain :class:`ParseException` built
    from this expression's ``errmsg``.
    """
    try:
        outcome = self._parse(instring, loc, doActions=do_actions)
        return outcome[0]
    except ParseFatalException:
        if not raise_fatal:
            raise ParseException(instring, loc, self.errmsg, self)
        raise
920 | ||
def can_parse_next(self, instring: str, loc: int, do_actions: bool = False) -> bool:
    """
    Report whether this expression matches at ``loc``.

    Returns ``False`` when ``try_parse`` raises :class:`ParseException` or
    ``IndexError``, and ``True`` when it completes normally.
    """
    try:
        self.try_parse(instring, loc, do_actions=do_actions)
    except (ParseException, IndexError):
        return False
    return True
928 | ||
# cache for left-recursion in Forward references
# recursion_memos starts as a plain dict; enable_left_recursion() replaces it with a
# memo object and reset_cache() clears it.
# NOTE(review): recursion_lock is presumably held by the code that reads and writes
# recursion_memos - confirm against its users, which are outside this section.
recursion_lock = RLock()
recursion_memos: typing.Dict[
    Tuple[int, "Forward", bool], Tuple[int, Union[ParseResults, Exception]]
] = {}
934 | ||
class _CacheType(dict):
    """
    Placeholder cache class that exists to help type checking.

    It declares the members the packrat code uses on a cache object
    (``not_in_cache``, ``get`` and ``set``); the two methods do nothing and
    return ``None``.
    """

    not_in_cache: bool

    def get(self, *args):
        """Accept any arguments; performs no lookup."""
        return None

    def set(self, *args):
        """Accept any arguments; stores nothing."""
        return None
947 | ||
# argument cache for optimizing repeated calls when backtracking through recursive expressions
packrat_cache = (
    _CacheType()
)  # set later by enable_packrat(); this is here so that reset_cache() doesn't fail
# lock held by _parseCache() for the whole of each cache lookup/update
packrat_cache_lock = RLock()
# [hits, misses] counters, indexed by the HIT/MISS constants in _parseCache()
packrat_cache_stats = [0, 0]
954 | ||
955 | # this method gets repeatedly called during backtracking with the same arguments - | |
956 | # we can cache these arguments and save ourselves the trouble of re-parsing the contained expression | |
def _parseCache(
    self, instring, loc, doActions=True, callPreParse=True
) -> Tuple[int, ParseResults]:
    """
    Packrat (memoizing) variant of ``_parseNoCache``.

    Results are keyed on ``(self, instring, loc, callPreParse, doActions)``.
    On a miss the expression is parsed with ``_parseNoCache`` and either the
    result or a copy of the raised :class:`ParseBaseException` is stored.
    On a hit the stored exception is re-raised, or a copy of the stored
    results is returned. The whole operation runs while holding
    ``ParserElement.packrat_cache_lock``.

    Returns a tuple of the location following the match and the resulting
    :class:`ParseResults`.
    """
    HIT, MISS = 0, 1
    TRY, MATCH, FAIL = 0, 1, 2
    lookup = (self, instring, loc, callPreParse, doActions)
    with ParserElement.packrat_cache_lock:
        cache = ParserElement.packrat_cache
        value = cache.get(lookup)
        if value is cache.not_in_cache:
            ParserElement.packrat_cache_stats[MISS] += 1
            try:
                value = self._parseNoCache(instring, loc, doActions, callPreParse)
            except ParseBaseException as pe:
                # cache a copy of the exception, without the traceback
                cache.set(lookup, pe.__class__(*pe.args))
                raise
            else:
                # store a copy of the results, plus the location the parse was requested at
                cache.set(lookup, (value[0], value[1].copy(), loc))
                return value
        else:
            ParserElement.packrat_cache_stats[HIT] += 1
            # debug hooks are called with cache_hit=True; a TypeError from the hook call is
            # ignored (presumably for hooks that do not accept that keyword)
            if self.debug and self.debugActions.debug_try:
                try:
                    self.debugActions.debug_try(instring, loc, self, cache_hit=True)  # type: ignore [call-arg]
                except TypeError:
                    pass
            if isinstance(value, Exception):
                if self.debug and self.debugActions.debug_fail:
                    try:
                        self.debugActions.debug_fail(
                            instring, loc, self, value, cache_hit=True  # type: ignore [call-arg]
                        )
                    except TypeError:
                        pass
                raise value

            value = cast(Tuple[int, ParseResults, int], value)
            # hand out a copy so callers cannot modify the cached results
            loc_, result, endloc = value[0], value[1].copy(), value[2]
            if self.debug and self.debugActions.debug_match:
                try:
                    self.debugActions.debug_match(
                        instring, loc_, endloc, self, result, cache_hit=True  # type: ignore [call-arg]
                    )
                except TypeError:
                    pass

            return loc_, result
1005 | ||
# _parse is the entry point the parsing methods call; it starts out as the uncached
# implementation, enable_packrat() rebinds it to _parseCache and disable_memoization()
# restores it
_parse = _parseNoCache
1007 | ||
@staticmethod
def reset_cache() -> None:
    """
    Empty the packrat cache and the left-recursion memos, and zero every
    counter in ``packrat_cache_stats`` in place.
    """
    ParserElement.packrat_cache.clear()
    stats = ParserElement.packrat_cache_stats
    stats[:] = [0] * len(stats)
    ParserElement.recursion_memos.clear()
1015 | ||
# class-wide mode flags: set by enable_packrat() / enable_left_recursion() and
# cleared by disable_memoization(); each enable method refuses to run while the
# other flag is set unless force=True is passed
_packratEnabled = False
_left_recursion_enabled = False
1018 | ||
@staticmethod
def disable_memoization() -> None:
    """
    Turn off Packrat or Left Recursion parsing, whichever is active, and
    discard their memoized data.

    Calling this when neither mode is enabled is harmless, so it can be used
    to clear any previous settings before activating Packrat or Left
    Recursion.
    """
    ParserElement.reset_cache()
    ParserElement._parse = ParserElement._parseNoCache
    ParserElement._packratEnabled = False
    ParserElement._left_recursion_enabled = False
1032 | ||
@staticmethod
def enable_left_recursion(
    cache_size_limit: typing.Optional[int] = None, *, force=False
) -> None:
    """
    Turn on "bounded recursion" parsing, which supports both direct and
    indirect left-recursion. While parsing, left-recursive :class:`Forward`
    elements are matched repeatedly with a fixed recursion depth that is
    gradually increased until the longest match is found.

    Example::

        from pip._vendor import pyparsing as pp
        pp.ParserElement.enable_left_recursion()

        E = pp.Forward("E")
        num = pp.Word(pp.nums)
        # match `num`, or `num '+' num`, or `num '+' num '+' num`, ...
        E <<= E + '+' - num | num

        print(E.parse_string("1+2+3"))

    The recursion search memoizes matches of ``Forward`` elements, so parse
    actions may not be re-evaluated during backtracking. Programs whose parse
    actions rely on a strict ordering of side-effects may break.

    Parameters:

    - ``cache_size_limit`` - (default=``None``) - memoize at most this many
      ``Forward`` elements during matching; if ``None`` (the default),
      memoize all ``Forward`` elements.

    Bounded Recursion parsing is similar but not identical to Packrat parsing,
    so the two cannot be used together. Pass ``force=True`` to disable any
    previous, conflicting settings.

    Raises ``RuntimeError`` if Packrat is enabled and ``force`` is not set,
    and ``NotImplementedError`` for a ``cache_size_limit`` that is not
    positive.
    """
    if force:
        ParserElement.disable_memoization()
    elif ParserElement._packratEnabled:
        raise RuntimeError("Packrat and Bounded Recursion are not compatible")

    if cache_size_limit is None:
        memo = _UnboundedMemo()
    elif cache_size_limit > 0:
        memo = _LRUMemo(capacity=cache_size_limit)
    else:
        raise NotImplementedError("Memo size of %s" % cache_size_limit)

    ParserElement.recursion_memos = memo  # type: ignore[assignment]
    ParserElement._left_recursion_enabled = True
1080 | ||
@staticmethod
def enable_packrat(
    cache_size_limit: typing.Optional[int] = 128, *, force: bool = False
) -> None:
    """
    Enables "packrat" parsing, which adds memoizing to the parsing logic.
    Repeated parse attempts at the same string location (which happens
    often in many complex grammars) can immediately return a cached value,
    instead of re-executing parsing/validating code. Memoizing is done of
    both valid results and parsing exceptions.

    Parameters:

    - ``cache_size_limit`` - (default= ``128``) - if an integer value is provided
      will limit the size of the packrat cache; if None is passed, then
      the cache size will be unbounded; if 0 is passed, the cache will
      be effectively disabled.
    - ``force`` - (default= ``False``) - if ``True``, any active memoization
      (Packrat or Bounded Recursion) is disabled first.

    This speedup may break existing programs that use parse actions that
    have side-effects. For this reason, packrat parsing is disabled when
    you first import pyparsing. To activate the packrat feature, your
    program must call the static method :class:`ParserElement.enable_packrat`.
    For best results, call ``enable_packrat()`` immediately after
    importing pyparsing.

    If packrat parsing is already enabled and ``force`` is not set, the call
    does nothing; in particular a new ``cache_size_limit`` is not applied.

    Example::

        from pip._vendor import pyparsing
        pyparsing.ParserElement.enable_packrat()

    Packrat parsing works similar but not identical to Bounded Recursion parsing,
    thus the two cannot be used together. Use ``force=True`` to disable any
    previous, conflicting settings.

    Raises ``RuntimeError`` if Bounded Recursion is enabled and ``force`` is
    not set.
    """
    if force:
        ParserElement.disable_memoization()
    elif ParserElement._left_recursion_enabled:
        raise RuntimeError("Packrat and Bounded Recursion are not compatible")
    if not ParserElement._packratEnabled:
        ParserElement._packratEnabled = True
        if cache_size_limit is None:
            ParserElement.packrat_cache = _UnboundedCache()
        else:
            ParserElement.packrat_cache = _FifoCache(cache_size_limit)  # type: ignore[assignment]
        # route all parsing through the memoizing implementation
        ParserElement._parse = ParserElement._parseCache
1124 | ||
def parse_string(
    self, instring: str, parse_all: bool = False, *, parseAll: bool = False
) -> ParseResults:
    """
    Parse a string according to this parser definition. This is the primary
    entry point for client code.

    :param instring: The input string to be parsed.
    :param parse_all: If set, the entire input string must match the grammar.
    :param parseAll: retained for pre-PEP8 compatibility, will be removed in a future release.
    :raises ParseException: Raised if ``parse_all`` is set and the input string does not match the whole grammar.
    :returns: the parsed data as a :class:`ParseResults` object, which may be accessed as a `list`, a `dict`, or
      an object with attributes if the given parser includes results names.

    To require that the whole input matches the grammar, set ``parse_all`` to ``True``. This is
    equivalent to ending the grammar with ``StringEnd()``.

    So that column numbers are reported properly, ``parse_string`` works on a copy of the input in
    which all tabs are converted to spaces (8 spaces per tab, as per the default in
    ``string.expandtabs``). If the input contains tabs and the grammar has parse actions that use the
    ``loc`` argument to index into the string being parsed, a consistent view of the input can be
    ensured by doing one of the following:

    - calling ``parse_with_tabs`` on your grammar before calling ``parse_string`` (see :class:`parse_with_tabs`),
    - defining the parse action with the full ``(s,loc,toks)`` signature, and referencing the input string
      through the parse action's ``s`` argument, or
    - explicitly expanding the tabs in the input string before calling ``parse_string``.

    Examples:

    By default, partial matches are OK.

    >>> res = Word('a').parse_string('aaaaabaaa')
    >>> print(res)
    ['aaaaa']

    The parsing behavior varies by the inheriting class of this abstract class. Please refer to the
    children directly to see more examples.

    An exception is raised if the parse_all flag is set and instring does not match the whole grammar.

    >>> res = Word('a').parse_string('aaaaabaaa', parse_all=True)
    Traceback (most recent call last):
    ...
    pyparsing.ParseException: Expected end of text, found 'b' (at char 5), (line:1, col:6)
    """
    must_consume_all = parse_all or parseAll

    ParserElement.reset_cache()
    if not self.streamlined:
        self.streamline()
    for ignorable in self.ignoreExprs:
        ignorable.streamline()

    text = instring if self.keepTabs else instring.expandtabs()
    try:
        end_loc, tokens = self._parse(text, 0)
        if must_consume_all:
            # skip trailing ignorables/whitespace, then insist on end of input
            end_loc = self.preParse(text, end_loc)
            (Empty() + StringEnd())._parse(text, end_loc)
    except ParseBaseException as exc:
        if not ParserElement.verbose_stacktrace:
            # catch and re-raise exception from here, clearing out pyparsing internal stack trace
            raise exc.with_traceback(None)
        raise
    return tokens
1193 | ||
def scan_string(
    self,
    instring: str,
    max_matches: int = _MAX_INT,
    overlap: bool = False,
    *,
    debug: bool = False,
    maxMatches: int = _MAX_INT,
) -> Generator[Tuple[ParseResults, int, int], None, None]:
    """
    Scan the input string for expression matches. Each match will return the
    matching tokens, start location, and end location. May be called with optional
    ``max_matches`` argument, to clip scanning after 'n' matches are found. If
    ``overlap`` is specified, then overlapping matches will be reported.

    ``maxMatches`` is the pre-PEP8 spelling of ``max_matches``; the smaller of
    the two values is used. If ``debug`` is set, each match is also printed as
    a dict with ``tokens``, ``start`` and ``end`` keys.

    Note that the start and end locations are reported relative to the string
    being parsed. See :class:`parse_string` for more information on parsing
    strings with embedded tabs.

    Example::

        source = "sldjf123lsdjjkf345sldkjf879lkjsfd987"
        print(source)
        for tokens, start, end in Word(alphas).scan_string(source):
            print(' '*start + '^'*(end-start))
            print(' '*start + tokens[0])

    prints::

        sldjf123lsdjjkf345sldkjf879lkjsfd987
        ^^^^^
        sldjf
                ^^^^^^^
                lsdjjkf
                          ^^^^^^
                          sldkjf
                                   ^^^^^^
                                   lkjsfd
    """
    maxMatches = min(maxMatches, max_matches)
    if not self.streamlined:
        self.streamline()
    for e in self.ignoreExprs:
        e.streamline()

    if not self.keepTabs:
        instring = str(instring).expandtabs()
    instrlen = len(instring)
    loc = 0
    # bind the bound methods once; they are called on every iteration below
    preparseFn = self.preParse
    parseFn = self._parse
    ParserElement.reset_cache()
    matches = 0
    try:
        while loc <= instrlen and matches < maxMatches:
            try:
                preloc: int = preparseFn(instring, loc)
                nextLoc: int
                tokens: ParseResults
                nextLoc, tokens = parseFn(instring, preloc, callPreParse=False)
            except ParseException:
                # no match here; retry one character past the pre-parsed location
                loc = preloc + 1
            else:
                if nextLoc > loc:
                    matches += 1
                    if debug:
                        print(
                            {
                                "tokens": tokens.as_list(),
                                "start": preloc,
                                "end": nextLoc,
                            }
                        )
                    yield tokens, preloc, nextLoc
                    if overlap:
                        # jump to the end of the match only when pre-parsing
                        # skips something at loc; otherwise step one character
                        skipped_loc = preparseFn(instring, loc)
                        if skipped_loc > loc:
                            loc = nextLoc
                        else:
                            loc += 1
                    else:
                        loc = nextLoc
                else:
                    # match ended at or before loc; step past it to keep advancing
                    loc = preloc + 1
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        else:
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            raise exc.with_traceback(None)
1284 | ||
def transform_string(self, instring: str, *, debug: bool = False) -> str:
    """
    Extension to :class:`scan_string` that rewrites matching text using the
    tokens returned from a parse action. Define a grammar, attach a parse
    action that modifies the returned token list, then call
    ``transform_string()`` on the target string: every match is replaced
    according to the parse action, and the transformed string is returned.

    Note that this sets ``keepTabs`` on the expression.

    Example::

        wd = Word(alphas)
        wd.set_parse_action(lambda toks: toks[0].title())

        print(wd.transform_string("now is the winter of our discontent made glorious summer by this sun of york."))

    prints::

        Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York.
    """
    pieces: List[str] = []
    cursor = 0
    # force preservation of tab characters, to minimize unwanted transformation of string, and to
    # keep string locs straight between transform_string and scan_string
    self.keepTabs = True
    try:
        for tokens, start, end in self.scan_string(instring, debug=debug):
            # untouched text between the previous match and this one
            pieces.append(instring[cursor:start])
            if tokens:
                if isinstance(tokens, ParseResults):
                    pieces.extend(tokens.as_list())
                elif isinstance(tokens, Iterable) and not isinstance(tokens, str_type):
                    pieces.extend(tokens)
                else:
                    pieces.append(tokens)
            cursor = end
        pieces.append(instring[cursor:])
        non_empty = [piece for piece in pieces if piece]
        return "".join(str(item) for item in _flatten(non_empty))
    except ParseBaseException as exc:
        if not ParserElement.verbose_stacktrace:
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            raise exc.with_traceback(None)
        raise
1330 | ||
def search_string(
    self,
    instring: str,
    max_matches: int = _MAX_INT,
    *,
    debug: bool = False,
    maxMatches: int = _MAX_INT,
) -> ParseResults:
    """
    Another extension to :class:`scan_string`, giving direct access to the
    tokens of every match of this expression. The optional ``max_matches``
    argument clips searching after 'n' matches are found (``maxMatches`` is
    the pre-PEP8 spelling; the smaller of the two is used).

    Example::

        # a capitalized word starts with an uppercase letter, followed by zero or more lowercase letters
        cap_word = Word(alphas.upper(), alphas.lower())

        print(cap_word.search_string("More than Iron, more than Lead, more than Gold I need Electricity"))

        # the sum() builtin can be used to merge results into a single ParseResults object
        print(sum(cap_word.search_string("More than Iron, more than Lead, more than Gold I need Electricity")))

    prints::

        [['More'], ['Iron'], ['Lead'], ['Gold'], ['I'], ['Electricity']]
        ['More', 'Iron', 'Lead', 'Gold', 'I', 'Electricity']
    """
    limit = min(maxMatches, max_matches)
    try:
        found = [
            tokens for tokens, _, _ in self.scan_string(instring, limit, debug=debug)
        ]
        return ParseResults(found)
    except ParseBaseException as exc:
        if not ParserElement.verbose_stacktrace:
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            raise exc.with_traceback(None)
        raise
1370 | ||
def split(
    self,
    instring: str,
    maxsplit: int = _MAX_INT,
    include_separators: bool = False,
    *,
    includeSeparators=False,
) -> Generator[str, None, None]:
    """
    Generator that splits a string, using this expression as the separator.
    The optional ``maxsplit`` argument limits the number of splits, and the
    optional ``include_separators`` argument (default= ``False``) selects
    whether the matched separator text is also yielded
    (``includeSeparators`` is the pre-PEP8 spelling).

    Example::

        punc = one_of(list(".,;:/-!?"))
        print(list(punc.split("This, this?, this sentence, is badly punctuated!")))

    prints::

        ['This', ' this', '', ' this sentence', ' is badly punctuated', '']
    """
    keep_separators = includeSeparators or include_separators
    cursor = 0
    for tokens, start, end in self.scan_string(instring, max_matches=maxsplit):
        yield instring[cursor:start]
        if keep_separators:
            yield tokens[0]
        cursor = end
    # whatever follows the final separator
    yield instring[cursor:]
1402 | ||
def __add__(self, other) -> "ParserElement":
    """
    Implementation of ``+`` operator - returns :class:`And`. A string operand
    is converted using this expression's ``_literalStringClass``
    (:class:`Literal` by default).

    Example::

        greet = Word(alphas) + "," + Word(alphas) + "!"
        hello = "Hello, World!"
        print(hello, "->", greet.parse_string(hello))

    prints::

        Hello, World! -> ['Hello', ',', 'World', '!']

    ``...`` may be used as a parse expression as a short form of :class:`SkipTo`::

        Literal('start') + ... + Literal('end')

    is equivalent to::

        Literal('start') + SkipTo('end')("_skipped*") + Literal('end')

    Note that the skipped text is returned with '_skipped' as a results name,
    and to support having multiple skips in the same parser, the value returned is
    a list of all skipped text.
    """
    if other is Ellipsis:
        return _PendingSkip(self)

    rhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(rhs, ParserElement):
        return And([self, rhs])
    return NotImplemented
1438 | ||
def __radd__(self, other) -> "ParserElement":
    """
    Implementation of ``+`` operator when left operand is not a :class:`ParserElement`.

    ``... + expr`` produces a :class:`SkipTo` for this expression (named
    ``_skipped*``) followed by the expression itself.
    """
    if other is Ellipsis:
        return SkipTo(self)("_skipped*") + self

    lhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(lhs, ParserElement):
        return lhs + self
    return NotImplemented
1451 | ||
def __sub__(self, other) -> "ParserElement":
    """
    Implementation of ``-`` operator, returns :class:`And` with error stop.

    A string operand is first converted using ``_literalStringClass``.
    """
    rhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(rhs, ParserElement):
        return self + And._ErrorStop() + rhs
    return NotImplemented
1461 | ||
def __rsub__(self, other) -> "ParserElement":
    """
    Implementation of ``-`` operator when left operand is not a :class:`ParserElement`.

    A string operand is first converted using ``_literalStringClass``.
    """
    lhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(lhs, ParserElement):
        return lhs - self
    return NotImplemented
1471 | ||
def __mul__(self, other) -> "ParserElement":
    """
    Implementation of ``*`` operator, allows use of ``expr * 3`` in place of
    ``expr + expr + expr``. Expressions may also be multiplied by a 2-integer
    tuple, similar to ``{min, max}`` multipliers in regular expressions. Tuples
    may also include ``None`` as in:

    - ``expr*(n, None)`` or ``expr*(n, )`` is equivalent
      to ``expr*n + ZeroOrMore(expr)``
      (read as "at least n instances of ``expr``")
    - ``expr*(None, n)`` is equivalent to ``expr*(0, n)``
      (read as "0 to n instances of ``expr``")
    - ``expr*(None, None)`` is equivalent to ``ZeroOrMore(expr)``
    - ``expr*(1, None)`` is equivalent to ``OneOrMore(expr)``

    ``...`` is accepted wherever ``None`` is, and ``expr * ...`` is the same
    as ``expr*(0, None)``.

    Note that ``expr*(None, n)`` does not raise an exception if
    more than n exprs exist in the input stream; that is,
    ``expr*(None, n)`` does not enforce a maximum number of expr
    occurrences. If this behavior is desired, then write
    ``expr*(None, n) + ~expr``

    Raises ``ValueError`` for a negative count, or when the second tuple
    value is smaller than the first.
    """
    if other is Ellipsis:
        other = (0, None)
    elif isinstance(other, tuple) and other[:1] == (Ellipsis,):
        other = ((0,) + other[1:] + (None,))[:2]

    if isinstance(other, int):
        min_count, extra_count = other, 0
    elif isinstance(other, tuple):
        # normalize to exactly two bounds, with Ellipsis treated as None
        bounds = tuple(None if item is Ellipsis else item for item in other)
        low, high = (bounds + (None, None))[:2]
        if low is None:
            low = 0
        if not isinstance(low, int):
            return NotImplemented
        if high is None:
            # open-ended repetition
            if low == 0:
                return ZeroOrMore(self)
            if low == 1:
                return OneOrMore(self)
            return self * low + ZeroOrMore(self)
        if not isinstance(high, int):
            return NotImplemented
        min_count, extra_count = low, high - low
    else:
        return NotImplemented

    if min_count < 0:
        raise ValueError("cannot multiply ParserElement by negative value")
    if extra_count < 0:
        raise ValueError(
            "second tuple value must be greater or equal to first tuple value"
        )
    if min_count == extra_count == 0:
        return And([])

    # required part: `min_count` copies of this expression (None when there are none)
    if min_count == 1:
        required = self
    elif min_count:
        required = And([self] * min_count)
    else:
        required = None

    if not extra_count:
        return required

    # optional part: Opt(self + Opt(self + ... Opt(self))), built from the inside out
    optional = Opt(self)
    for _ in range(extra_count - 1):
        optional = Opt(self + optional)

    if required is None:
        return optional
    return required + optional
1550 | ||
def __rmul__(self, other) -> "ParserElement":
    """
    Implementation of ``*`` operator when the left operand is not a
    :class:`ParserElement`; delegates to ``__mul__`` with the same operand.
    """
    product = self.__mul__(other)
    return product
1553 | ||
def __or__(self, other) -> "ParserElement":
    """
    Implementation of ``|`` operator - returns :class:`MatchFirst`.

    ``expr | ...`` returns a ``_PendingSkip`` with ``must_skip`` set, and
    ``expr | ""`` returns ``Opt(expr)``. Any other string operand is converted
    using ``_literalStringClass``.
    """
    if other is Ellipsis:
        return _PendingSkip(self, must_skip=True)

    if isinstance(other, str_type):
        if other == "":
            # `expr | ""` is equivalent to `Opt(expr)`
            return Opt(self)
        other = self._literalStringClass(other)

    if isinstance(other, ParserElement):
        return MatchFirst([self, other])
    return NotImplemented
1569 | ||
def __ror__(self, other) -> "ParserElement":
    """
    Implementation of ``|`` operator when left operand is not a :class:`ParserElement`.

    A string operand is first converted using ``_literalStringClass``.
    """
    lhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(lhs, ParserElement):
        return lhs | self
    return NotImplemented
1579 | ||
def __xor__(self, other) -> "ParserElement":
    """
    Implementation of ``^`` operator - returns :class:`Or`

    A string operand is converted with ``self._literalStringClass``. Returns
    ``NotImplemented`` if ``other`` is not a :class:`ParserElement` after conversion.
    """
    if isinstance(other, str_type):
        other = self._literalStringClass(other)
    if isinstance(other, ParserElement):
        return Or([self, other])
    return NotImplemented
1589 | ||
def __rxor__(self, other) -> "ParserElement":
    """
    Implementation of ``^`` operator when left operand is not a :class:`ParserElement`

    A string left operand is converted with ``self._literalStringClass`` and the operation
    is re-dispatched as ``other ^ self``. Returns ``NotImplemented`` for any operand that
    is not a :class:`ParserElement` after conversion.
    """
    if isinstance(other, str_type):
        other = self._literalStringClass(other)
    if isinstance(other, ParserElement):
        return other ^ self
    return NotImplemented
1599 | ||
def __and__(self, other) -> "ParserElement":
    """
    Implementation of ``&`` operator - returns :class:`Each`

    A string operand is converted with ``self._literalStringClass``. Returns
    ``NotImplemented`` if ``other`` is not a :class:`ParserElement` after conversion.
    """
    if isinstance(other, str_type):
        other = self._literalStringClass(other)
    if isinstance(other, ParserElement):
        return Each([self, other])
    return NotImplemented
1609 | ||
def __rand__(self, other) -> "ParserElement":
    """
    Implementation of ``&`` operator when left operand is not a :class:`ParserElement`

    A string left operand is converted with ``self._literalStringClass`` and the operation
    is re-dispatched as ``other & self``. Returns ``NotImplemented`` for any operand that
    is not a :class:`ParserElement` after conversion.
    """
    if isinstance(other, str_type):
        other = self._literalStringClass(other)
    if isinstance(other, ParserElement):
        return other & self
    return NotImplemented
1619 | ||
def __invert__(self) -> "ParserElement":
    """
    Implementation of ``~`` operator - returns :class:`NotAny`
    """
    negated = NotAny(self)
    return negated
1625 | ||
# Disable iteration explicitly: with ``__iter__`` set to None, ``iter(expr)`` raises
# TypeError instead of falling back to the legacy sequence protocol, which would call
# ``__getitem__`` with successive integers (``__getitem__`` is used here for repetition).
__iter__ = None
1629 | ||
def __getitem__(self, key):
    """
    Use ``[]`` indexing notation as a short form for expression repetition:

    - ``expr[n]`` is equivalent to ``expr*n``
    - ``expr[m, n]`` is equivalent to ``expr*(m, n)``
    - ``expr[n, ...]`` or ``expr[n,]`` is equivalent
      to ``expr*n + ZeroOrMore(expr)``
      (read as "at least n instances of ``expr``")
    - ``expr[..., n]`` is equivalent to ``expr*(0, n)``
      (read as "0 to n instances of ``expr``")
    - ``expr[...]`` and ``expr[0, ...]`` are equivalent to ``ZeroOrMore(expr)``
    - ``expr[1, ...]`` is equivalent to ``OneOrMore(expr)``

    ``None`` may be used in place of ``...``.

    Note that ``expr[..., n]`` and ``expr[m, n]`` do not raise an exception
    if more than ``n`` instances of ``expr`` exist in the input stream. If this
    behavior is desired, then write ``expr[..., n] + ~expr``.

    For repetition with a stop_on expression, use slice notation:

    - ``expr[...: end_expr]`` and ``expr[0, ...: end_expr]`` are equivalent to ``ZeroOrMore(expr, stop_on=end_expr)``
    - ``expr[1, ...: end_expr]`` is equivalent to ``OneOrMore(expr, stop_on=end_expr)``

    Raises ``TypeError`` if more than 2 index arguments are given.
    """

    has_stop_on = False
    stop_on = NoMatch()
    if isinstance(key, slice):
        # expr[reps: stop_expr] - the slice start holds the repetition spec
        reps, stop_on = key.start, key.stop
        key = ... if reps is None else reps
        has_stop_on = True
    elif isinstance(key, tuple) and isinstance(key[-1], slice):
        # expr[m, n: stop_expr] - the second item is a slice of (n, stop_expr)
        lower, tail = key[0], key[1]
        key = (lower, tail.start)
        stop_on = tail.stop
        has_stop_on = True

    # normalize single-argument keys to tuples
    if isinstance(key, str_type):
        key = (key,)
    try:
        iter(key)
    except TypeError:
        key = (key, key)

    n_args = len(key)
    if n_args > 2:
        overflow = f"... [{n_args}]" if n_args > 5 else ""
        raise TypeError(
            f"only 1 or 2 index arguments supported ({key[:5]}{overflow})"
        )

    # clip to 2 elements and delegate to the ``*`` operator
    repeated = typing.cast(_MultipleMatch, self * tuple(key[:2]))

    if has_stop_on:
        repeated.stopOn(stop_on)

    return repeated
1689 | ||
def __call__(self, name: typing.Optional[str] = None) -> "ParserElement":
    """
    Shortcut for :class:`set_results_name`, with ``list_all_matches=False``.

    If ``name`` is given with a trailing ``'*'`` character, then ``list_all_matches`` will be
    passed as ``True``.

    If ``name`` is omitted (or ``None``), same as calling :class:`copy`.

    Example::

        # these are equivalent
        userdata = Word(alphas).set_results_name("name") + Word(nums + "-").set_results_name("socsecno")
        userdata = Word(alphas)("name") + Word(nums + "-")("socsecno")
    """
    if name is None:
        return self.copy()
    return self._setResultsName(name)
1709 | ||
def suppress(self) -> "ParserElement":
    """
    Wrap this :class:`ParserElement` in a :class:`Suppress`, so that its output is
    suppressed; useful to keep punctuation from cluttering up returned output.
    """
    suppressed = Suppress(self)
    return suppressed
1716 | ||
def ignore_whitespace(self, recursive: bool = True) -> "ParserElement":
    """
    Turn on skipping of whitespace before matching the characters in this
    :class:`ParserElement`'s defined pattern.

    :param recursive: If ``True`` (the default), also enable whitespace skipping in child
       elements (if any); this implementation does not read it and only updates the
       element's own ``skipWhitespace`` flag

    Returns ``self``, so calls can be chained.
    """
    self.skipWhitespace = True
    return self
1726 | ||
def leave_whitespace(self, recursive: bool = True) -> "ParserElement":
    """
    Turn off skipping of whitespace before matching the characters in this
    :class:`ParserElement`'s defined pattern. This is normally only used internally by
    the pyparsing module, but may be needed in some whitespace-sensitive grammars.

    :param recursive: If true (the default), also disable whitespace skipping in child
       elements (if any); this implementation does not read it and only updates the
       element's own ``skipWhitespace`` flag

    Returns ``self``, so calls can be chained.
    """
    self.skipWhitespace = False
    return self
1737 | ||
def set_whitespace_chars(
    self, chars: Union[Set[str], str], copy_defaults: bool = False
) -> "ParserElement":
    """
    Overrides the default whitespace chars

    Enables whitespace skipping, stores ``chars`` as a set in ``whiteChars``, and
    records ``copy_defaults`` in ``copyDefaultWhiteChars``. Returns ``self``.
    """
    self.skipWhitespace = True
    whitespace = set(chars)
    self.whiteChars = whitespace
    self.copyDefaultWhiteChars = copy_defaults
    return self
1748 | ||
def parse_with_tabs(self) -> "ParserElement":
    """
    Override the default behavior of expanding ``<TAB>`` s to spaces before parsing the
    input string, by setting this element's ``keepTabs`` flag.
    Must be called before ``parse_string`` when the input grammar contains elements that
    match ``<TAB>`` characters.

    Returns ``self``, so calls can be chained.
    """
    self.keepTabs = True
    return self
1757 | ||
def ignore(self, other: "ParserElement") -> "ParserElement":
    """
    Define expression to be ignored (e.g., comments) while doing pattern
    matching; may be called repeatedly, to define multiple comment or other
    ignorable patterns.

    A string argument is wrapped in :class:`Suppress`. A :class:`Suppress` expression is
    added to ``ignoreExprs`` only if it is not already present; any other expression is
    copied, wrapped in :class:`Suppress`, and appended. Returns ``self``.

    Example::

        patt = Word(alphas)[1, ...]
        patt.parse_string('ablaj /* comment */ lskjd')
        # -> ['ablaj']

        patt.ignore(c_style_comment)
        patt.parse_string('ablaj /* comment */ lskjd')
        # -> ['ablaj', 'lskjd']
    """
    if isinstance(other, str_type):
        other = Suppress(other)

    if isinstance(other, Suppress):
        # avoid registering the same ignorable expression twice
        if other not in self.ignoreExprs:
            self.ignoreExprs.append(other)
    else:
        # wrap a copy, so the caller's expression is not shared with the ignore list
        self.ignoreExprs.append(Suppress(other.copy()))
    return self
1785 | ||
def set_debug_actions(
    self,
    start_action: DebugStartAction,
    success_action: DebugSuccessAction,
    exception_action: DebugExceptionAction,
) -> "ParserElement":
    """
    Customize display of debugging messages while doing pattern matching:

    - ``start_action`` - method to be called when an expression is about to be parsed;
      should have the signature ``fn(input_string: str, location: int, expression: ParserElement, cache_hit: bool)``

    - ``success_action`` - method to be called when an expression has successfully parsed;
      should have the signature ``fn(input_string: str, start_location: int, end_location: int, expression: ParserElement, parsed_tokens: ParseResults, cache_hit: bool)``

    - ``exception_action`` - method to be called when expression fails to parse;
      should have the signature ``fn(input_string: str, location: int, expression: ParserElement, exception: Exception, cache_hit: bool)``

    Any action passed as a falsy value (such as ``None``) is replaced by the corresponding
    default debug action. Debugging is switched on for this expression, and ``self`` is returned.
    """
    on_start = start_action or _default_start_debug_action  # type: ignore[truthy-function]
    on_success = success_action or _default_success_debug_action  # type: ignore[truthy-function]
    on_exception = exception_action or _default_exception_debug_action  # type: ignore[truthy-function]
    self.debugActions = self.DebugActions(on_start, on_success, on_exception)
    self.debug = True
    return self
1811 | ||
def set_debug(self, flag: bool = True, recurse: bool = False) -> "ParserElement":
    """
    Enable display of debugging messages while doing pattern matching.
    Set ``flag`` to ``True`` to enable, ``False`` to disable.
    Set ``recurse`` to ``True`` to set the debug flag on every expression yielded by
    ``visit_all()`` (this expression and all sub-expressions).

    Example::

        wd = Word(alphas).set_name("alphaword")
        integer = Word(nums).set_name("numword")
        term = wd | integer

        # turn on debugging for wd
        wd.set_debug()

        term[1, ...].parse_string("abc 123 xyz 890")

    prints::

        Match alphaword at loc 0(1,1)
        Matched alphaword -> ['abc']
        Match alphaword at loc 3(1,4)
        Exception raised:Expected alphaword (at char 4), (line:1, col:5)
        Match alphaword at loc 7(1,8)
        Matched alphaword -> ['xyz']
        Match alphaword at loc 11(1,12)
        Exception raised:Expected alphaword (at char 12), (line:1, col:13)
        Match alphaword at loc 15(1,16)
        Exception raised:Expected alphaword (at char 15), (line:1, col:16)

    The output shown is that produced by the default debug actions - custom debug actions can be
    specified using :class:`set_debug_actions`. Prior to attempting
    to match the ``wd`` expression, the debugging message ``"Match <exprname> at loc <n>(<line>,<col>)"``
    is shown. Then if the parse succeeds, a ``"Matched"`` message is shown, or an ``"Exception raised"``
    message is shown. Also note the use of :class:`set_name` to assign a human-readable name to the expression,
    which makes debugging and exception messages easier to understand - for instance, the default
    name created for the :class:`Word` expression without calling ``set_name`` is ``"W:(A-Za-z)"``.
    """
    if recurse:
        # apply the flag to each visited expression individually
        for element in self.visit_all():
            element.set_debug(flag, recurse=False)
        return self

    if not flag:
        self.debug = False
        return self

    # enabling debug installs the default actions, which also sets self.debug
    self.set_debug_actions(
        _default_start_debug_action,
        _default_success_debug_action,
        _default_exception_debug_action,
    )
    return self
1864 | ||
@property
def default_name(self) -> str:
    """
    The automatically generated name for this expression; it is computed with
    ``_generateDefaultName()`` on first access and cached in ``_defaultName``.
    """
    cached = self._defaultName
    if cached is None:
        cached = self._defaultName = self._generateDefaultName()
    return cached
1870 | ||
@abstractmethod
def _generateDefaultName(self) -> str:
    """
    Build the automatically generated name string for this expression.

    Child classes must define this method, which defines how the ``default_name`` is set;
    the ``default_name`` property calls it and caches the returned string.
    """
1876 | ||
def set_name(self, name: str) -> "ParserElement":
    """
    Define name for this expression, makes debugging and exception messages clearer.

    The expression's ``errmsg`` is rebuilt from the new name, and debugging is switched on
    when ``__diag__.enable_debug_on_named_expressions`` is set. Returns ``self``.

    Example::

        Word(nums).parse_string("ABC")  # -> Exception: Expected W:(0-9) (at char 0), (line:1, col:1)
        Word(nums).set_name("integer").parse_string("ABC")  # -> Exception: Expected integer (at char 0), (line:1, col:1)
    """
    self.customName = name
    # read back through the ``name`` property rather than using the argument directly
    display_name = self.name
    self.errmsg = "Expected " + display_name
    if __diag__.enable_debug_on_named_expressions:
        self.set_debug()
    return self
1891 | ||
@property
def name(self) -> str:
    """
    Display name for this expression: the user-defined name if one has been set,
    otherwise the auto-generated ``default_name``.
    """
    custom = self.customName
    if custom is not None:
        return custom
    return self.default_name
1896 | ||
def __str__(self) -> str:
    """Return this expression's ``name``."""
    label = self.name
    return label
1899 | ||
def __repr__(self) -> str:
    """Return the same text as ``str(self)``."""
    text = str(self)
    return text
1902 | ||
def streamline(self) -> "ParserElement":
    """
    Mark this expression as streamlined and clear the cached default name, so that it
    is regenerated on next access. Returns ``self``.
    """
    self._defaultName = None
    self.streamlined = True
    return self
1907 | ||
def recurse(self) -> List["ParserElement"]:
    """Return the sub-expressions of this element; this implementation has none."""
    children: List["ParserElement"] = []
    return children
1910 | ||
def _checkRecursion(self, parseElementList):
    """
    Run ``_checkRecursion`` on every element returned by ``recurse()``, passing each a
    new list made of ``parseElementList`` followed by this element. The caller's list
    is not modified.
    """
    path_with_self = parseElementList[:] + [self]
    for child in self.recurse():
        child._checkRecursion(path_with_self)
1915 | ||
def validate(self, validateTrace=None) -> None:
    """
    Check defined expressions for valid structure, check for infinite recursive definitions.

    Deprecated: a ``DeprecationWarning`` is issued on every call. ``validateTrace`` is
    accepted but not used by this implementation.
    """
    deprecation_msg = "ParserElement.validate() is deprecated, and should not be used to check for left recursion"
    warnings.warn(deprecation_msg, DeprecationWarning, stacklevel=2)
    self._checkRecursion([])
1926 | ||
def parse_file(
    self,
    file_or_filename: Union[str, Path, TextIO],
    encoding: str = "utf-8",
    parse_all: bool = False,
    *,
    parseAll: bool = False,
) -> ParseResults:
    """
    Execute the parse expression on the given file or filename.
    If a filename is specified (instead of a file object),
    the entire file is opened, read, and closed before parsing.

    ``encoding`` is used only when the file is opened here from a filename.
    ``parse_all`` (or its synonym ``parseAll``) is forwarded to ``parse_string``.
    """
    parse_to_end = parseAll or parse_all
    try:
        # first try the argument as a file-like object
        text = typing.cast(TextIO, file_or_filename).read()
    except AttributeError:
        # no usable read() - treat the argument as a path to open
        path = typing.cast(str, file_or_filename)
        with open(path, "r", encoding=encoding) as source:
            text = source.read()

    try:
        return self.parse_string(text, parse_to_end)
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc.with_traceback(None)
1956 | ||
def __eq__(self, other):
    """
    Compare this expression with ``other``:

    - the same object compares equal
    - a string compares equal if this expression matches it in full
    - another :class:`ParserElement` compares equal if both have equal instance attributes
    - anything else compares unequal
    """
    if self is other:
        return True
    if isinstance(other, str_type):
        return self.matches(other, parse_all=True)
    if isinstance(other, ParserElement):
        return vars(self) == vars(other)
    return False
1965 | ||
def __hash__(self):
    """Hash by object identity."""
    identity = id(self)
    return identity
1968 | ||
def matches(
    self, test_string: str, parse_all: bool = True, *, parseAll: bool = True
) -> bool:
    """
    Method for quick testing of a parser against a test string. Good for simple
    inline microtests of sub expressions while building up larger parser.

    Parameters:

    - ``test_string`` - to test against this expression for a match
    - ``parse_all`` - (default= ``True``) - flag to pass to :class:`parse_string` when running tests

    Returns ``True`` if parsing succeeds, ``False`` if it raises a parse exception.

    Example::

        expr = Word(nums)
        assert expr.matches("100")
    """
    # full-text matching is required only if neither spelling of the flag disables it
    require_full_match = parseAll and parse_all
    try:
        self.parse_string(str(test_string), parse_all=require_full_match)
    except ParseBaseException:
        return False
    else:
        return True
1992 | ||
def run_tests(
    self,
    tests: Union[str, List[str]],
    parse_all: bool = True,
    comment: typing.Optional[Union["ParserElement", str]] = "#",
    full_dump: bool = True,
    print_results: bool = True,
    failure_tests: bool = False,
    post_parse: typing.Optional[Callable[[str, ParseResults], str]] = None,
    file: typing.Optional[TextIO] = None,
    with_line_numbers: bool = False,
    *,
    parseAll: bool = True,
    fullDump: bool = True,
    printResults: bool = True,
    failureTests: bool = False,
    postParse: typing.Optional[Callable[[str, ParseResults], str]] = None,
) -> Tuple[bool, List[Tuple[str, Union[ParseResults, Exception]]]]:
    """
    Execute the parse expression on a series of test strings, showing each
    test, the parsed results or where the parse failed. Quick and easy way to
    run a parse expression against a list of sample strings.

    Parameters:

    - ``tests`` - a list of separate test strings, or a multiline string of test strings
    - ``parse_all`` - (default= ``True``) - flag to pass to :class:`parse_string` when running tests
    - ``comment`` - (default= ``'#'``) - expression for indicating embedded comments in the test
      string; pass None to disable comment filtering
    - ``full_dump`` - (default= ``True``) - dump results as list followed by results names in nested outline;
      if False, only dump nested list
    - ``print_results`` - (default= ``True``) prints test output to stdout
    - ``failure_tests`` - (default= ``False``) indicates if these tests are expected to fail parsing
    - ``post_parse`` - (default= ``None``) optional callback for successful parse results; called as
      `fn(test_string, parse_results)` and returns a string to be added to the test output
    - ``file`` - (default= ``None``) optional file-like object to which test output will be written;
      if None, will default to ``sys.stdout``
    - ``with_line_numbers`` - (default= ``False``) show test strings with line and column numbers

    Returns: a (success, results) tuple, where success indicates that all tests succeeded
    (or failed if ``failure_tests`` is True), and the results contain a list of
    (test string, parse results or raised exception) pairs, one per test

    Example::

        number_expr = pyparsing_common.number.copy()

        result = number_expr.run_tests('''
            # unsigned integer
            100
            # negative integer
            -100
            # float with scientific notation
            6.02e23
            # integer with scientific notation
            1e-12
            ''')
        print("Success" if result[0] else "Failed!")

        result = number_expr.run_tests('''
            # stray character
            100Z
            # missing leading digit before '.'
            -.100
            # too many '.'
            3.14.159
            ''', failure_tests=True)
        print("Success" if result[0] else "Failed!")

    prints::

        # unsigned integer
        100
        [100]

        # negative integer
        -100
        [-100]

        # float with scientific notation
        6.02e23
        [6.02e+23]

        # integer with scientific notation
        1e-12
        [1e-12]

        Success

        # stray character
        100Z
           ^
        FAIL: Expected end of text (at char 3), (line:1, col:4)

        # missing leading digit before '.'
        -.100
        ^
        FAIL: Expected {real number with scientific notation | real number | signed integer} (at char 0), (line:1, col:1)

        # too many '.'
        3.14.159
            ^
        FAIL: Expected end of text (at char 4), (line:1, col:5)

        Success

    Each test string must be on a single line. If you want to test a string that spans multiple
    lines, create a test like this::

        expr.run_tests(r"this is a test\\n of strings that spans \\n 3 lines")

    (Note that this is a raw string literal, you must include the leading ``'r'``.)
    """
    from .testing import pyparsing_test

    # merge each PEP8-named argument with its pre-PEP8 keyword synonym
    parseAll = parseAll and parse_all
    fullDump = fullDump and full_dump
    printResults = printResults and print_results
    failureTests = failureTests or failure_tests
    postParse = postParse or post_parse
    if isinstance(tests, str_type):
        # split a multiline string into individual stripped test lines
        tests = typing.cast(str, tests)
        line_strip = type(tests).strip
        tests = [line_strip(test_line) for test_line in tests.rstrip().splitlines()]
    comment_specified = comment is not None
    if comment_specified:
        if isinstance(comment, str_type):
            comment = typing.cast(str, comment)
            comment = Literal(comment)
        comment = typing.cast(ParserElement, comment)
    if file is None:
        file = sys.stdout
    print_ = file.write

    result: Union[ParseResults, Exception]
    allResults: List[Tuple[str, Union[ParseResults, Exception]]] = []
    comments: List[str] = []
    success = True
    NL = Literal(r"\n").add_parse_action(replace_with("\n")).ignore(quoted_string)
    BOM = "\ufeff"
    for t in tests:
        # collect comment lines (and blank lines following them) to print with the next test
        if comment_specified and comment.matches(t, False) or comments and not t:
            comments.append(
                pyparsing_test.with_line_numbers(t) if with_line_numbers else t
            )
            continue
        if not t:
            continue
        out = [
            "\n" + "\n".join(comments) if comments else "",
            pyparsing_test.with_line_numbers(t) if with_line_numbers else t,
        ]
        comments = []
        try:
            # convert newline marks to actual newlines, and strip leading BOM if present
            t = NL.transform_string(t.lstrip(BOM))
            result = self.parse_string(t, parse_all=parseAll)
        except ParseBaseException as pe:
            out.append(pe.explain())
            out.append("FAIL: " + str(pe))
            if ParserElement.verbose_stacktrace:
                out.extend(traceback.format_tb(pe.__traceback__))
            success = success and failureTests
            result = pe
        except Exception as exc:
            out.append(f"FAIL-EXCEPTION: {type(exc).__name__}: {exc}")
            if ParserElement.verbose_stacktrace:
                out.extend(traceback.format_tb(exc.__traceback__))
            success = success and failureTests
            result = exc
        else:
            success = success and not failureTests
            if postParse is not None:
                try:
                    pp_value = postParse(t, result)
                    if pp_value is not None:
                        if isinstance(pp_value, ParseResults):
                            out.append(pp_value.dump())
                        else:
                            out.append(str(pp_value))
                    else:
                        # callback produced nothing - dump the results, honoring full_dump
                        out.append(result.dump(full=fullDump))
                except Exception as e:
                    out.append(result.dump(full=fullDump))
                    # not every callable has __name__ (e.g. functools.partial objects)
                    post_parse_name = getattr(postParse, "__name__", repr(postParse))
                    out.append(
                        f"{post_parse_name} failed: {type(e).__name__}: {e}"
                    )
            else:
                out.append(result.dump(full=fullDump))
        out.append("")

        if printResults:
            print_("\n".join(out))

        allResults.append((t, result))

    return success, allResults
2191 | ||
def create_diagram(
    self,
    output_html: Union[TextIO, Path, str],
    vertical: int = 3,
    show_results_names: bool = False,
    show_groups: bool = False,
    embed: bool = False,
    **kwargs,
) -> None:
    """
    Create a railroad diagram for the parser.

    Parameters:

    - ``output_html`` (str, Path, or file-like object) - output target for generated
      diagram HTML
    - ``vertical`` (int) - threshold for formatting multiple alternatives vertically
      instead of horizontally (default=3)
    - ``show_results_names`` - bool flag whether diagram should show annotations for
      defined results names
    - ``show_groups`` - bool flag whether groups should be highlighted with an unlabeled surrounding box
    - ``embed`` - bool flag whether generated HTML should omit <HEAD>, <BODY>, and <DOCTYPE> tags to embed
      the resulting HTML in an enclosing HTML source
    - ``head`` - str containing additional HTML to insert into the <HEAD> section of the generated code;
      can be used to insert custom CSS styling
    - ``body`` - str containing additional HTML to insert at the beginning of the <BODY> section of the
      generated code

    Additional diagram-formatting keyword arguments can also be included;
    see railroad.Diagram class.

    Raises ``Exception`` if the diagram support module cannot be imported.
    """

    try:
        from .diagram import to_railroad, railroad_to_html
    except ImportError as ie:
        raise Exception(
            "must ``pip install pyparsing[diagrams]`` to generate parser railroad diagrams"
        ) from ie

    self.streamline()

    railroad = to_railroad(
        self,
        vertical=vertical,
        show_results_names=show_results_names,
        show_groups=show_groups,
        diagram_kwargs=kwargs,
    )

    if not isinstance(output_html, (str, Path)):
        # we were passed a file-like object, just write to it
        output_html.write(railroad_to_html(railroad, embed=embed, **kwargs))
        return

    # a filename was given - create/overwrite that file with the diagram HTML
    with open(output_html, "w", encoding="utf-8") as diag_file:
        diag_file.write(railroad_to_html(railroad, embed=embed, **kwargs))
2246 | ||
# Compatibility synonyms
#
# Each camelCase name below is a pre-PEP8 spelling of the snake_case method named in its
# ``replaced_by_pep8`` decorator. The stub bodies are intentionally empty (``...``);
# the decorator is given the PEP8-named method that the synonym stands for.
# fmt: off
@staticmethod
@replaced_by_pep8(inline_literals_using)
def inlineLiteralsUsing(): ...

@staticmethod
@replaced_by_pep8(set_default_whitespace_chars)
def setDefaultWhitespaceChars(): ...

@replaced_by_pep8(set_results_name)
def setResultsName(self): ...

@replaced_by_pep8(set_break)
def setBreak(self): ...

@replaced_by_pep8(set_parse_action)
def setParseAction(self): ...

@replaced_by_pep8(add_parse_action)
def addParseAction(self): ...

@replaced_by_pep8(add_condition)
def addCondition(self): ...

@replaced_by_pep8(set_fail_action)
def setFailAction(self): ...

@replaced_by_pep8(try_parse)
def tryParse(self): ...

@staticmethod
@replaced_by_pep8(enable_left_recursion)
def enableLeftRecursion(): ...

@staticmethod
@replaced_by_pep8(enable_packrat)
def enablePackrat(): ...

@replaced_by_pep8(parse_string)
def parseString(self): ...

@replaced_by_pep8(scan_string)
def scanString(self): ...

@replaced_by_pep8(transform_string)
def transformString(self): ...

@replaced_by_pep8(search_string)
def searchString(self): ...

@replaced_by_pep8(ignore_whitespace)
def ignoreWhitespace(self): ...

@replaced_by_pep8(leave_whitespace)
def leaveWhitespace(self): ...

@replaced_by_pep8(set_whitespace_chars)
def setWhitespaceChars(self): ...

@replaced_by_pep8(parse_with_tabs)
def parseWithTabs(self): ...

@replaced_by_pep8(set_debug_actions)
def setDebugActions(self): ...

@replaced_by_pep8(set_debug)
def setDebug(self): ...

@replaced_by_pep8(set_name)
def setName(self): ...

@replaced_by_pep8(parse_file)
def parseFile(self): ...

@replaced_by_pep8(run_tests)
def runTests(self): ...

# plain aliases: these names are bound directly to the same objects as the PEP8 names
canParseNext = can_parse_next
resetCache = reset_cache
defaultName = default_name
# fmt: on
2329 | ||
2330 | ||
class _PendingSkip(ParserElement):
    # Internal placeholder that holds the place where '...' was added to a parser element.
    # Once another ParserElement is added to it, the placeholder is replaced by an
    # expression built around a SkipTo for that element.
    def __init__(self, expr: ParserElement, must_skip: bool = False):
        super().__init__()
        self.must_skip = must_skip
        self.anchor = expr

    def _generateDefaultName(self) -> str:
        # render as the anchor followed by "..." by naming a stand-in Empty()
        placeholder_name = str(self.anchor + Empty())
        return placeholder_name.replace("Empty", "...")

    def __add__(self, other) -> "ParserElement":
        skipper = SkipTo(other).set_name("...")("_skipped*")
        if not self.must_skip:
            return self.anchor + skipper + other

        def drop_empty_skip(t):
            # the anchor matched: discard the skipped text if nothing was actually skipped
            if not t._skipped or t._skipped.as_list() == [""]:
                del t[0]
                t.pop("_skipped", None)

        def report_missing_anchor(t):
            # the anchor did not match: if the skip ended empty, record what was missing
            if t._skipped.as_list()[-1:] == [""]:
                t.pop("_skipped")
                t["_skipped"] = f"missing <{self.anchor!r}>"

        with_anchor = self.anchor + skipper().add_parse_action(drop_empty_skip)
        without_anchor = skipper().add_parse_action(report_missing_anchor)
        return (with_anchor | without_anchor) + other

    def __repr__(self):
        return self.defaultName

    def parseImpl(self, *args):
        # a placeholder never parses; it must first be combined with a following expression
        raise Exception(
            "use of `...` expression without following SkipTo target expression"
        )
2370 | ||
2371 | ||
class Token(ParserElement):
    """Abstract :class:`ParserElement` subclass, used as the base for atomic
    matching patterns.
    """

    def __init__(self):
        super().__init__(savelist=False)

    def _generateDefaultName(self) -> str:
        # the default name is simply the name of the concrete class
        concrete_class = type(self)
        return concrete_class.__name__
2382 | ||
2383 | ||
class NoMatch(Token):
    """
    A token that will never match.
    """

    def __init__(self):
        super().__init__()
        self.errmsg = "Unmatchable token"
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        # every parse attempt fails, reporting this token's error message
        failure = ParseException(instring, loc, self.errmsg, self)
        raise failure
2397 | ||
2398 | ||
class Literal(Token):
    """
    Token that matches one specific string exactly.

    Example::

        Literal('blah').parse_string('blah')  # -> ['blah']
        Literal('blah').parse_string('blahfooblah')  # -> ['blah']
        Literal('blah').parse_string('bla')  # -> Exception: Expected "blah"

    For case-insensitive matching, use :class:`CaselessLiteral`.

    For keyword matching (force word break before and after the matched string),
    use :class:`Keyword` or :class:`CaselessKeyword`.
    """

    def __new__(cls, match_string: str = "", *, matchString: str = ""):
        # Subclasses are always instantiated as themselves.
        if cls is not Literal:
            return super().__new__(cls)

        # Performance tuning: for a plain Literal, pick a subclass with an
        # optimized parseImpl based on the length of the string.
        text = matchString or match_string
        if not text:
            return super().__new__(Empty)
        if len(text) == 1:
            return super().__new__(_SingleCharLiteral)
        return super().__new__(cls)

    # Needed to make copy.copy() work correctly if we customize __new__
    def __getnewargs__(self):
        return (self.match,)

    def __init__(self, match_string: str = "", *, matchString: str = ""):
        super().__init__()
        # the camelCase keyword takes precedence when given
        text = matchString or match_string
        self.match = text
        self.matchLen = len(text)
        self.firstMatchChar = text[:1]
        # self.name is read only after self.match is set, since the default
        # name is built from it
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = False
        self.mayIndexError = False

    def _generateDefaultName(self) -> str:
        return repr(self.match)

    def parseImpl(self, instring, loc, doActions=True):
        # Compare the first character before doing the full startswith test.
        if instring[loc] != self.firstMatchChar or not instring.startswith(
            self.match, loc
        ):
            raise ParseException(instring, loc, self.errmsg, self)
        return loc + self.matchLen, self.match
2450 | ||
2451 | ||
class Empty(Literal):
    """
    An empty token; it always matches and consumes no input.
    """

    def __init__(self, match_string="", *, matchString=""):
        # Both arguments are accepted for signature compatibility with
        # Literal but are not used; the literal text is always "".
        super().__init__("")
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def _generateDefaultName(self) -> str:
        return "Empty"

    def parseImpl(self, instring, loc, doActions=True):
        # Succeed in place: location unchanged, no tokens.
        no_tokens = []
        return loc, no_tokens
2467 | ||
2468 | ||
class _SingleCharLiteral(Literal):
    # Literal specialization for one-character strings: a single character
    # comparison replaces the startswith test.
    def parseImpl(self, instring, loc, doActions=True):
        if instring[loc] != self.firstMatchChar:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc + 1, self.match


# Register Literal as ParserElement's literal string class.
ParserElement._literalStringClass = Literal
2477 | ||
2478 | ||
class Keyword(Token):
    """
    Token to exactly match a specified string as a keyword, that is,
    it must be immediately preceded and followed by whitespace or
    non-keyword characters. Compare with :class:`Literal`:

    - ``Literal("if")`` will match the leading ``'if'`` in
      ``'ifAndOnlyIf'``.
    - ``Keyword("if")`` will not; it will only match the leading
      ``'if'`` in ``'if x=1'``, or ``'if(y==2)'``

    Accepts two optional constructor arguments in addition to the
    keyword string:

    - ``ident_chars`` is a string of characters that would be valid
      identifier characters, defaulting to all alphanumerics + "_" and
      "$"
    - ``caseless`` allows case-insensitive matching, default is ``False``.

    Example::

        Keyword("start").parse_string("start")  # -> ['start']
        Keyword("start").parse_string("starting")  # -> Exception

    For case-insensitive matching, use :class:`CaselessKeyword`.
    """

    DEFAULT_KEYWORD_CHARS = alphanums + "_$"

    def __init__(
        self,
        match_string: str = "",
        ident_chars: typing.Optional[str] = None,
        caseless: bool = False,
        *,
        matchString: str = "",
        identChars: typing.Optional[str] = None,
    ):
        super().__init__()
        # camelCase keyword arguments take precedence over their snake_case forms
        keyword_chars = identChars or ident_chars
        if keyword_chars is None:
            keyword_chars = Keyword.DEFAULT_KEYWORD_CHARS
        text = matchString or match_string

        self.match = text
        self.matchLen = len(text)
        try:
            self.firstMatchChar = text[0]
        except IndexError:
            raise ValueError("null string passed to Keyword; use Empty() instead")
        self.errmsg = f"Expected {type(self).__name__} {self.name}"
        self.mayReturnEmpty = False
        self.mayIndexError = False
        self.caseless = caseless
        if caseless:
            # caseless comparisons are done on upper-cased text
            self.caselessmatch = text.upper()
            keyword_chars = keyword_chars.upper()
        self.identChars = set(keyword_chars)

    def _generateDefaultName(self) -> str:
        return repr(self.match)

    def parseImpl(self, instring, loc, doActions=True):
        match_len = self.matchLen
        keyword_chars = self.identChars
        end = loc + match_len

        if self.caseless:
            # no match: raise the plain exception
            if instring[loc:end].upper() != self.caselessmatch:
                raise ParseException(instring, loc, self.errmsg, self)

            # preceded by keyword char
            if loc != 0 and instring[loc - 1].upper() in keyword_chars:
                raise ParseException(
                    instring,
                    loc - 1,
                    self.errmsg
                    + ", keyword was immediately preceded by keyword character",
                    self,
                )

            # followed by keyword char
            if (
                loc < len(instring) - match_len
                and instring[end].upper() in keyword_chars
            ):
                raise ParseException(
                    instring,
                    end,
                    self.errmsg + ", was immediately followed by keyword character",
                    self,
                )

            return end, self.match

        # case-sensitive match
        text_found = (
            instring[loc] == self.firstMatchChar and match_len == 1
        ) or instring.startswith(self.match, loc)

        # no match: raise the plain exception
        if not text_found:
            raise ParseException(instring, loc, self.errmsg, self)

        # preceded by keyword char
        if loc != 0 and instring[loc - 1] in keyword_chars:
            raise ParseException(
                instring,
                loc - 1,
                self.errmsg + ", keyword was immediately preceded by keyword character",
                self,
            )

        # followed by keyword char
        if loc < len(instring) - match_len and instring[end] in keyword_chars:
            raise ParseException(
                instring,
                end,
                self.errmsg + ", keyword was immediately followed by keyword character",
                self,
            )

        return end, self.match

    @staticmethod
    def set_default_keyword_chars(chars) -> None:
        """
        Overrides the default characters used by :class:`Keyword` expressions.
        """
        Keyword.DEFAULT_KEYWORD_CHARS = chars

    # camelCase alias of set_default_keyword_chars
    setDefaultKeywordChars = set_default_keyword_chars
2595 | ||
2596 | ||
class CaselessLiteral(Literal):
    """
    Token to match a specified string, ignoring case of letters.
    Note: the matched results will always be in the case of the given
    match string, NOT the case of the input text.

    Example::

        CaselessLiteral("CMD")[1, ...].parse_string("cmd CMD Cmd10")
        # -> ['CMD', 'CMD', 'CMD']

    (Contrast with example for :class:`CaselessKeyword`.)
    """

    def __init__(self, match_string: str = "", *, matchString: str = ""):
        original_text = matchString or match_string
        # The inherited ``match`` attribute holds the upper-cased text used
        # for comparison.
        super().__init__(original_text.upper())
        # Preserve the defining literal; this is what a match returns.
        self.returnString = original_text
        self.errmsg = "Expected " + self.name

    def parseImpl(self, instring, loc, doActions=True):
        end = loc + self.matchLen
        candidate = instring[loc:end].upper()
        if candidate != self.match:
            raise ParseException(instring, loc, self.errmsg, self)
        return end, self.returnString
2622 | ||
2623 | ||
class CaselessKeyword(Keyword):
    """
    Caseless version of :class:`Keyword`.

    Example::

        CaselessKeyword("CMD")[1, ...].parse_string("cmd CMD Cmd10")
        # -> ['CMD', 'CMD']

    (Contrast with example for :class:`CaselessLiteral`.)
    """

    def __init__(
        self,
        match_string: str = "",
        ident_chars: typing.Optional[str] = None,
        *,
        matchString: str = "",
        identChars: typing.Optional[str] = None,
    ):
        # camelCase keyword arguments take precedence over their snake_case forms
        keyword_text = matchString or match_string
        keyword_chars = identChars or ident_chars
        super().__init__(keyword_text, keyword_chars, caseless=True)
2647 | ||
2648 | ||
class CloseMatch(Token):
    """A variation on :class:`Literal` which matches "close" matches,
    that is, strings with at most 'n' mismatching characters.
    :class:`CloseMatch` takes parameters:

    - ``match_string`` - string to be matched
    - ``caseless`` - a boolean indicating whether to ignore casing when comparing characters
    - ``max_mismatches`` - (``default=1``) maximum number of
      mismatches allowed to count as a match

    The results from a successful parse will contain the matched text
    from the input string and the following named results:

    - ``mismatches`` - a list of the positions within the
      match_string where mismatches were found
    - ``original`` - the original match_string used to compare
      against the input string

    If ``mismatches`` is an empty list, then the match was an exact
    match.

    Example::

        patt = CloseMatch("ATCATCGAATGGA")
        patt.parse_string("ATCATCGAAXGGA") # -> (['ATCATCGAAXGGA'], {'mismatches': [[9]], 'original': ['ATCATCGAATGGA']})
        patt.parse_string("ATCAXCGAAXGGA") # -> Exception: Expected 'ATCATCGAATGGA' (with up to 1 mismatches) (at char 0), (line:1, col:1)

        # exact match
        patt.parse_string("ATCATCGAATGGA") # -> (['ATCATCGAATGGA'], {'mismatches': [[]], 'original': ['ATCATCGAATGGA']})

        # close match allowing up to 2 mismatches
        patt = CloseMatch("ATCATCGAATGGA", max_mismatches=2)
        patt.parse_string("ATCAXCGAAXGGA") # -> (['ATCAXCGAAXGGA'], {'mismatches': [[4, 9]], 'original': ['ATCATCGAATGGA']})
    """

    def __init__(
        self,
        match_string: str,
        max_mismatches: typing.Optional[int] = None,
        *,
        maxMismatches: int = 1,
        caseless=False,
    ):
        # the snake_case argument wins when it is given explicitly
        maxMismatches = max_mismatches if max_mismatches is not None else maxMismatches
        super().__init__()
        self.match_string = match_string
        self.maxMismatches = maxMismatches
        self.errmsg = f"Expected {self.match_string!r} (with up to {self.maxMismatches} mismatches)"
        self.caseless = caseless
        self.mayIndexError = False
        self.mayReturnEmpty = False

    def _generateDefaultName(self) -> str:
        return f"{type(self).__name__}:{self.match_string!r}"

    def parseImpl(self, instring, loc, doActions=True):
        """Compare the input at ``loc`` with ``match_string`` character by
        character, tolerating up to ``maxMismatches`` differing positions.

        Returns ``(new_loc, results)`` where ``results`` holds the matched
        input text plus the named results ``original`` and ``mismatches``.
        Raises :class:`ParseException` at ``loc`` when too little input
        remains or the mismatch limit is exceeded.
        """
        start = loc
        match_string = self.match_string
        maxloc = start + len(match_string)

        # only attempt a comparison when enough input remains
        if maxloc <= len(instring):
            maxMismatches = self.maxMismatches
            caseless = self.caseless
            mismatches = []

            for position, (src, mat) in enumerate(
                zip(instring[start:maxloc], match_string)
            ):
                if caseless:
                    src, mat = src.lower(), mat.lower()

                if src != mat:
                    mismatches.append(position)
                    if len(mismatches) > maxMismatches:
                        break
            else:
                # The loop ran to completion, so every character of
                # match_string was compared and the new location is maxloc.
                # Using maxloc (not "last index + 1") also keeps an empty
                # match_string from consuming a character it never compared.
                results = ParseResults([instring[start:maxloc]])
                results["original"] = match_string
                results["mismatches"] = mismatches
                return maxloc, results

        raise ParseException(instring, start, self.errmsg, self)
2734 | ||
2735 | ||
class Word(Token):
    """Token for matching words composed of allowed character sets.

    Parameters:

    - ``init_chars`` - string of all characters that should be used to
      match as a word; "ABC" will match "AAA", "ABAB", "CBAC", etc.;
      if ``body_chars`` is also specified, then this is the string of
      initial characters
    - ``body_chars`` - string of characters that
      can be used for matching after a matched initial character as
      given in ``init_chars``; if omitted, same as the initial characters
      (default=``None``)
    - ``min`` - minimum number of characters to match (default=1)
    - ``max`` - maximum number of characters to match (default=0)
    - ``exact`` - exact number of characters to match (default=0)
    - ``as_keyword`` - match as a keyword (default=``False``)
    - ``exclude_chars`` - characters that might be
      found in the input ``body_chars`` string but which should not be
      accepted for matching; useful to define a word of all
      printables except for one or two characters, for instance
      (default=``None``)

    :class:`srange` is useful for defining custom character set strings
    for defining :class:`Word` expressions, using range notation from
    regular expression character sets.

    A common mistake is to use :class:`Word` to match a specific literal
    string, as in ``Word("Address")``. Remember that :class:`Word`
    uses the string argument to define *sets* of matchable characters.
    This expression would match "Add", "AAA", "dAred", or any other word
    made up of the characters 'A', 'd', 'r', 'e', and 's'. To match an
    exact literal string, use :class:`Literal` or :class:`Keyword`.

    pyparsing includes helper strings for building Words:

    - :class:`alphas`
    - :class:`nums`
    - :class:`alphanums`
    - :class:`hexnums`
    - :class:`alphas8bit` (alphabetic characters in ASCII range 128-255
      - accented, tilded, umlauted, etc.)
    - :class:`punc8bit` (non-alphabetic characters in ASCII range
      128-255 - currency, symbols, superscripts, diacriticals, etc.)
    - :class:`printables` (any non-whitespace character)

    ``alphas``, ``nums``, and ``printables`` are also defined in several
    Unicode sets - see :class:`pyparsing_unicode`.

    Example::

        # a word composed of digits
        integer = Word(nums) # equivalent to Word("0123456789") or Word(srange("0-9"))

        # a word with a leading capital, and zero or more lowercase
        capital_word = Word(alphas.upper(), alphas.lower())

        # hostnames are alphanumeric, with leading alpha, and '-'
        hostname = Word(alphas, alphanums + '-')

        # roman numeral (not a strict parser, accepts invalid mix of characters)
        roman = Word("IVXLCDM")

        # any string of non-whitespace characters, except for ','
        csv_value = Word(printables, exclude_chars=",")
    """

    def __init__(
        self,
        init_chars: str = "",
        body_chars: typing.Optional[str] = None,
        min: int = 1,
        max: int = 0,
        exact: int = 0,
        as_keyword: bool = False,
        exclude_chars: typing.Optional[str] = None,
        *,
        initChars: typing.Optional[str] = None,
        bodyChars: typing.Optional[str] = None,
        asKeyword: bool = False,
        excludeChars: typing.Optional[str] = None,
    ):
        # camelCase keyword arguments take precedence over their snake_case forms
        initChars = initChars or init_chars
        bodyChars = bodyChars or body_chars
        asKeyword = asKeyword or as_keyword
        excludeChars = excludeChars or exclude_chars
        super().__init__()
        if not initChars:
            raise ValueError(
                f"invalid {type(self).__name__}, initChars cannot be empty string"
            )

        initChars_set = set(initChars)
        if excludeChars:
            excludeChars_set = set(excludeChars)
            initChars_set -= excludeChars_set
            if bodyChars:
                bodyChars = "".join(set(bodyChars) - excludeChars_set)
        self.initChars = initChars_set
        self.initCharsOrig = "".join(sorted(initChars_set))

        if bodyChars:
            self.bodyChars = set(bodyChars)
            self.bodyCharsOrig = "".join(sorted(bodyChars))
        else:
            # no (remaining) body characters: body set is the initial set
            self.bodyChars = initChars_set
            self.bodyCharsOrig = self.initCharsOrig

        self.maxSpecified = max > 0

        if min < 1:
            raise ValueError(
                "cannot specify a minimum length < 1; use Opt(Word()) if zero-length word is permitted"
            )

        if self.maxSpecified and min > max:
            raise ValueError(
                f"invalid args, if min and max both specified min must be <= max (min={min}, max={max})"
            )

        self.minLen = min

        if max > 0:
            self.maxLen = max
        else:
            self.maxLen = _MAX_INT

        # an exact length overrides both min and max
        if exact > 0:
            min = max = exact
            self.maxLen = exact
            self.minLen = exact

        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.asKeyword = asKeyword
        if self.asKeyword:
            self.errmsg += " as a keyword"

        # see if we can make a regex for this Word
        if " " not in (self.initChars | self.bodyChars):
            if len(self.initChars) == 1:
                re_leading_fragment = re.escape(self.initCharsOrig)
            else:
                re_leading_fragment = f"[{_collapse_string_to_ranges(self.initChars)}]"

            if self.bodyChars == self.initChars:
                # one character class repeated for the whole word
                if max == 0 and self.minLen == 1:
                    # "+" is only right for the default minimum of 1; a larger
                    # minimum must be carried into the regex as {min,}
                    repeat = "+"
                elif max == 1:
                    repeat = ""
                elif self.minLen != self.maxLen:
                    upper_bound = "" if self.maxLen == _MAX_INT else self.maxLen
                    repeat = f"{{{self.minLen},{upper_bound}}}"
                else:
                    repeat = f"{{{self.minLen}}}"
                self.reString = f"{re_leading_fragment}{repeat}"
            else:
                # a leading character class followed by a repeated body class;
                # the leading character accounts for one of the min/max count
                if max == 1:
                    re_body_fragment = ""
                    repeat = ""
                else:
                    re_body_fragment = f"[{_collapse_string_to_ranges(self.bodyChars)}]"
                    if max == 0 and self.minLen == 1:
                        # "*" is only right for the default minimum of 1
                        repeat = "*"
                    elif max == 2:
                        repeat = "?" if min <= 1 else ""
                    elif min != max:
                        # max == 0 means "no upper bound": leave it open-ended
                        upper_bound = max - 1 if max > 0 else ""
                        repeat = f"{{{min - 1},{upper_bound}}}"
                    else:
                        repeat = f"{{{min - 1}}}"

                self.reString = (
                    f"{re_leading_fragment}" f"{re_body_fragment}" f"{repeat}"
                )

            if self.asKeyword:
                self.reString = rf"\b{self.reString}\b"

            try:
                self.re = re.compile(self.reString)
            except re.error:
                # fall back to the character-by-character parseImpl
                self.re = None  # type: ignore[assignment]
            else:
                self.re_match = self.re.match
                self.parseImpl = self.parseImpl_regex  # type: ignore[assignment]

    def _generateDefaultName(self) -> str:
        def charsAsStr(s):
            # collapse to ranges and truncate long character sets
            max_repr_len = 16
            s = _collapse_string_to_ranges(s, re_escape=False)
            if len(s) > max_repr_len:
                return s[: max_repr_len - 3] + "..."
            else:
                return s

        if self.initChars != self.bodyChars:
            base = f"W:({charsAsStr(self.initChars)}, {charsAsStr(self.bodyChars)})"
        else:
            base = f"W:({charsAsStr(self.initChars)})"

        # add length specification
        if self.minLen > 1 or self.maxLen != _MAX_INT:
            if self.minLen == self.maxLen:
                if self.minLen == 1:
                    # single character: drop the leading "W:"
                    return base[2:]
                else:
                    return base + f"{{{self.minLen}}}"
            elif self.maxLen == _MAX_INT:
                return base + f"{{{self.minLen},...}}"
            else:
                return base + f"{{{self.minLen},{self.maxLen}}}"
        return base

    def parseImpl(self, instring, loc, doActions=True):
        # Character-by-character matcher, used when no regex is installed.
        if instring[loc] not in self.initChars:
            raise ParseException(instring, loc, self.errmsg, self)

        start = loc
        loc += 1
        instrlen = len(instring)
        bodychars = self.bodyChars
        maxloc = start + self.maxLen
        maxloc = min(maxloc, instrlen)
        while loc < maxloc and instring[loc] in bodychars:
            loc += 1

        throwException = False
        if loc - start < self.minLen:
            # too few characters
            throwException = True
        elif self.maxSpecified and loc < instrlen and instring[loc] in bodychars:
            # an explicit max was given and the word continues past it
            throwException = True
        elif self.asKeyword:
            # a keyword must not be adjacent to another body character
            if (
                start > 0
                and instring[start - 1] in bodychars
                or loc < instrlen
                and instring[loc] in bodychars
            ):
                throwException = True

        if throwException:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]

    def parseImpl_regex(self, instring, loc, doActions=True):
        # Regex-based matcher installed by __init__ when reString compiles.
        result = self.re_match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        return loc, result.group()
2989 | ||
2990 | ||
class Char(Word):
    """A short-cut class for defining :class:`Word` ``(characters, exact=1)``,
    for matching any single character out of a given string of characters.
    """

    def __init__(
        self,
        charset: str,
        as_keyword: bool = False,
        exclude_chars: typing.Optional[str] = None,
        *,
        asKeyword: bool = False,
        excludeChars: typing.Optional[str] = None,
    ):
        # camelCase keyword arguments take precedence over their snake_case forms
        keyword_flag = asKeyword or as_keyword
        excluded = excludeChars or exclude_chars
        super().__init__(
            charset, exact=1, as_keyword=keyword_flag, exclude_chars=excluded
        )
3011 | ||
3012 | ||
class Regex(Token):
    r"""Token for matching strings that match a given regular
    expression. Defined with string specifying the regular expression in
    a form recognized by the stdlib Python `re module <https://docs.python.org/3/library/re.html>`_.
    If the given regex contains named groups (defined using ``(?P<name>...)``),
    these will be preserved as named :class:`ParseResults`.

    If instead of the Python stdlib ``re`` module you wish to use a different RE module
    (such as the ``regex`` module), you can do so by building your ``Regex`` object with
    a compiled RE that was compiled using ``regex``.

    Example::

        realnum = Regex(r"[+-]?\d+\.\d*")
        # ref: https://stackoverflow.com/questions/267399/how-do-you-match-only-valid-roman-numerals-with-a-regular-expression
        roman = Regex(r"M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})")

        # named fields in a regex will be returned as named results
        date = Regex(r'(?P<year>\d{4})-(?P<month>\d\d?)-(?P<day>\d\d?)')

        # the Regex class will accept re's compiled using the regex module
        import regex
        parser = pp.Regex(regex.compile(r'[0-9]'))
    """

    def __init__(
        self,
        pattern: Any,
        flags: Union[re.RegexFlag, int] = 0,
        as_group_list: bool = False,
        as_match: bool = False,
        *,
        asGroupList: bool = False,
        asMatch: bool = False,
    ):
        """The parameters ``pattern`` and ``flags`` are passed
        to the ``re.compile()`` function as-is. See the Python
        `re module <https://docs.python.org/3/library/re.html>`_ module for an
        explanation of the acceptable patterns and flags.

        ``pattern`` may also be an already compiled expression (any object
        with ``pattern`` and ``match`` attributes). Raises ``ValueError``
        for an empty pattern string and ``TypeError`` for any other kind
        of argument.
        """
        super().__init__()
        asGroupList = asGroupList or as_group_list
        asMatch = asMatch or as_match

        if isinstance(pattern, str_type):
            if not pattern:
                raise ValueError("null string passed to Regex; use Empty() instead")

            # compilation is deferred to the first access of the ``re`` property
            self._re = None
            self.reString = self.pattern = pattern
            self.flags = flags

        elif hasattr(pattern, "pattern") and hasattr(pattern, "match"):
            # already compiled expression (stdlib re or a compatible module)
            self._re = pattern
            self.pattern = self.reString = pattern.pattern
            self.flags = flags

        else:
            raise TypeError(
                "Regex may only be constructed with a string or a compiled RE object"
            )

        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.asGroupList = asGroupList
        self.asMatch = asMatch
        # select the parseImpl variant that produces the requested result shape
        if self.asGroupList:
            self.parseImpl = self.parseImplAsGroupList  # type: ignore [assignment]
        if self.asMatch:
            self.parseImpl = self.parseImplAsMatch  # type: ignore [assignment]

    @cached_property
    def re(self):
        # Return the compiled expression, compiling the pattern string on
        # first use. Inside this method ``re`` still names the module.
        if self._re:
            return self._re
        try:
            return re.compile(self.pattern, self.flags)
        except re.error as err:
            # chain the underlying re.error so its detail stays available
            raise ValueError(
                f"invalid pattern ({self.pattern!r}) passed to Regex"
            ) from err

    @cached_property
    def re_match(self):
        return self.re.match

    @cached_property
    def mayReturnEmpty(self):
        # True when the expression matches the empty string
        return self.re_match("") is not None

    def _generateDefaultName(self) -> str:
        return "Re:({})".format(repr(self.pattern).replace("\\\\", "\\"))

    def parseImpl(self, instring, loc, doActions=True):
        # Default variant: the matched text, plus any named groups as
        # named results.
        result = self.re_match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = ParseResults(result.group())
        for k, v in result.groupdict().items():
            ret[k] = v
        return loc, ret

    def parseImplAsGroupList(self, instring, loc, doActions=True):
        # Variant used with as_group_list: returns the tuple of groups.
        result = self.re_match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = result.groups()
        return loc, ret

    def parseImplAsMatch(self, instring, loc, doActions=True):
        # Variant used with as_match: returns the match object itself.
        result = self.re_match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = result
        return loc, ret

    def sub(self, repl: str) -> ParserElement:
        r"""
        Return :class:`Regex` with an attached parse action to transform the parsed
        result as if called using `re.sub(expr, repl, string) <https://docs.python.org/3/library/re.html#re.sub>`_.

        Raises ``TypeError`` when this expression was built with
        ``as_group_list=True``, or with ``as_match=True`` and a callable
        ``repl``.

        Example::

            make_html = Regex(r"(\w+):(.*?):").sub(r"<\1>\2</\1>")
            print(make_html.transform_string("h1:main title:"))
            # prints "<h1>main title</h1>"
        """
        if self.asGroupList:
            raise TypeError("cannot use sub() with Regex(as_group_list=True)")

        if self.asMatch and callable(repl):
            raise TypeError(
                "cannot use sub() with a callable with Regex(as_match=True)"
            )

        if self.asMatch:

            # tokens[0] is the match object; expand the template against it
            def pa(tokens):
                return tokens[0].expand(repl)

        else:

            # tokens[0] is the matched text; substitute within it
            def pa(tokens):
                return self.re.sub(repl, tokens[0])

        return self.add_parse_action(pa)
3166 | ||
3167 | ||
class QuotedString(Token):
    r"""
    Token for matching strings that are delimited by quoting characters.

    Defined with the following parameters:

    - ``quote_char`` - string of one or more characters defining the
      quote delimiting string
    - ``esc_char`` - character to escape quotes, typically backslash
      (default= ``None``)
    - ``esc_quote`` - special quote sequence to escape an embedded quote
      string (such as SQL's ``""`` to escape an embedded ``"``)
      (default= ``None``)
    - ``multiline`` - boolean indicating whether quotes can span
      multiple lines (default= ``False``)
    - ``unquote_results`` - boolean indicating whether the matched text
      should be unquoted (default= ``True``)
    - ``end_quote_char`` - string of one or more characters defining the
      end of the quote delimited string (default= ``None``  => same as
      quote_char)
    - ``convert_whitespace_escapes`` - convert escaped whitespace
      (``'\t'``, ``'\n'``, etc.) to actual whitespace
      (default= ``True``)

    Each snake_case parameter also has a keyword-only camelCase synonym
    (``quoteChar``, ``escChar``, ``escQuote``, ``unquoteResults``,
    ``endQuoteChar``, ``convertWhitespaceEscapes``).

    Raises ``ValueError`` if the opening or closing quote is empty after
    stripping whitespace, or if the generated regular expression does not
    compile.

    Example::

        qs = QuotedString('"')
        print(qs.search_string('lsjdf "This is the quote" sldjf'))
        complex_qs = QuotedString('{{', end_quote_char='}}')
        print(complex_qs.search_string('lsjdf {{This is the "quote"}} sldjf'))
        sql_qs = QuotedString('"', esc_quote='""')
        print(sql_qs.search_string('lsjdf "This is the quote with ""embedded"" quotes" sldjf'))

    prints::

        [['This is the quote']]
        [['This is the "quote"']]
        [['This is the quote with "embedded" quotes']]
    """
    # maps the two-character escape text (backslash + letter) to the real
    # whitespace character it denotes
    ws_map = dict(((r"\t", "\t"), (r"\n", "\n"), (r"\f", "\f"), (r"\r", "\r")))

    def __init__(
        self,
        quote_char: str = "",
        esc_char: typing.Optional[str] = None,
        esc_quote: typing.Optional[str] = None,
        multiline: bool = False,
        unquote_results: bool = True,
        end_quote_char: typing.Optional[str] = None,
        convert_whitespace_escapes: bool = True,
        *,
        quoteChar: str = "",
        escChar: typing.Optional[str] = None,
        escQuote: typing.Optional[str] = None,
        unquoteResults: bool = True,
        endQuoteChar: typing.Optional[str] = None,
        convertWhitespaceEscapes: bool = True,
    ):
        super().__init__()
        # Merge each camelCase synonym with its snake_case counterpart.
        # String options: the camelCase value wins when it is truthy.
        # Boolean options: the result is True only if neither spelling
        # was passed as False.
        escChar = escChar or esc_char
        escQuote = escQuote or esc_quote
        unquoteResults = unquoteResults and unquote_results
        endQuoteChar = endQuoteChar or end_quote_char
        convertWhitespaceEscapes = (
            convertWhitespaceEscapes and convert_whitespace_escapes
        )
        quote_char = quoteChar or quote_char

        # remove white space from quote chars - wont work anyway
        quote_char = quote_char.strip()
        if not quote_char:
            raise ValueError("quote_char cannot be the empty string")

        if endQuoteChar is None:
            # no explicit closing delimiter: close with the opening one
            endQuoteChar = quote_char
        else:
            endQuoteChar = endQuoteChar.strip()
            if not endQuoteChar:
                raise ValueError("end_quote_char cannot be the empty string")

        self.quoteChar: str = quote_char
        self.quoteCharLen: int = len(quote_char)
        self.firstQuoteChar: str = quote_char[0]
        self.endQuoteChar: str = endQuoteChar
        self.endQuoteCharLen: int = len(endQuoteChar)
        self.escChar: str = escChar or ""
        self.escQuote: str = escQuote or ""
        self.unquoteResults: bool = unquoteResults
        self.convertWhitespaceEscapes: bool = convertWhitespaceEscapes
        self.multiline = multiline

        # Build the alternation that matches one unit of quoted content;
        # `sep` becomes "|" once the first alternative has been added.
        sep = ""
        inner_pattern = ""

        if escQuote:
            # alternative: the escaped-quote sequence itself
            inner_pattern += rf"{sep}(?:{re.escape(escQuote)})"
            sep = "|"

        if escChar:
            # alternative: the escape character followed by any one character
            inner_pattern += rf"{sep}(?:{re.escape(escChar)}.)"
            sep = "|"
            self.escCharReplacePattern = re.escape(escChar) + "(.)"

        if len(self.endQuoteChar) > 1:
            # alternative: a proper prefix of a multi-character end quote,
            # as long as it is not followed by the remainder of the end quote
            inner_pattern += (
                f"{sep}(?:"
                + "|".join(
                    f"(?:{re.escape(self.endQuoteChar[:i])}(?!{re.escape(self.endQuoteChar[i:])}))"
                    for i in range(len(self.endQuoteChar) - 1, 0, -1)
                )
                + ")"
            )
            sep = "|"

        self.flags = re.RegexFlag(0)

        # final alternative: any single character other than the first
        # end-quote character and the escape character; when not multiline,
        # newline and carriage return are excluded as well
        if multiline:
            self.flags = re.MULTILINE | re.DOTALL
            inner_pattern += (
                rf"{sep}(?:[^{_escape_regex_range_chars(self.endQuoteChar[0])}"
                rf"{(_escape_regex_range_chars(escChar) if escChar is not None else '')}])"
            )
        else:
            inner_pattern += (
                rf"{sep}(?:[^{_escape_regex_range_chars(self.endQuoteChar[0])}\n\r"
                rf"{(_escape_regex_range_chars(escChar) if escChar is not None else '')}])"
            )

        # full pattern: opening quote, zero or more content units, closing quote
        self.pattern = "".join(
            [
                re.escape(self.quoteChar),
                "(?:",
                inner_pattern,
                ")*",
                re.escape(self.endQuoteChar),
            ]
        )

        if self.unquoteResults:
            # Scanner used by parseImpl to rewrite the text between the quotes.
            # With whitespace conversion: group 1 = a ws_map key,
            # group 2 = escape char + one character, group 3 = any other
            # single character (newline included).
            # Without it: group 1 = escape char + one character,
            # group 2 = any other single character.
            if self.convertWhitespaceEscapes:
                self.unquote_scan_re = re.compile(
                    rf"({'|'.join(re.escape(k) for k in self.ws_map)})|({re.escape(self.escChar)}.)|(\n|.)",
                    flags=self.flags,
                )
            else:
                self.unquote_scan_re = re.compile(
                    rf"({re.escape(self.escChar)}.)|(\n|.)", flags=self.flags
                )

        try:
            self.re = re.compile(self.pattern, self.flags)
            self.reString = self.pattern
            self.re_match = self.re.match
        except re.error:
            raise ValueError(f"invalid pattern {self.pattern!r} passed to Regex")

        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def _generateDefaultName(self) -> str:
        """Return a description naming the delimiter(s) of this quoted string."""
        if self.quoteChar == self.endQuoteChar and isinstance(self.quoteChar, str_type):
            return f"string enclosed in {self.quoteChar!r}"

        return f"quoted string, starting with {self.quoteChar} ending with {self.endQuoteChar}"

    def parseImpl(self, instring, loc, doActions=True):
        """Match a quoted string starting at ``loc``.

        Returns ``(new_loc, text)`` where ``text`` is the matched string,
        unquoted and unescaped when ``unquoteResults`` is set. Raises
        ``ParseException`` when the text at ``loc`` does not match.
        ``doActions`` is not used by this method.
        """
        # compare the first character before running the regex
        result = (
            instring[loc] == self.firstQuoteChar
            and self.re_match(instring, loc)
            or None
        )
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = result.group()

        if self.unquoteResults:
            # strip off quotes
            ret = ret[self.quoteCharLen : -self.endQuoteCharLen]

            if isinstance(ret, str_type):
                if self.convertWhitespaceEscapes:
                    # whitespace escape -> real whitespace; escaped char ->
                    # the char after the escape; anything else unchanged
                    ret = "".join(
                        self.ws_map[match.group(1)]
                        if match.group(1)
                        else match.group(2)[-1]
                        if match.group(2)
                        else match.group(3)
                        for match in self.unquote_scan_re.finditer(ret)
                    )
                else:
                    # escaped char -> the char after the escape; else unchanged
                    ret = "".join(
                        match.group(1)[-1] if match.group(1) else match.group(2)
                        for match in self.unquote_scan_re.finditer(ret)
                    )

                # replace escaped quotes
                if self.escQuote:
                    ret = ret.replace(self.escQuote, self.endQuoteChar)

        return loc, ret
3371 | ||
3372 | ||
class CharsNotIn(Token):
    """Token matching a run of characters that are all *outside* a given
    set. Whitespace is part of the match unless it is listed in the
    exclusion set (see example).

    Construct with a string of the disallowed characters, plus optional
    ``min``, ``max`` and/or ``exact`` lengths. ``min`` defaults to 1 and
    may not be less than 1; ``max`` and ``exact`` default to 0, meaning
    no maximum or exact length restriction.

    Example::

        # define a comma-separated-value as anything that is not a ','
        csv_value = CharsNotIn(',')
        print(DelimitedList(csv_value).parse_string("dkls,lsdkjf,s12 34,@!#,213"))

    prints::

        ['dkls', 'lsdkjf', 's12 34', '@!#', '213']
    """

    def __init__(
        self,
        not_chars: str = "",
        min: int = 1,
        max: int = 0,
        exact: int = 0,
        *,
        notChars: str = "",
    ):
        super().__init__()
        self.skipWhitespace = False
        # the positional/snake_case argument wins over the camelCase synonym
        self.notChars = not_chars or notChars
        self.notCharsSet = set(self.notChars)

        if min < 1:
            raise ValueError(
                "cannot specify a minimum length < 1; use "
                "Opt(CharsNotIn()) if zero-length char group is permitted"
            )

        if exact > 0:
            # an exact length overrides both bounds
            self.minLen = self.maxLen = exact
        else:
            self.minLen = min
            self.maxLen = max if max > 0 else _MAX_INT

        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = self.minLen == 0
        self.mayIndexError = False

    def _generateDefaultName(self) -> str:
        """Return ``!W:(...)`` showing the excluded characters, truncated
        when their range-collapsed form is longer than 16 characters."""
        collapsed = _collapse_string_to_ranges(self.notChars)
        if len(collapsed) <= 16:
            shown = self.notChars
        else:
            shown = f"{self.notChars[: 16 - 3]}..."
        return f"!W:({shown})"

    def parseImpl(self, instring, loc, doActions=True):
        """Consume characters from ``loc`` until an excluded character, the
        maximum length, or the end of input is reached.

        Returns ``(new_loc, matched_text)``; raises ``ParseException`` if
        the first character is excluded or the run is shorter than
        ``minLen``.
        """
        excluded = self.notCharsSet
        if instring[loc] in excluded:
            raise ParseException(instring, loc, self.errmsg, self)

        begin = loc
        limit = min(begin + self.maxLen, len(instring))
        loc = begin + 1
        while loc < limit and instring[loc] not in excluded:
            loc += 1

        if loc - begin < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[begin:loc]
3451 | ||
3452 | ||
class White(Token):
    """Special matching class for matching whitespace.  Normally,
    whitespace is ignored by pyparsing grammars.  This class is included
    when some whitespace structures are significant.  Define with
    a string containing the whitespace characters to be matched; default
    is ``" \\t\\r\\n"``.  Also takes optional ``min``,
    ``max``, and ``exact`` arguments, as defined for the
    :class:`Word` class.

    The default name is built from the ``whiteStrs`` labels of the
    characters being matched; a character with no entry in ``whiteStrs``
    is shown as ``<U+XXXX>`` (its code point in hex).
    """

    # display label for each recognized whitespace character
    whiteStrs = {
        " ": "<SP>",
        "\t": "<TAB>",
        "\n": "<LF>",
        "\r": "<CR>",
        "\f": "<FF>",
        "\u00A0": "<NBSP>",
        "\u1680": "<OGHAM_SPACE_MARK>",
        "\u180E": "<MONGOLIAN_VOWEL_SEPARATOR>",
        "\u2000": "<EN_QUAD>",
        "\u2001": "<EM_QUAD>",
        "\u2002": "<EN_SPACE>",
        "\u2003": "<EM_SPACE>",
        "\u2004": "<THREE-PER-EM_SPACE>",
        "\u2005": "<FOUR-PER-EM_SPACE>",
        "\u2006": "<SIX-PER-EM_SPACE>",
        "\u2007": "<FIGURE_SPACE>",
        "\u2008": "<PUNCTUATION_SPACE>",
        "\u2009": "<THIN_SPACE>",
        "\u200A": "<HAIR_SPACE>",
        "\u200B": "<ZERO_WIDTH_SPACE>",
        "\u202F": "<NNBSP>",
        "\u205F": "<MMSP>",
        "\u3000": "<IDEOGRAPHIC_SPACE>",
    }

    def __init__(self, ws: str = " \t\r\n", min: int = 1, max: int = 0, exact: int = 0):
        super().__init__()
        self.matchWhite = ws
        # skip only the known whitespace characters that are NOT being matched
        self.set_whitespace_chars(
            "".join(c for c in self.whiteStrs if c not in self.matchWhite),
            copy_defaults=True,
        )
        self.mayReturnEmpty = True
        self.errmsg = "Expected " + self.name

        self.minLen = min

        if max > 0:
            self.maxLen = max
        else:
            self.maxLen = _MAX_INT

        if exact > 0:
            # an exact length overrides both bounds
            self.maxLen = exact
            self.minLen = exact

    def _generateDefaultName(self) -> str:
        """Return the concatenated labels of the characters being matched.

        Characters without a ``whiteStrs`` entry fall back to ``<U+XXXX>``
        instead of raising ``KeyError``; this method runs from ``__init__``
        (via ``self.name``), so a missing entry used to make construction fail.
        """
        return "".join(
            White.whiteStrs.get(c, f"<U+{ord(c):04X}>") for c in self.matchWhite
        )

    def parseImpl(self, instring, loc, doActions=True):
        """Consume a run of ``matchWhite`` characters starting at ``loc``.

        Returns ``(new_loc, matched_text)``; raises ``ParseException`` if
        the first character is not in ``matchWhite`` or the run is shorter
        than ``minLen``.
        """
        if instring[loc] not in self.matchWhite:
            raise ParseException(instring, loc, self.errmsg, self)
        start = loc
        loc += 1
        # never scan past maxLen characters or the end of the input
        maxloc = start + self.maxLen
        maxloc = min(maxloc, len(instring))
        while loc < maxloc and instring[loc] in self.matchWhite:
            loc += 1

        if loc - start < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]
3528 | ||
3529 | ||
class PositionToken(Token):
    """Common base class for the position-sensitive tokens that subclass
    it; it sets ``mayReturnEmpty`` and clears ``mayIndexError``.
    """

    def __init__(self):
        super().__init__()
        self.mayIndexError = False
        self.mayReturnEmpty = True
3535 | ||
3536 | ||
class GoToColumn(PositionToken):
    """Token that advances to a given column of the input text; useful
    when scraping tabular reports.
    """

    def __init__(self, colno: int):
        super().__init__()
        self.col = colno

    def preParse(self, instring: str, loc: int) -> int:
        """Skip ignorable expressions and whitespace until the target
        column, a non-space character, or the end of input is reached."""
        target = self.col
        if col(loc, instring) == target:
            return loc

        if self.ignoreExprs:
            loc = self._skipIgnorables(instring, loc)

        end = len(instring)
        while loc < end and instring[loc].isspace() and col(loc, instring) != target:
            loc += 1
        return loc

    def parseImpl(self, instring, loc, doActions=True):
        """Return the text between ``loc`` and the target column, together
        with the location of that column.

        Raises ``ParseException`` if ``loc`` is already past the column.
        """
        gap = self.col - col(loc, instring)
        if gap < 0:
            raise ParseException(instring, loc, "Text not in expected column", self)
        target_loc = loc + gap
        return target_loc, instring[loc:target_loc]
3566 | ||
3567 | ||
class LineStart(PositionToken):
    r"""Matches only when the current position is at the start of a line
    in the string being parsed.

    Example::

        test = '''\
        AAA this line
        AAA and this line
          AAA but not this one
        B AAA and definitely not this one
        '''

        for t in (LineStart() + 'AAA' + rest_of_line).search_string(test):
            print(t)

    prints::

        ['AAA', ' this line']
        ['AAA', ' and this line']

    """

    def __init__(self):
        super().__init__()
        self.leave_whitespace()
        # remember the whitespace set as it was before "\n" is removed from it
        self.orig_whiteChars = set(self.whiteChars)
        self.whiteChars.discard("\n")
        # helper expression used only to skip the remaining whitespace chars
        self.skipper = Empty().set_whitespace_chars(self.whiteChars)
        self.errmsg = "Expected start of line"

    def preParse(self, instring: str, loc: int) -> int:
        """Skip non-newline whitespace; if "\n" was originally a whitespace
        character, also step over newlines and the whitespace after each."""
        if loc == 0:
            return loc

        skip = self.skipper.preParse
        pos = skip(instring, loc)
        if "\n" not in self.orig_whiteChars:
            return pos

        while instring[pos : pos + 1] == "\n":
            pos = skip(instring, pos + 1)
        return pos

    def parseImpl(self, instring, loc, doActions=True):
        """Succeed with no tokens when ``loc`` is in column 1; otherwise
        raise ``ParseException``."""
        if col(loc, instring) != 1:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
3613 | ||
3614 | ||
class LineEnd(PositionToken):
    """Matches only when the current position is at the end of a line
    in the string being parsed.
    """

    def __init__(self):
        super().__init__()
        # "\n" must not be skipped as whitespace, since it is what we match
        non_newline_ws = self.whiteChars
        non_newline_ws.discard("\n")
        self.set_whitespace_chars(non_newline_ws, copy_defaults=False)
        self.errmsg = "Expected end of line"

    def parseImpl(self, instring, loc, doActions=True):
        """Match a newline at ``loc`` (returning ``"\\n"``) or the end of
        the input (returning no tokens); otherwise raise ``ParseException``."""
        end = len(instring)
        if loc == end:
            return loc + 1, []
        if loc < end and instring[loc] == "\n":
            return loc + 1, "\n"
        raise ParseException(instring, loc, self.errmsg, self)
3636 | ||
3637 | ||
class StringStart(PositionToken):
    """Matches only when the current position is at the start of the
    string being parsed.
    """

    def __init__(self):
        super().__init__()
        self.errmsg = "Expected start of text"

    def parseImpl(self, instring, loc, doActions=True):
        """Succeed with no tokens at location 0, or at the location that
        ``preParse`` reaches when started from 0; otherwise raise
        ``ParseException``."""
        # a non-zero loc is still acceptable if everything before it is
        # just whitespace and ignorables
        if loc != 0 and loc != self.preParse(instring, 0):
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
3653 | ||
3654 | ||
class StringEnd(PositionToken):
    """
    Matches only when the current position is at the end of the string
    being parsed.
    """

    def __init__(self):
        super().__init__()
        self.errmsg = "Expected end of text"

    def parseImpl(self, instring, loc, doActions=True):
        """Raise ``ParseException`` when input remains after ``loc``.

        Exactly at the end of input the returned location is ``loc + 1``;
        beyond the end it is returned unchanged. No tokens are produced.
        """
        end = len(instring)
        if loc < end:
            raise ParseException(instring, loc, self.errmsg, self)
        if loc == end:
            return loc + 1, []
        return loc, []
3673 | ||
3674 | ||
class WordStart(PositionToken):
    r"""Matches when the current position is at the beginning of a
    :class:`Word`, i.e. the current character is in ``word_chars``
    (default= ``printables``) and the preceding character is not.
    To emulate the ``\b`` behavior of regular expressions, use
    ``WordStart(alphanums)``. ``WordStart`` will also match at
    the beginning of the string being parsed, or at the beginning of
    a line.
    """

    def __init__(self, word_chars: str = printables, *, wordChars: str = printables):
        # the camelCase synonym is honored only when it differs from the default
        if wordChars == printables:
            wordChars = word_chars
        super().__init__()
        self.wordChars = set(wordChars)
        self.errmsg = "Not at the start of a word"

    def parseImpl(self, instring, loc, doActions=True):
        """Succeed with no tokens at location 0, or when the previous
        character is not a word character and the current one is;
        otherwise raise ``ParseException``."""
        if loc == 0:
            return loc, []

        chars = self.wordChars
        at_word_start = instring[loc - 1] not in chars and instring[loc] in chars
        if not at_word_start:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
3699 | ||
3700 | ||
class WordEnd(PositionToken):
    r"""Matches when the current position is at the end of a
    :class:`Word`, i.e. the preceding character is in ``word_chars``
    (default= ``printables``) and the current character is not.
    To emulate the ``\b`` behavior of regular expressions, use
    ``WordEnd(alphanums)``. ``WordEnd`` will also match at the end of
    the string being parsed, or at the end of a line.
    """

    def __init__(self, word_chars: str = printables, *, wordChars: str = printables):
        # the camelCase synonym is honored only when it differs from the default
        if wordChars == printables:
            wordChars = word_chars
        super().__init__()
        self.wordChars = set(wordChars)
        self.skipWhitespace = False
        self.errmsg = "Not at the end of a word"

    def parseImpl(self, instring, loc, doActions=True):
        """Succeed with no tokens at or past the end of input, or when the
        current character is not a word character and the previous one is;
        otherwise raise ``ParseException``."""
        size = len(instring)
        if size == 0 or loc >= size:
            return loc, []

        chars = self.wordChars
        if instring[loc] not in chars and instring[loc - 1] in chars:
            return loc, []
        raise ParseException(instring, loc, self.errmsg, self)
3726 | ||
3727 | ||
class ParseExpression(ParserElement):
    """Abstract subclass of ParserElement, for combining and
    post-processing parsed tokens.

    Holds the contained expressions in ``self.exprs`` and propagates
    whitespace, ignore, streamline, validate and copy operations to them.
    """

    def __init__(self, exprs: typing.Iterable[ParserElement], savelist: bool = False):
        """Normalize ``exprs`` into the list ``self.exprs``.

        ``exprs`` may be a generator, a single string, a single
        ``ParserElement``, or an iterable of expressions and/or strings;
        strings are wrapped with ``self._literalStringClass``.
        """
        super().__init__(savelist)
        self.exprs: List[ParserElement]
        if isinstance(exprs, _generatorType):
            exprs = list(exprs)

        if isinstance(exprs, str_type):
            self.exprs = [self._literalStringClass(exprs)]
        elif isinstance(exprs, ParserElement):
            self.exprs = [exprs]
        elif isinstance(exprs, Iterable):
            exprs = list(exprs)
            # if sequence of strings provided, wrap with Literal
            if any(isinstance(expr, str_type) for expr in exprs):
                exprs = (
                    self._literalStringClass(e) if isinstance(e, str_type) else e
                    for e in exprs
                )
            self.exprs = list(exprs)
        else:
            # last resort: try to iterate, else treat as a single element
            try:
                self.exprs = list(exprs)
            except TypeError:
                self.exprs = [exprs]
        self.callPreparse = False

    def recurse(self) -> List[ParserElement]:
        """Return a shallow copy of the list of contained expressions."""
        return self.exprs[:]

    def append(self, other) -> ParserElement:
        """Add ``other`` to the contained expressions, reset the cached
        default name, and return ``self``."""
        self.exprs.append(other)
        self._defaultName = None
        return self

    def leave_whitespace(self, recursive: bool = True) -> ParserElement:
        """
        Extends ``leave_whitespace`` defined in base class, and also invokes ``leave_whitespace`` on
        all contained expressions.
        """
        super().leave_whitespace(recursive)

        if recursive:
            # work on copies so the original contained expressions are untouched
            self.exprs = [e.copy() for e in self.exprs]
            for e in self.exprs:
                e.leave_whitespace(recursive)
        return self

    def ignore_whitespace(self, recursive: bool = True) -> ParserElement:
        """
        Extends ``ignore_whitespace`` defined in base class, and also invokes ``ignore_whitespace`` on
        all contained expressions.
        """
        super().ignore_whitespace(recursive)
        if recursive:
            # work on copies so the original contained expressions are untouched
            self.exprs = [e.copy() for e in self.exprs]
            for e in self.exprs:
                e.ignore_whitespace(recursive)
        return self

    def ignore(self, other) -> ParserElement:
        """Register ``other`` as an ignorable expression on this element and
        on every contained expression; returns ``self``.

        A ``Suppress`` that is already in ``self.ignoreExprs`` is not
        registered again.
        """
        if isinstance(other, Suppress):
            if other not in self.ignoreExprs:
                super().ignore(other)
                # pass on the entry that the base class just appended
                for e in self.exprs:
                    e.ignore(self.ignoreExprs[-1])
        else:
            super().ignore(other)
            for e in self.exprs:
                e.ignore(self.ignoreExprs[-1])
        return self

    def _generateDefaultName(self) -> str:
        """Return ``ClassName:([...])`` listing the contained expressions."""
        return f"{self.__class__.__name__}:({str(self.exprs)})"

    def streamline(self) -> ParserElement:
        """Streamline this element and its contained expressions, flattening
        a directly nested expression of the same class when it is safe to
        do so. Does nothing if already streamlined. Returns ``self``."""
        if self.streamlined:
            return self

        super().streamline()

        for e in self.exprs:
            e.streamline()

        # collapse nested :class:`And`'s of the form ``And(And(And(a, b), c), d)`` to ``And(a, b, c, d)``
        # but only if there are no parse actions or resultsNames on the nested And's
        # (likewise for :class:`Or`'s and :class:`MatchFirst`'s)
        if len(self.exprs) == 2:
            # first try to absorb a nested expression in the leading position
            other = self.exprs[0]
            if (
                isinstance(other, self.__class__)
                and not other.parseAction
                and other.resultsName is None
                and not other.debug
            ):
                self.exprs = other.exprs[:] + [self.exprs[1]]
                self._defaultName = None
                self.mayReturnEmpty |= other.mayReturnEmpty
                self.mayIndexError |= other.mayIndexError

            # then the trailing position of the (possibly already widened) list
            other = self.exprs[-1]
            if (
                isinstance(other, self.__class__)
                and not other.parseAction
                and other.resultsName is None
                and not other.debug
            ):
                self.exprs = self.exprs[:-1] + other.exprs[:]
                self._defaultName = None
                self.mayReturnEmpty |= other.mayReturnEmpty
                self.mayIndexError |= other.mayIndexError

        self.errmsg = "Expected " + str(self)

        return self

    def validate(self, validateTrace=None) -> None:
        """Deprecated: emits a ``DeprecationWarning``, then validates each
        contained expression and runs ``_checkRecursion``."""
        warnings.warn(
            "ParserElement.validate() is deprecated, and should not be used to check for left recursion",
            DeprecationWarning,
            stacklevel=2,
        )
        # copy the incoming trace (if any) and append this element
        tmp = (validateTrace if validateTrace is not None else [])[:] + [self]
        for e in self.exprs:
            e.validate(tmp)
        self._checkRecursion([])

    def copy(self) -> ParserElement:
        """Return a copy of this element whose contained expressions are
        themselves copies."""
        ret = super().copy()
        ret = typing.cast(ParseExpression, ret)
        ret.exprs = [e.copy() for e in self.exprs]
        return ret

    def _setResultsName(self, name, listAllMatches=False):
        """Delegate to the base class after optionally warning that a
        contained expression already has a results name.

        The warning is issued only when the
        ``warn_ungrouped_named_tokens_in_collection`` diagnostic is enabled
        and is not suppressed on this element or on the contained one.
        """
        if (
            __diag__.warn_ungrouped_named_tokens_in_collection
            and Diagnostics.warn_ungrouped_named_tokens_in_collection
            not in self.suppress_warnings_
        ):
            for e in self.exprs:
                if (
                    isinstance(e, ParserElement)
                    and e.resultsName
                    and Diagnostics.warn_ungrouped_named_tokens_in_collection
                    not in e.suppress_warnings_
                ):
                    warnings.warn(
                        "{}: setting results name {!r} on {} expression "
                        "collides with {!r} on contained expression".format(
                            "warn_ungrouped_named_tokens_in_collection",
                            name,
                            type(self).__name__,
                            e.resultsName,
                        ),
                        stacklevel=3,
                    )

        return super()._setResultsName(name, listAllMatches)

    # Compatibility synonyms
    # (camelCase names bound to the snake_case methods via replaced_by_pep8)
    # fmt: off
    @replaced_by_pep8(leave_whitespace)
    def leaveWhitespace(self): ...

    @replaced_by_pep8(ignore_whitespace)
    def ignoreWhitespace(self): ...
    # fmt: on
3899 | ||
3900 | ||
class And(ParseExpression):
    """
    Requires all given :class:`ParseExpression` s to be found in the given order.
    Expressions may be separated by whitespace.
    May be constructed using the ``'+'`` operator.
    May also be constructed using the ``'-'`` operator, which will
    suppress backtracking.

    Example::

        integer = Word(nums)
        name_expr = Word(alphas)[1, ...]

        expr = And([integer("id"), name_expr("name"), integer("age")])
        # more easily written as:
        expr = integer("id") + name_expr("name") + integer("age")
    """

    class _ErrorStop(Empty):
        # Marker element: once parseImpl encounters it, failures of the
        # following expressions are raised as ParseSyntaxException.
        def __init__(self, *args, **kwargs):
            super().__init__(*args, **kwargs)
            self.leave_whitespace()

        def _generateDefaultName(self) -> str:
            return "-"

    def __init__(
        self, exprs_arg: typing.Iterable[ParserElement], savelist: bool = True
    ):
        """Build the sequence, replacing each ``...`` (Ellipsis) entry with a
        ``SkipTo`` targeting the expression that follows it.

        Raises ``Exception`` if the sequence ends with ``...``.
        """
        exprs: List[ParserElement] = list(exprs_arg)
        if exprs and Ellipsis in exprs:
            tmp = []
            for i, expr in enumerate(exprs):
                if expr is Ellipsis:
                    if i < len(exprs) - 1:
                        # Empty() + x yields an expression whose last element
                        # is x as a ParserElement; that becomes the skip target
                        skipto_arg: ParserElement = typing.cast(
                            ParseExpression, (Empty() + exprs[i + 1])
                        ).exprs[-1]
                        tmp.append(SkipTo(skipto_arg)("_skipped*"))
                    else:
                        raise Exception(
                            "cannot construct And with sequence ending in ..."
                        )
                else:
                    tmp.append(expr)
            exprs[:] = tmp
        super().__init__(exprs, savelist)
        if self.exprs:
            self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
            # take whitespace handling from the first expression, unless it
            # is a White, in which case no whitespace is skipped
            if not isinstance(self.exprs[0], White):
                self.set_whitespace_chars(
                    self.exprs[0].whiteChars,
                    copy_defaults=self.exprs[0].copyDefaultWhiteChars,
                )
                self.skipWhitespace = self.exprs[0].skipWhitespace
            else:
                self.skipWhitespace = False
        else:
            self.mayReturnEmpty = True
        self.callPreparse = True

    def streamline(self) -> ParserElement:
        """Merge pending skips into their following expression, run the base
        class streamlining, attach an anchor-recording parse action in front
        of each ``IndentedBlock``, and recompute ``mayReturnEmpty``."""
        # collapse any _PendingSkip's
        if self.exprs:
            if any(
                isinstance(e, ParseExpression)
                and e.exprs
                and isinstance(e.exprs[-1], _PendingSkip)
                for e in self.exprs[:-1]
            ):
                # placeholder written over entries that have been merged away
                deleted_expr_marker = NoMatch()
                # NOTE(review): this iterates a slice copy of self.exprs, so
                # markers written into self.exprs below are never seen by the
                # `e is deleted_expr_marker` check - confirm this is intended.
                for i, e in enumerate(self.exprs[:-1]):
                    if e is deleted_expr_marker:
                        continue
                    if (
                        isinstance(e, ParseExpression)
                        and e.exprs
                        and isinstance(e.exprs[-1], _PendingSkip)
                    ):
                        # fold the next expression into the pending skip
                        e.exprs[-1] = e.exprs[-1] + self.exprs[i + 1]
                        self.exprs[i + 1] = deleted_expr_marker
                self.exprs = [e for e in self.exprs if e is not deleted_expr_marker]

        super().streamline()

        # link any IndentedBlocks to the prior expression
        prev: ParserElement
        cur: ParserElement
        for prev, cur in zip(self.exprs, self.exprs[1:]):
            # traverse cur or any first embedded expr of cur looking for an IndentedBlock
            # (but watch out for recursive grammar)
            seen = set()
            while True:
                if id(cur) in seen:
                    break
                seen.add(id(cur))
                if isinstance(cur, IndentedBlock):
                    # when prev matches, record its column on the block as
                    # "parent_anchor"; cur is bound as a default argument so
                    # the lambda keeps this iteration's block
                    prev.add_parse_action(
                        lambda s, l, t, cur_=cur: setattr(
                            cur_, "parent_anchor", col(l, s)
                        )
                    )
                    break
                subs = cur.recurse()
                next_first = next(iter(subs), None)
                if next_first is None:
                    break
                cur = typing.cast(ParserElement, next_first)

        self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
        return self

    def parseImpl(self, instring, loc, doActions=True):
        """Parse each contained expression in order, accumulating tokens.

        Returns ``(new_loc, tokens)``. After an ``_ErrorStop`` marker has
        been passed, a ``ParseBaseException`` or ``IndexError`` from a later
        expression is re-raised as ``ParseSyntaxException``.
        """
        # pass False as callPreParse arg to _parse for first element, since we already
        # pre-parsed the string as part of our And pre-parsing
        loc, resultlist = self.exprs[0]._parse(
            instring, loc, doActions, callPreParse=False
        )
        errorStop = False
        for e in self.exprs[1:]:
            # exact type comparison (not isinstance)
            if type(e) is And._ErrorStop:
                errorStop = True
                continue
            if errorStop:
                try:
                    loc, exprtokens = e._parse(instring, loc, doActions)
                except ParseSyntaxException:
                    raise
                except ParseBaseException as pe:
                    # drop the traceback before converting the exception
                    pe.__traceback__ = None
                    raise ParseSyntaxException._from_exception(pe)
                except IndexError:
                    raise ParseSyntaxException(
                        instring, len(instring), self.errmsg, self
                    )
            else:
                loc, exprtokens = e._parse(instring, loc, doActions)
            resultlist += exprtokens
        return loc, resultlist

    def __iadd__(self, other):
        """Append ``other`` in place (``expr += other``); strings are wrapped
        with ``self._literalStringClass``. Returns ``NotImplemented`` for
        anything that is not a ``ParserElement``."""
        if isinstance(other, str_type):
            other = self._literalStringClass(other)
        if not isinstance(other, ParserElement):
            return NotImplemented
        return self.append(other)  # And([self, other])

    def _checkRecursion(self, parseElementList):
        """Run the recursion check on contained expressions, stopping after
        the first one that cannot match empty."""
        subRecCheckList = parseElementList[:] + [self]
        for e in self.exprs:
            e._checkRecursion(subRecCheckList)
            if not e.mayReturnEmpty:
                break

    def _generateDefaultName(self) -> str:
        """Return the contained expressions' names, space-separated and
        wrapped in a single pair of braces."""
        inner = " ".join(str(e) for e in self.exprs)
        # strip off redundant inner {}'s
        # (inner[0 :: len(inner) - 1] is the first and last character)
        while len(inner) > 1 and inner[0 :: len(inner) - 1] == "{}":
            inner = inner[1:-1]
        return "{" + inner + "}"
4062 | ||
4063 | ||
class Or(ParseExpression):
    """Requires that at least one :class:`ParseExpression` is found. If
    two expressions match, the expression that matches the longest
    string will be used. May be constructed using the ``'^'``
    operator.

    Example::

        # construct Or using '^' operator

        number = Word(nums) ^ Combine(Word(nums) + '.' + Word(nums))
        print(number.search_string("123 3.1416 789"))

    prints::

        [['123'], ['3.1416'], ['789']]
    """

    def __init__(self, exprs: typing.Iterable[ParserElement], savelist: bool = False):
        super().__init__(exprs, savelist)
        if not self.exprs:
            # nothing to match against, so an empty match is possible
            self.mayReturnEmpty = True
            return
        self.mayReturnEmpty = any(alt.mayReturnEmpty for alt in self.exprs)
        self.skipWhitespace = all(alt.skipWhitespace for alt in self.exprs)

    def streamline(self) -> ParserElement:
        """Streamline the alternatives, then recompute the flags derived from them."""
        super().streamline()
        if not self.exprs:
            self.saveAsList = False
            return self

        self.mayReturnEmpty = any(alt.mayReturnEmpty for alt in self.exprs)
        self.saveAsList = any(alt.saveAsList for alt in self.exprs)
        self.skipWhitespace = all(
            alt.skipWhitespace and not isinstance(alt, White) for alt in self.exprs
        )
        return self

    def parseImpl(self, instring, loc, doActions=True):
        """Try every alternative at ``loc`` and return the longest match.

        The first pass uses ``try_parse`` to find where each alternative would
        end. Matching alternatives are then re-parsed from longest to
        shortest. If nothing matches, the fatal exception or the ordinary
        parse exception with the greatest location is raised.
        """
        furthest_loc = -1
        furthest_exc = None
        candidates = []
        fatal_errors = []
        if all(alt.callPreparse for alt in self.exprs):
            loc = self.preParse(instring, loc)

        for alt in self.exprs:
            try:
                end_loc = alt.try_parse(instring, loc, raise_fatal=True)
            except ParseFatalException as pfe:
                pfe.__traceback__ = None
                pfe.parser_element = alt
                fatal_errors.append(pfe)
                # once a fatal error is seen, ordinary failures are discarded
                furthest_exc = None
                furthest_loc = -1
            except ParseException as err:
                if not fatal_errors:
                    err.__traceback__ = None
                    if err.loc > furthest_loc:
                        furthest_exc, furthest_loc = err, err.loc
            except IndexError:
                if len(instring) > furthest_loc:
                    furthest_exc = ParseException(
                        instring, len(instring), alt.errmsg, self
                    )
                    furthest_loc = len(instring)
            else:
                # remember every match, so they can be retried longest to shortest
                candidates.append((end_loc, alt))

        if candidates:
            # re-evaluate all matches in descending order of match length, in case
            # attached actions change whether or how much of the input they match
            candidates.sort(key=itemgetter(0), reverse=True)

            if not doActions:
                # no conditions or parse actions can alter the selection, so the
                # first (longest) candidate is the best match
                return candidates[0][1]._parse(instring, loc, doActions)

            best = -1, None
            for predicted_end, alt in candidates:
                if predicted_end <= best[0]:
                    # a longer match is already in hand than this one can deliver
                    return best

                try:
                    actual_end, toks = alt._parse(instring, loc, doActions)
                except ParseException as err:
                    err.__traceback__ = None
                    if err.loc > furthest_loc:
                        furthest_exc, furthest_loc = err, err.loc
                else:
                    if actual_end >= predicted_end:
                        return actual_end, toks
                    # matched less than the first pass predicted
                    elif actual_end > best[0]:
                        best = actual_end, toks

            if best != (-1, None):
                return best

        if fatal_errors:
            if len(fatal_errors) > 1:
                fatal_errors.sort(key=lambda exc: -exc.loc)
                if fatal_errors[0].loc == fatal_errors[1].loc:
                    # tie on location: prefer the element with the longer name
                    fatal_errors.sort(
                        key=lambda exc: (-exc.loc, -len(str(exc.parser_element)))
                    )
            raise fatal_errors[0]

        if furthest_exc is None:
            raise ParseException(
                instring, loc, "no defined alternatives to match", self
            )

        # if the furthest failure is at the current position, every alternative
        # failed here, so report this expression's collective message instead
        if furthest_loc == loc:
            furthest_exc.msg = self.errmsg
        raise furthest_exc

    def __ixor__(self, other):
        """Implement ``self ^= other`` by appending ``other`` as another alternative."""
        operand = (
            self._literalStringClass(other) if isinstance(other, str_type) else other
        )
        if isinstance(operand, ParserElement):
            return self.append(operand)  # Or([self, other])
        return NotImplemented

    def _generateDefaultName(self) -> str:
        return "{" + " ^ ".join(map(str, self.exprs)) + "}"

    def _setResultsName(self, name, listAllMatches=False):
        # warn (when the diagnostic is enabled and not suppressed) if any
        # alternative is an And, then defer to the base implementation
        if (
            __diag__.warn_multiple_tokens_in_named_alternation
            and Diagnostics.warn_multiple_tokens_in_named_alternation
            not in self.suppress_warnings_
            and any(
                isinstance(alt, And)
                and Diagnostics.warn_multiple_tokens_in_named_alternation
                not in alt.suppress_warnings_
                for alt in self.exprs
            )
        ):
            warnings.warn(
                "{}: setting results name {!r} on {} expression "
                "will return a list of all parsed tokens in an And alternative, "
                "in prior versions only the first token was returned; enclose "
                "contained argument in Group".format(
                    "warn_multiple_tokens_in_named_alternation",
                    name,
                    type(self).__name__,
                ),
                stacklevel=3,
            )

        return super()._setResultsName(name, listAllMatches)
4222 | ||
4223 | ||
class MatchFirst(ParseExpression):
    """Requires that at least one :class:`ParseExpression` is found. If
    more than one expression matches, the first one listed is the one that will
    match. May be constructed using the ``'|'`` operator.

    Example::

        # construct MatchFirst using '|' operator

        # watch the order of expressions to match
        number = Word(nums) | Combine(Word(nums) + '.' + Word(nums))
        print(number.search_string("123 3.1416 789")) #  Fail! -> [['123'], ['3'], ['1416'], ['789']]

        # put more selective expression first
        number = Combine(Word(nums) + '.' + Word(nums)) | Word(nums)
        print(number.search_string("123 3.1416 789")) #  Better -> [['123'], ['3.1416'], ['789']]
    """

    def __init__(self, exprs: typing.Iterable[ParserElement], savelist: bool = False):
        super().__init__(exprs, savelist)
        if not self.exprs:
            # nothing to match against, so an empty match is possible
            self.mayReturnEmpty = True
            return
        self.mayReturnEmpty = any(alt.mayReturnEmpty for alt in self.exprs)
        self.skipWhitespace = all(alt.skipWhitespace for alt in self.exprs)

    def streamline(self) -> ParserElement:
        """Streamline once, then recompute the flags derived from the alternatives."""
        if self.streamlined:
            return self

        super().streamline()
        if not self.exprs:
            self.saveAsList = False
            self.mayReturnEmpty = True
            return self

        self.saveAsList = any(alt.saveAsList for alt in self.exprs)
        self.mayReturnEmpty = any(alt.mayReturnEmpty for alt in self.exprs)
        self.skipWhitespace = all(
            alt.skipWhitespace and not isinstance(alt, White) for alt in self.exprs
        )
        return self

    def parseImpl(self, instring, loc, doActions=True):
        """Return the result of the first alternative that parses at ``loc``.

        A ``ParseFatalException`` from any alternative is re-raised at once.
        If every alternative fails, the parse exception with the greatest
        location is raised.
        """
        furthest_loc = -1
        furthest_exc = None

        for alt in self.exprs:
            try:
                return alt._parse(instring, loc, doActions)
            except ParseFatalException as pfe:
                pfe.__traceback__ = None
                pfe.parser_element = alt
                raise
            except ParseException as err:
                if err.loc > furthest_loc:
                    furthest_exc, furthest_loc = err, err.loc
            except IndexError:
                if len(instring) > furthest_loc:
                    furthest_exc = ParseException(
                        instring, len(instring), alt.errmsg, self
                    )
                    furthest_loc = len(instring)

        if furthest_exc is None:
            raise ParseException(
                instring, loc, "no defined alternatives to match", self
            )

        # if the furthest failure is at the current position, every alternative
        # failed here, so report this expression's collective message instead
        if furthest_loc == loc:
            furthest_exc.msg = self.errmsg
        raise furthest_exc

    def __ior__(self, other):
        """Implement ``self |= other`` by appending ``other`` as another alternative."""
        operand = (
            self._literalStringClass(other) if isinstance(other, str_type) else other
        )
        if isinstance(operand, ParserElement):
            return self.append(operand)  # MatchFirst([self, other])
        return NotImplemented

    def _generateDefaultName(self) -> str:
        return "{" + " | ".join(map(str, self.exprs)) + "}"

    def _setResultsName(self, name, listAllMatches=False):
        # warn (when the diagnostic is enabled and not suppressed) if any
        # alternative is an And, then defer to the base implementation
        if (
            __diag__.warn_multiple_tokens_in_named_alternation
            and Diagnostics.warn_multiple_tokens_in_named_alternation
            not in self.suppress_warnings_
            and any(
                isinstance(alt, And)
                and Diagnostics.warn_multiple_tokens_in_named_alternation
                not in alt.suppress_warnings_
                for alt in self.exprs
            )
        ):
            warnings.warn(
                "{}: setting results name {!r} on {} expression "
                "will return a list of all parsed tokens in an And alternative, "
                "in prior versions only the first token was returned; enclose "
                "contained argument in Group".format(
                    "warn_multiple_tokens_in_named_alternation",
                    name,
                    type(self).__name__,
                ),
                stacklevel=3,
            )

        return super()._setResultsName(name, listAllMatches)
4338 | ||
4339 | ||
class Each(ParseExpression):
    """Requires all given :class:`ParseExpression` s to be found, but in
    any order. Expressions may be separated by whitespace.

    May be constructed using the ``'&'`` operator.

    Example::

        color = one_of("RED ORANGE YELLOW GREEN BLUE PURPLE BLACK WHITE BROWN")
        shape_type = one_of("SQUARE CIRCLE TRIANGLE STAR HEXAGON OCTAGON")
        integer = Word(nums)
        shape_attr = "shape:" + shape_type("shape")
        posn_attr = "posn:" + Group(integer("x") + ',' + integer("y"))("posn")
        color_attr = "color:" + color("color")
        size_attr = "size:" + integer("size")

        # use Each (using operator '&') to accept attributes in any order
        # (shape and posn are required, color and size are optional)
        shape_spec = shape_attr & posn_attr & Opt(color_attr) & Opt(size_attr)

        shape_spec.run_tests('''
            shape: SQUARE color: BLACK posn: 100, 120
            shape: CIRCLE size: 50 color: BLUE posn: 50,80
            color:GREEN size:20 shape:TRIANGLE posn:20,40
            '''
            )

    prints::

        shape: SQUARE color: BLACK posn: 100, 120
        ['shape:', 'SQUARE', 'color:', 'BLACK', 'posn:', ['100', ',', '120']]
        - color: BLACK
        - posn: ['100', ',', '120']
          - x: 100
          - y: 120
        - shape: SQUARE


        shape: CIRCLE size: 50 color: BLUE posn: 50,80
        ['shape:', 'CIRCLE', 'size:', '50', 'color:', 'BLUE', 'posn:', ['50', ',', '80']]
        - color: BLUE
        - posn: ['50', ',', '80']
          - x: 50
          - y: 80
        - shape: CIRCLE
        - size: 50


        color: GREEN size: 20 shape: TRIANGLE posn: 20,40
        ['color:', 'GREEN', 'size:', '20', 'shape:', 'TRIANGLE', 'posn:', ['20', ',', '40']]
        - color: GREEN
        - posn: ['20', ',', '40']
          - x: 20
          - y: 40
        - shape: TRIANGLE
        - size: 20
    """

    def __init__(self, exprs: typing.Iterable[ParserElement], savelist: bool = True):
        super().__init__(exprs, savelist)
        # an empty Each trivially matches; otherwise every member must allow empty
        self.mayReturnEmpty = (
            all(member.mayReturnEmpty for member in self.exprs) if self.exprs else True
        )
        self.skipWhitespace = True
        # the expression groups used by parseImpl are built on its first call
        self.initExprGroups = True
        self.saveAsList = True

    def __iand__(self, other):
        """Implement ``self &= other`` by appending ``other`` as another member."""
        operand = (
            self._literalStringClass(other) if isinstance(other, str_type) else other
        )
        if isinstance(operand, ParserElement):
            return self.append(operand)  # Each([self, other])
        return NotImplemented

    def streamline(self) -> ParserElement:
        """Streamline the members, then recompute ``mayReturnEmpty`` from them."""
        super().streamline()
        self.mayReturnEmpty = (
            all(member.mayReturnEmpty for member in self.exprs) if self.exprs else True
        )
        return self

    def parseImpl(self, instring, loc, doActions=True):
        """Match the member expressions in whatever order they occur in the input.

        A first phase repeatedly probes the still-pending expressions with
        ``try_parse`` to discover a match order. A second phase re-parses the
        input in that order to collect the results. Raises the furthest
        ``ParseFatalException`` seen in the final probing round, or a
        ``ParseException`` if required expressions remain unmatched.
        """
        if self.initExprGroups:
            # one-time classification of the members into required/optional groups
            self.opt1map = {
                id(member.expr): member
                for member in self.exprs
                if isinstance(member, Opt)
            }
            wrapped_optionals = [
                member.expr for member in self.exprs if isinstance(member, Opt)
            ]
            implicit_optionals = [
                member
                for member in self.exprs
                if member.mayReturnEmpty
                and not isinstance(member, (Opt, Regex, ZeroOrMore))
            ]
            self.optionals = wrapped_optionals + implicit_optionals
            self.multioptionals = [
                member.expr.set_results_name(member.resultsName, list_all_matches=True)
                for member in self.exprs
                if isinstance(member, _MultipleMatch)
            ]
            self.multirequired = [
                member.expr.set_results_name(member.resultsName, list_all_matches=True)
                for member in self.exprs
                if isinstance(member, OneOrMore)
            ]
            self.required = [
                member
                for member in self.exprs
                if not isinstance(member, (Opt, ZeroOrMore, OneOrMore))
            ]
            self.required += self.multirequired
            self.initExprGroups = False

        probe_loc = loc
        pending_required = self.required[:]
        pending_optional = self.optionals[:]
        repeatables = self.multioptionals[:]
        match_order = []

        failed = []
        fatals = []
        while True:
            pending = pending_required + pending_optional + repeatables
            failed.clear()
            fatals.clear()
            for candidate in pending:
                try:
                    probe_loc = candidate.try_parse(
                        instring, probe_loc, raise_fatal=True
                    )
                except ParseFatalException as pfe:
                    pfe.__traceback__ = None
                    pfe.parser_element = candidate
                    fatals.append(pfe)
                    failed.append(candidate)
                except ParseException:
                    failed.append(candidate)
                else:
                    # record the original Opt wrapper if this was an unwrapped optional
                    match_order.append(self.opt1map.get(id(candidate), candidate))
                    if candidate in pending_required:
                        pending_required.remove(candidate)
                    elif candidate in pending_optional:
                        pending_optional.remove(candidate)
            # stop once a full round makes no progress
            if len(failed) == len(pending):
                break

        # look for any ParseFatalExceptions
        if fatals:
            if len(fatals) > 1:
                fatals.sort(key=lambda exc: -exc.loc)
                if fatals[0].loc == fatals[1].loc:
                    # tie on location: prefer the element with the longer name
                    fatals.sort(
                        key=lambda exc: (-exc.loc, -len(str(exc.parser_element)))
                    )
            raise fatals[0]

        if pending_required:
            missing = ", ".join([str(member) for member in pending_required])
            raise ParseException(
                instring,
                loc,
                f"Missing one or more required elements ({missing})",
            )

        # add any unmatched Opts, in case they have default values defined
        match_order.extend(
            member
            for member in self.exprs
            if isinstance(member, Opt) and member.expr in pending_optional
        )

        total_results = ParseResults([])
        for matched in match_order:
            loc, results = matched._parse(instring, loc, doActions)
            total_results += results

        return loc, total_results

    def _generateDefaultName(self) -> str:
        return "{" + " & ".join(map(str, self.exprs)) + "}"
4509 | ||
4510 | def _generateDefaultName(self) -> str: | |
4511 | return "{" + " & ".join(str(e) for e in self.exprs) + "}" | |
4512 | ||
4513 | ||
class ParseElementEnhance(ParserElement):
    """Abstract subclass of :class:`ParserElement`, for combining and
    post-processing parsed tokens.
    """

    def __init__(self, expr: Union[ParserElement, str], savelist: bool = False):
        super().__init__(savelist)
        if isinstance(expr, str_type):
            # promote a bare string to a parser element
            text = typing.cast(str, expr)
            literal_cls = self._literalStringClass
            if issubclass(literal_cls, Token):
                expr = literal_cls(text)  # type: ignore[call-arg]
            elif issubclass(type(self), literal_cls):
                expr = Literal(text)
            else:
                expr = literal_cls(Literal(text))  # type: ignore[assignment, call-arg]
        expr = typing.cast(ParserElement, expr)
        self.expr = expr
        if expr is None:
            return

        # mirror the wrapped expression's parsing attributes
        self.mayIndexError = expr.mayIndexError
        self.mayReturnEmpty = expr.mayReturnEmpty
        self.set_whitespace_chars(
            expr.whiteChars, copy_defaults=expr.copyDefaultWhiteChars
        )
        self.skipWhitespace = expr.skipWhitespace
        self.saveAsList = expr.saveAsList
        self.callPreparse = expr.callPreparse
        self.ignoreExprs.extend(expr.ignoreExprs)

    def recurse(self) -> List[ParserElement]:
        if self.expr is None:
            return []
        return [self.expr]

    def parseImpl(self, instring, loc, doActions=True):
        """Parse with the wrapped expression, reporting failures with this element's message."""
        if self.expr is None:
            raise ParseException(instring, loc, "No expression defined", self)

        try:
            return self.expr._parse(instring, loc, doActions, callPreParse=False)
        except ParseBaseException as pbe:
            pbe.msg = self.errmsg
            raise

    def leave_whitespace(self, recursive: bool = True) -> ParserElement:
        super().leave_whitespace(recursive)

        if recursive and self.expr is not None:
            # apply the setting to a copy of the wrapped expression
            self.expr = self.expr.copy()
            self.expr.leave_whitespace(recursive)
        return self

    def ignore_whitespace(self, recursive: bool = True) -> ParserElement:
        super().ignore_whitespace(recursive)

        if recursive and self.expr is not None:
            # apply the setting to a copy of the wrapped expression
            self.expr = self.expr.copy()
            self.expr.ignore_whitespace(recursive)
        return self

    def ignore(self, other) -> ParserElement:
        # a Suppress that is already being ignored is not registered again
        already_ignored = isinstance(other, Suppress) and other in self.ignoreExprs
        if not already_ignored:
            super().ignore(other)
            if self.expr is not None:
                self.expr.ignore(self.ignoreExprs[-1])
        return self

    def streamline(self) -> ParserElement:
        super().streamline()
        wrapped = self.expr
        if wrapped is not None:
            wrapped.streamline()
        return self

    def _checkRecursion(self, parseElementList):
        if self in parseElementList:
            raise RecursiveGrammarException(parseElementList + [self])
        trail = parseElementList[:] + [self]
        if self.expr is not None:
            self.expr._checkRecursion(trail)

    def validate(self, validateTrace=None) -> None:
        warnings.warn(
            "ParserElement.validate() is deprecated, and should not be used to check for left recursion",
            DeprecationWarning,
            stacklevel=2,
        )
        trace = [] if validateTrace is None else validateTrace
        extended_trace = trace[:] + [self]
        if self.expr is not None:
            self.expr.validate(extended_trace)
        self._checkRecursion([])

    def _generateDefaultName(self) -> str:
        return f"{self.__class__.__name__}:({str(self.expr)})"

    # Compatibility synonyms
    # fmt: off
    @replaced_by_pep8(leave_whitespace)
    def leaveWhitespace(self): ...

    @replaced_by_pep8(ignore_whitespace)
    def ignoreWhitespace(self): ...
    # fmt: on
4621 | # fmt: on | |
4622 | ||
4623 | ||
class IndentedBlock(ParseElementEnhance):
    """
    Expression to match one or more expressions at a given indentation level.
    Useful for parsing text where structure is implied by indentation (like Python source code).
    """

    class _Indent(Empty):
        # zero-width element that only matches at exactly column ``ref_col``
        def __init__(self, ref_col: int):
            super().__init__()
            self.errmsg = f"expected indent at column {ref_col}"
            self.add_condition(lambda s, l, t: col(l, s) == ref_col)

    class _IndentGreater(Empty):
        # zero-width element that only matches at a column beyond ``ref_col``
        def __init__(self, ref_col: int):
            super().__init__()
            self.errmsg = f"expected indent at column greater than {ref_col}"
            self.add_condition(lambda s, l, t: col(l, s) > ref_col)

    def __init__(
        self, expr: ParserElement, *, recursive: bool = False, grouped: bool = True
    ):
        super().__init__(expr, savelist=True)
        # if recursive:
        #     raise NotImplementedError("IndentedBlock with recursive is not implemented")
        self._recursive = recursive
        self._grouped = grouped
        self.parent_anchor = 1

    def parseImpl(self, instring, loc, doActions=True):
        """Parse one or more occurrences of ``self.expr`` that share an indent column.

        The column of the first non-whitespace position after ``loc`` becomes
        the reference column. When ``recursive`` is set, each occurrence may
        be followed by a nested, more deeply indented block.
        """
        # advance parse position to non-whitespace by using an Empty()
        # this should be the column to be used for all subsequent indented lines
        anchor_loc = Empty().preParse(instring, loc)

        # if self.expr does not match at the anchor, this raises and there is
        # nothing more to do
        self.expr.try_parse(instring, anchor_loc, do_actions=doActions)

        indent_col = col(anchor_loc, instring)
        same_level = self._Indent(indent_col)

        inner_expr = Empty() + same_level + self.expr
        if self._recursive:
            deeper_level = self._IndentGreater(indent_col)
            nested_block = IndentedBlock(
                self.expr, recursive=self._recursive, grouped=self._grouped
            )
            nested_block.set_debug(self.debug)
            nested_block.parent_anchor = indent_col
            inner_expr += Opt(deeper_level + nested_block)

        inner_expr.set_name(f"inner {hex(id(inner_expr))[-4:].upper()}@{indent_col}")
        block = OneOrMore(inner_expr)

        trailing_undent = self._Indent(self.parent_anchor) | StringEnd()

        # group the block's tokens only when requested
        body = Group(block) if self._grouped else block
        return (body + Optional(trailing_undent)).parseImpl(
            instring, anchor_loc, doActions
        )
4686 | ||
4687 | ||
class AtStringStart(ParseElementEnhance):
    """Matches if expression matches at the beginning of the parse
    string::

        AtStringStart(Word(nums)).parse_string("123")
        # prints ["123"]

        AtStringStart(Word(nums)).parse_string("    123")
        # raises ParseException
    """

    def __init__(self, expr: Union[ParserElement, str]):
        super().__init__(expr)
        self.callPreparse = False

    def parseImpl(self, instring, loc, doActions=True):
        """Parse the wrapped expression, but only when ``loc`` is 0.

        Raises ``ParseException`` for any other location.
        """
        if loc != 0:
            # pass ``self`` as the failing element, matching the other
            # ParseException raises in this class hierarchy
            raise ParseException(instring, loc, "not found at string start", self)
        return super().parseImpl(instring, loc, doActions)
4707 | ||
4708 | ||
class AtLineStart(ParseElementEnhance):
    r"""Matches if an expression matches at the beginning of a line within
    the parse string

    Example::

        test = '''\
        AAA this line
        AAA and this line
          AAA but not this one
        B AAA and definitely not this one
        '''

        for t in (AtLineStart('AAA') + rest_of_line).search_string(test):
            print(t)

    prints::

        ['AAA', ' this line']
        ['AAA', ' and this line']

    """

    def __init__(self, expr: Union[ParserElement, str]):
        super().__init__(expr)
        self.callPreparse = False

    def parseImpl(self, instring, loc, doActions=True):
        """Parse the wrapped expression, but only when ``loc`` is in column 1.

        Raises ``ParseException`` for any other column.
        """
        if col(loc, instring) != 1:
            # pass ``self`` as the failing element, matching the other
            # ParseException raises in this class hierarchy
            raise ParseException(instring, loc, "not found at line start", self)
        return super().parseImpl(instring, loc, doActions)
4740 | ||
4741 | ||
class FollowedBy(ParseElementEnhance):
    """Lookahead matching of the given parse expression.
    ``FollowedBy`` does *not* advance the parsing position within
    the input string, it only verifies that the specified parse
    expression matches at the current position.  ``FollowedBy``
    always returns a null token list. If any results names are defined
    in the lookahead expression, those *will* be returned for access by
    name.

    Example::

        # use FollowedBy to match a label only if it is followed by a ':'
        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join))

        attr_expr[1, ...].parse_string("shape: SQUARE color: BLACK posn: upper left").pprint()

    prints::

        [['shape', 'SQUARE'], ['color', 'BLACK'], ['posn', 'upper left']]
    """

    def __init__(self, expr: Union[ParserElement, str]):
        super().__init__(expr)
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Check that the wrapped expression matches at ``loc`` without consuming input."""
        # parse with the wrapped expression, then empty the token list of the
        # returned ParseResults; this keeps any named results defined in the
        # lookahead expression
        lookahead = self.expr._parse(instring, loc, doActions=doActions)[1]
        del lookahead[:]

        # the location is returned unchanged
        return loc, lookahead
4776 | ||
4777 | ||
class PrecededBy(ParseElementEnhance):
    """Lookbehind matching of the given parse expression.
    ``PrecededBy`` does not advance the parsing position within the
    input string, it only verifies that the specified parse expression
    matches prior to the current position.  ``PrecededBy`` always
    returns a null token list, but if a results name is defined on the
    given expression, it is returned.

    Parameters:

    - ``expr`` - expression that must match prior to the current parse
      location
    - ``retreat`` - (default= ``None``) - (int) maximum number of characters
      to lookbehind prior to the current parse location

    If the lookbehind expression is a string, :class:`Literal`,
    :class:`Keyword`, or a :class:`Word` or :class:`CharsNotIn`
    with a specified exact or maximum length, then the retreat
    parameter is not required. Otherwise, retreat must be specified to
    give a maximum number of characters to look back from
    the current parse position for a lookbehind match.

    Example::

        # VB-style variable names with type prefixes
        int_var = PrecededBy("#") + pyparsing_common.identifier
        str_var = PrecededBy("$") + pyparsing_common.identifier

    """

    def __init__(
        self, expr: Union[ParserElement, str], retreat: typing.Optional[int] = None
    ):
        super().__init__(expr)
        # work on a copy of the wrapped expression that does not skip whitespace
        self.expr = self.expr().leave_whitespace()
        self.mayReturnEmpty = True
        self.mayIndexError = False
        # ``exact`` means the lookbehind distance is taken from the expression
        # itself, so a single probe at ``loc - retreat`` is made
        self.exact = False
        if isinstance(expr, str_type):
            expr = typing.cast(str, expr)
            retreat = len(expr)
            self.exact = True
        elif isinstance(expr, (Literal, Keyword)):
            retreat = expr.matchLen
            self.exact = True
        elif isinstance(expr, (Word, CharsNotIn)) and expr.maxLen != _MAX_INT:
            retreat = expr.maxLen
            self.exact = True
        elif isinstance(expr, PositionToken):
            retreat = 0
            self.exact = True
        self.retreat = retreat
        self.errmsg = "not preceded by " + str(expr)
        self.skipWhitespace = False
        # always return a null token list (named results are left in place)
        self.parseAction.append(lambda s, l, t: t.__delitem__(slice(None, None)))

    def parseImpl(self, instring, loc=0, doActions=True):
        """Verify that the wrapped expression matches just before ``loc``.

        Returns ``loc`` unchanged together with the lookbehind's results.
        Raises ``ParseException`` (or the last failure from the lookbehind
        attempts) when no match is found.
        """
        if self.exact:
            if loc < self.retreat:
                raise ParseException(instring, loc, self.errmsg, self)
            start = loc - self.retreat
            _, ret = self.expr._parse(instring, start)
        else:
            # retreat specified a maximum lookbehind window, iterate
            test_expr = self.expr + StringEnd()
            instring_slice = instring[max(0, loc - self.retreat) : loc]
            last_expr = ParseException(instring, loc, self.errmsg, self)
            # try each start position inside the window, nearest to ``loc``
            # first; bounding by the slice length keeps the start index from
            # going negative (which would index from the end of the slice)
            for offset in range(1, len(instring_slice) + 1):
                try:
                    _, ret = test_expr._parse(
                        instring_slice, len(instring_slice) - offset
                    )
                except ParseBaseException as pbe:
                    last_expr = pbe
                else:
                    break
            else:
                raise last_expr
        return loc, ret
4858 | ||
4859 | ||
class Located(ParseElementEnhance):
    """
    Decorates a returned token with its starting and ending
    locations in the input string.

    This helper adds the following results names:

    - ``locn_start`` - location where matched expression begins
    - ``locn_end`` - location where matched expression ends
    - ``value`` - the actual parsed results

    Be careful if the input text contains ``<TAB>`` characters, you
    may want to call :class:`ParserElement.parse_with_tabs`

    Example::

        wd = Word(alphas)
        for match in Located(wd).search_string("ljsdf123lksdjjf123lkkjj1222"):
            print(match)

    prints::

        [0, ['ljsdf'], 5]
        [8, ['lksdjjf'], 15]
        [18, ['lkkjj'], 23]

    """

    def parseImpl(self, instring, loc, doActions=True):
        """Parse the wrapped expression and bracket its tokens with start/end locations."""
        begin = loc
        end, tokens = self.expr._parse(instring, begin, doActions, callPreParse=False)

        located = ParseResults([begin, tokens, end])
        located["locn_start"] = begin
        located["value"] = tokens
        located["locn_end"] = end

        if not self.resultsName:
            return end, located
        # must return as a list, so that the name will be attached to the complete group
        return end, [located]
4900 | ||
4901 | ||
class NotAny(ParseElementEnhance):
    """
    Lookahead to disallow matching with the given parse expression.
    ``NotAny`` does *not* advance the parsing position within the
    input string, it only verifies that the specified parse expression
    does *not* match at the current position.  Also, ``NotAny`` does
    *not* skip over leading whitespace. ``NotAny`` always returns
    a null token list.  May be constructed using the ``'~'`` operator.

    Example::

        AND, OR, NOT = map(CaselessKeyword, "AND OR NOT".split())

        # take care not to mistake keywords for identifiers
        ident = ~(AND | OR | NOT) + Word(alphas)
        boolean_term = Opt(NOT) + ident

        # very crude boolean expression - to support parenthesis groups and
        # operation hierarchy, use infix_notation
        boolean_expr = boolean_term + ((AND | OR) + boolean_term)[...]

        # integers that are followed by "." are actually floats
        integer = Word(nums) + ~Char(".")
    """

    def __init__(self, expr: Union[ParserElement, str]):
        super().__init__(expr)
        # set the flag directly rather than calling self.leave_whitespace(),
        # which would propagate the setting to the contained expression
        self.skipWhitespace = False

        self.mayReturnEmpty = True
        self.errmsg = "Found unwanted token, " + str(self.expr)

    def parseImpl(self, instring, loc, doActions=True):
        """Succeed with no tokens unless the wrapped expression can parse at ``loc``."""
        unwanted_present = self.expr.can_parse_next(
            instring, loc, do_actions=doActions
        )
        if not unwanted_present:
            return loc, []
        raise ParseException(instring, loc, self.errmsg, self)

    def _generateDefaultName(self) -> str:
        return "~{" + str(self.expr) + "}"
4943 | ||
4944 | ||
class _MultipleMatch(ParseElementEnhance):
    """
    Common implementation for repetition expressions (base class of
    :class:`OneOrMore` and :class:`ZeroOrMore`): matches the contained
    expression repeatedly, optionally stopping ahead of a sentinel
    expression.
    """

    def __init__(
        self,
        expr: Union[str, ParserElement],
        stop_on: typing.Optional[Union[ParserElement, str]] = None,
        *,
        stopOn: typing.Optional[Union[ParserElement, str]] = None,
    ):
        super().__init__(expr)
        self.saveAsList = True
        # stopOn() takes care of converting a plain string sentinel into a
        # literal expression, so the raw argument can be passed straight in
        self.stopOn(stopOn or stop_on)

    def stopOn(self, ender) -> ParserElement:
        """
        Define (or clear, when ``ender`` is ``None``) the sentinel expression
        that ends the repetition. Returns ``self``.
        """
        if isinstance(ender, str_type):
            ender = self._literalStringClass(ender)
        # stored negated, so that a successful sentinel match raises
        self.not_ender = None if ender is None else ~ender
        return self

    def parseImpl(self, instring, loc, doActions=True):
        """
        Match the contained expression once (propagating any failure), then
        keep matching it until it fails or the sentinel is seen; return the
        final location and the accumulated tokens.
        """
        parse_expr = self.expr._parse
        skip_ignorables = self._skipIgnorables
        # raises if the stop-on sentinel matches at the given location
        reject_ender = None if self.not_ender is None else self.not_ender.try_parse

        # must be at least one (but first see if we are the stopOn sentinel;
        # if so, fail)
        if reject_ender is not None:
            reject_ender(instring, loc)
        loc, tokens = parse_expr(instring, loc, doActions)

        try:
            has_ignore_exprs = bool(self.ignoreExprs)
            while True:
                if reject_ender is not None:
                    reject_ender(instring, loc)
                next_loc = skip_ignorables(instring, loc) if has_ignore_exprs else loc
                loc, more_tokens = parse_expr(instring, next_loc, doActions)
                tokens += more_tokens
        except (ParseException, IndexError):
            # first failed repetition (or sentinel hit) ends the loop; what
            # was matched so far is the result
            pass

        return loc, tokens

    def _setResultsName(self, name, listAllMatches=False):
        """
        Attach a results name, first warning (when the
        ``warn_ungrouped_named_tokens_in_collection`` diagnostic is enabled
        and not suppressed) about named expressions contained in this one.
        """
        diag_flag = Diagnostics.warn_ungrouped_named_tokens_in_collection
        if (
            __diag__.warn_ungrouped_named_tokens_in_collection
            and diag_flag not in self.suppress_warnings_
        ):
            for contained in [self.expr] + self.expr.recurse():
                if not isinstance(contained, ParserElement):
                    continue
                if not contained.resultsName:
                    continue
                if diag_flag in contained.suppress_warnings_:
                    continue
                warnings.warn(
                    "{}: setting results name {!r} on {} expression "
                    "collides with {!r} on contained expression".format(
                        "warn_ungrouped_named_tokens_in_collection",
                        name,
                        type(self).__name__,
                        contained.resultsName,
                    ),
                    stacklevel=3,
                )

        return super()._setResultsName(name, listAllMatches)
5020 | ||
5021 | ||
class OneOrMore(_MultipleMatch):
    """
    Matches the given expression one or more times in succession.

    Parameters:

    - ``expr`` - expression that must match one or more times
    - ``stop_on`` - (default= ``None``) - expression for a terminating sentinel
      (only required if the sentinel would ordinarily match the repetition
      expression)

    Example::

        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word).set_parse_action(' '.join))

        text = "shape: SQUARE posn: upper left color: BLACK"
        attr_expr[1, ...].parse_string(text).pprint()  # Fail! read 'color' as data instead of next label -> [['shape', 'SQUARE color']]

        # use stop_on attribute for OneOrMore to avoid reading label string as part of the data
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join))
        OneOrMore(attr_expr).parse_string(text).pprint()  # Better -> [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'BLACK']]

        # could also be written as
        (attr_expr * (1,)).parse_string(text).pprint()
    """

    def _generateDefaultName(self) -> str:
        # rendered as the repeated expression in braces followed by "..."
        repeated = str(self.expr)
        return "{" + repeated + "}..."
5052 | ||
5053 | ||
class ZeroOrMore(_MultipleMatch):
    """
    Matches the given expression any number of times in succession,
    including not at all.

    Parameters:

    - ``expr`` - expression that must match zero or more times
    - ``stop_on`` - expression for a terminating sentinel
      (only required if the sentinel would ordinarily match the repetition
      expression) - (default= ``None``)

    Example: similar to :class:`OneOrMore`
    """

    def __init__(
        self,
        expr: Union[str, ParserElement],
        stop_on: typing.Optional[Union[ParserElement, str]] = None,
        *,
        stopOn: typing.Optional[Union[ParserElement, str]] = None,
    ):
        sentinel = stopOn or stop_on
        super().__init__(expr, stopOn=sentinel)
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """
        Delegate to the shared repetition logic; when not even one match is
        found, return an empty result at the unchanged location instead of
        failing.
        """
        try:
            outcome = super().parseImpl(instring, loc, doActions)
        except (ParseException, IndexError):
            outcome = loc, ParseResults([], name=self.resultsName)
        return outcome

    def _generateDefaultName(self) -> str:
        # rendered as the repeated expression in brackets followed by "..."
        repeated = str(self.expr)
        return "[" + repeated + "]..."
5086 | ||
5087 | ||
class DelimitedList(ParseElementEnhance):
    """Expression matching a list of ``expr`` items separated by ``delim``.

    See :meth:`__init__` for the full description of the arguments.
    """

    def __init__(
        self,
        expr: Union[str, ParserElement],
        delim: Union[str, ParserElement] = ",",
        combine: bool = False,
        min: typing.Optional[int] = None,
        max: typing.Optional[int] = None,
        *,
        allow_trailing_delim: bool = False,
    ):
        """Helper to define a delimited list of expressions - the delimiter
        defaults to ','. By default, the list elements and delimiters can
        have intervening whitespace, and comments, but this can be
        overridden by passing ``combine=True`` in the constructor. If
        ``combine`` is set to ``True``, the matching tokens are
        returned as a single token string, with the delimiters included;
        otherwise, the matching tokens are returned as a list of tokens,
        with the delimiters suppressed.

        If ``allow_trailing_delim`` is set to True, then the list may end with
        a delimiter.

        ``min`` and ``max`` bound the number of list elements. ``min``
        defaults to 1 and must be at least 1; ``max`` defaults to no limit
        and must not be smaller than the effective ``min``. A
        :class:`ValueError` is raised when either bound is invalid.

        Example::

            DelimitedList(Word(alphas)).parse_string("aa,bb,cc") # -> ['aa', 'bb', 'cc']
            DelimitedList(Word(hexnums), delim=':', combine=True).parse_string("AA:BB:CC:DD:EE") # -> ['AA:BB:CC:DD:EE']
        """
        if isinstance(expr, str_type):
            expr = ParserElement._literalStringClass(expr)
        expr = typing.cast(ParserElement, expr)

        if min is not None and min < 1:
            raise ValueError("min must be greater than 0")
        # Compare max against the effective minimum (1 when min is omitted).
        # Previously max was only checked when min was given, so e.g. max=0
        # slipped through and failed later, inside the repetition operator,
        # with an unrelated message.
        if max is not None and max < (min or 1):
            raise ValueError("max must be greater than, or equal to min")

        self.content = expr
        self.raw_delim = str(delim)
        self.delim = delim
        self.combine = combine
        if not combine:
            # delimiters are dropped from the results unless combining
            self.delim = Suppress(delim)
        self.min = min or 1
        self.max = max
        self.allow_trailing_delim = allow_trailing_delim

        # one leading element, then (delim + element) repeated within bounds;
        # the first element already counts, hence the "- 1" on both bounds
        delim_list_expr = self.content + (self.delim + self.content) * (
            self.min - 1,
            None if self.max is None else self.max - 1,
        )
        if self.allow_trailing_delim:
            delim_list_expr += Opt(self.delim)

        if self.combine:
            delim_list_expr = Combine(delim_list_expr)

        super().__init__(delim_list_expr, savelist=True)

    def _generateDefaultName(self) -> str:
        # e.g. "<content> [<delim> <content>]..."
        return "{0} [{1} {0}]...".format(self.content.streamline(), self.raw_delim)
5151 | ||
5152 | ||
class _NullToken:
    """
    Placeholder object that is falsy and converts to an empty string.

    :class:`Opt` uses an instance of this class as the default for its
    ``default`` argument, to tell "no default given" apart from any real
    value.
    """

    def __bool__(self):
        # always evaluates as false in a boolean context
        return False

    def __str__(self):
        # renders as nothing
        return ""
5159 | ||
5160 | ||
class Opt(ParseElementEnhance):
    """
    Makes the given expression optional: it is matched if present, and
    parsing continues unaffected if it is not.

    Parameters:

    - ``expr`` - expression that may match zero or one time
    - ``default`` (optional) - value to be returned if the optional expression is not found.

    Example::

        # US postal code can be a 5-digit zip, plus optional 4-digit qualifier
        zip = Combine(Word(nums, exact=5) + Opt('-' + Word(nums, exact=4)))
        zip.run_tests('''
            # traditional ZIP code
            12345

            # ZIP+4 form
            12101-0001

            # invalid ZIP
            98765-
            ''')

    prints::

        # traditional ZIP code
        12345
        ['12345']

        # ZIP+4 form
        12101-0001
        ['12101-0001']

        # invalid ZIP
        98765-
             ^
        FAIL: Expected end of text (at char 5), (line:1, col:6)
    """

    # sentinel meaning "no default value was supplied"
    __optionalNotMatched = _NullToken()

    def __init__(
        self, expr: Union[ParserElement, str], default: Any = __optionalNotMatched
    ):
        super().__init__(expr, savelist=False)
        self.saveAsList = self.expr.saveAsList
        self.defaultValue = default
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """
        Try the wrapped expression at ``loc``. On failure the location is
        left unchanged and the tokens are either empty or, when a default
        was supplied, the default value (also stored under the wrapped
        expression's results name if it has one).
        """
        inner_expr = self.expr
        try:
            loc, tokens = inner_expr._parse(instring, loc, doActions, callPreParse=False)
        except (ParseException, IndexError):
            fallback = self.defaultValue
            if fallback is self.__optionalNotMatched:
                tokens = []
            elif inner_expr.resultsName:
                tokens = ParseResults([fallback])
                tokens[inner_expr.resultsName] = fallback
            else:
                tokens = [fallback]
        return loc, tokens

    def _generateDefaultName(self) -> str:
        inner = str(self.expr)
        # strip off redundant inner {}'s
        while len(inner) > 1 and inner.startswith("{") and inner.endswith("}"):
            inner = inner[1:-1]
        return "[" + inner + "]"


# ``Optional`` is kept as an alternate name for :class:`Opt`
Optional = Opt
5236 | ||
5237 | ||
class SkipTo(ParseElementEnhance):
    """
    Token for skipping over all undefined text until the matched
    expression is found.

    Parameters:

    - ``expr`` - target expression marking the end of the data to be skipped
    - ``include`` - if ``True``, the target expression is also parsed
      (the skipped text and target expression are returned as a 2-element
      list) (default= ``False``).
    - ``ignore`` - (default= ``None``) used to define grammars (typically quoted strings and
      comments) that might contain false matches to the target expression
    - ``fail_on`` - (default= ``None``) define expressions that are not allowed to be
      included in the skipped text; if found before the target expression is found,
      the :class:`SkipTo` is not a match

    Example::

        report = '''
            Outstanding Issues Report - 1 Jan 2000

               # | Severity | Description                               |  Days Open
            -----+----------+-------------------------------------------+-----------
             101 | Critical | Intermittent system crash                 |          6
              94 | Cosmetic | Spelling error on Login ('log|n')         |         14
              79 | Minor    | System slow when running too many reports |         47
            '''
        integer = Word(nums)
        SEP = Suppress('|')
        # use SkipTo to simply match everything up until the next SEP
        # - ignore quoted strings, so that a '|' character inside a quoted string does not match
        # - parse action will call token.strip() for each matched token, i.e., the description body
        string_data = SkipTo(SEP, ignore=quoted_string)
        string_data.set_parse_action(token_map(str.strip))
        ticket_expr = (integer("issue_num") + SEP
                      + string_data("sev") + SEP
                      + string_data("desc") + SEP
                      + integer("days_open"))

        for tkt in ticket_expr.search_string(report):
            print(tkt.dump())

    prints::

        ['101', 'Critical', 'Intermittent system crash', '6']
        - days_open: '6'
        - desc: 'Intermittent system crash'
        - issue_num: '101'
        - sev: 'Critical'
        ['94', 'Cosmetic', "Spelling error on Login ('log|n')", '14']
        - days_open: '14'
        - desc: "Spelling error on Login ('log|n')"
        - issue_num: '94'
        - sev: 'Cosmetic'
        ['79', 'Minor', 'System slow when running too many reports', '47']
        - days_open: '47'
        - desc: 'System slow when running too many reports'
        - issue_num: '79'
        - sev: 'Minor'
    """

    def __init__(
        self,
        other: Union[ParserElement, str],
        include: bool = False,
        ignore: typing.Optional[Union[ParserElement, str]] = None,
        fail_on: typing.Optional[Union[ParserElement, str]] = None,
        *,
        failOn: typing.Optional[Union[ParserElement, str]] = None,
    ):
        super().__init__(other)
        # the camelCase keyword takes precedence when both spellings are given
        failOn = failOn or fail_on
        if ignore is not None:
            self.ignore(ignore)
        self.mayReturnEmpty = True
        self.mayIndexError = False
        self.includeMatch = include
        self.saveAsList = False
        if isinstance(failOn, str_type):
            self.failOn = self._literalStringClass(failOn)
        else:
            self.failOn = failOn
        self.errmsg = "No match found for " + str(self.expr)

    def parseImpl(self, instring, loc, doActions=True):
        """
        Scan forward from ``loc`` one character at a time until the target
        expression matches, and return ``(new_loc, results)`` where the
        results hold the skipped text (followed by the target's tokens when
        ``include`` was set).

        Raises :class:`ParseException` if the end of the input is reached
        without the target expression matching.
        """
        startloc = loc
        instrlen = len(instring)
        parse_target = self.expr._parse
        # use the snake_case method directly, as NotAny does, rather than
        # going through the camelCase compatibility synonym at every position
        fail_on_matches = (
            self.failOn.can_parse_next if self.failOn is not None else None
        )
        pre_parse = self.preParse if self.callPreparse else None

        scan_loc = loc
        while scan_loc <= instrlen:
            # stop scanning if the failOn expression matches here
            # NOTE(review): leaving the loop through this break bypasses the
            # while/else failure below, so execution continues into the
            # result-building code - confirm this is the intended way of
            # honoring the documented "not a match" contract.
            if fail_on_matches is not None and fail_on_matches(instring, scan_loc):
                break

            if pre_parse is not None:
                # skip grammar-ignored expressions
                scan_loc = pre_parse(instring, scan_loc)

            try:
                # probe only: actions are never run while searching
                parse_target(instring, scan_loc, doActions=False, callPreParse=False)
            except (ParseException, IndexError):
                # no match, advance loc in string
                scan_loc += 1
            else:
                # matched skipto expr, done
                break

        else:
            # ran off the end of the input string without matching skipto expr, fail
            raise ParseException(instring, loc, self.errmsg, self)

        # build up return values
        loc = scan_loc
        skipresult = ParseResults(instring[startloc:loc])

        if self.includeMatch:
            # parse the target for real this time, honoring doActions
            loc, mat = parse_target(instring, loc, doActions, callPreParse=False)
            skipresult += mat

        return loc, skipresult
5366 | ||
5367 | ||
class Forward(ParseElementEnhance):
    """
    Forward declaration of an expression to be defined later -
    used for recursive grammars, such as algebraic infix notation.
    When the expression is known, it is assigned to the ``Forward``
    variable using the ``'<<'`` operator.

    Note: take care when assigning to ``Forward`` not to overlook
    precedence of operators.

    Specifically, ``'|'`` has a lower precedence than ``'<<'``, so that::

        fwd_expr << a | b | c

    will actually be evaluated as::

        (fwd_expr << a) | b | c

    thereby leaving b and c out as parseable alternatives.  It is recommended that you
    explicitly group the values inserted into the ``Forward``::

        fwd_expr << (a | b | c)

    Converting to use the ``'<<='`` operator instead will avoid this problem.

    See :class:`ParseResults.pprint` for an example of a recursive
    parser created using ``Forward``.
    """

    def __init__(self, other: typing.Optional[Union[ParserElement, str]] = None):
        # remember where this Forward was created, for the warning issued by
        # __del__ if no expression is ever attached
        self.caller_frame = traceback.extract_stack(limit=2)[0]
        super().__init__(other, savelist=False)  # type: ignore[arg-type]
        self.lshift_line = None

    def __lshift__(self, other) -> "Forward":
        """
        Attach ``other`` as the expression this ``Forward`` stands for,
        copying its parsing attributes onto ``self``. A string is converted
        to a literal expression; any other non-:class:`ParserElement` operand
        yields ``NotImplemented``. Returns ``self``.
        """
        if isinstance(other, str_type):
            other = self._literalStringClass(other)

        if not isinstance(other, ParserElement):
            return NotImplemented

        # Only discard the creation frame once the operand has been accepted.
        # Dropping it before the type check would leave an expression-less
        # Forward without the frame that __del__ reads for its warning.
        if hasattr(self, "caller_frame"):
            del self.caller_frame

        self.expr = other
        self.streamlined = other.streamlined
        self.mayIndexError = self.expr.mayIndexError
        self.mayReturnEmpty = self.expr.mayReturnEmpty
        self.set_whitespace_chars(
            self.expr.whiteChars, copy_defaults=self.expr.copyDefaultWhiteChars
        )
        self.skipWhitespace = self.expr.skipWhitespace
        self.saveAsList = self.expr.saveAsList
        self.ignoreExprs.extend(self.expr.ignoreExprs)
        # remembered so that __or__ can detect "fwd << a | b" on the same line
        self.lshift_line = traceback.extract_stack(limit=2)[-2]  # type: ignore[assignment]
        return self

    def __ilshift__(self, other) -> "Forward":
        if not isinstance(other, ParserElement):
            return NotImplemented

        return self << other

    def __or__(self, other) -> "ParserElement":
        caller_line = traceback.extract_stack(limit=2)[-2]
        if (
            __diag__.warn_on_match_first_with_lshift_operator
            and caller_line == self.lshift_line
            and Diagnostics.warn_on_match_first_with_lshift_operator
            not in self.suppress_warnings_
        ):
            warnings.warn(
                "using '<<' operator with '|' is probably an error, use '<<='",
                stacklevel=2,
            )
        return super().__or__(other)

    def __del__(self):
        # see if we are getting dropped because of '=' reassignment of var instead of '<<=' or '<<'
        if (
            self.expr is None
            and __diag__.warn_on_assignment_to_Forward
            and Diagnostics.warn_on_assignment_to_Forward not in self.suppress_warnings_
        ):
            warnings.warn_explicit(
                "Forward defined here but no expression attached later using '<<=' or '<<'",
                UserWarning,
                filename=self.caller_frame.filename,
                lineno=self.caller_frame.lineno,
            )

    def parseImpl(self, instring, loc, doActions=True):
        """
        Parse using the attached expression. When left recursion support is
        not enabled this simply defers to the base class; otherwise the
        bounded recursion algorithm described below is applied.
        """
        if (
            self.expr is None
            and __diag__.warn_on_parse_using_empty_Forward
            and Diagnostics.warn_on_parse_using_empty_Forward
            not in self.suppress_warnings_
        ):
            # walk stack until parse_string, scan_string, search_string, or transform_string is found
            parse_fns = (
                "parse_string",
                "scan_string",
                "search_string",
                "transform_string",
            )
            tb = traceback.extract_stack(limit=200)
            for i, frm in enumerate(reversed(tb), start=1):
                if frm.name in parse_fns:
                    stacklevel = i + 1
                    break
            else:
                stacklevel = 2
            warnings.warn(
                "Forward expression was never assigned a value, will not parse any input",
                stacklevel=stacklevel,
            )
        if not ParserElement._left_recursion_enabled:
            return super().parseImpl(instring, loc, doActions)
        # ## Bounded Recursion algorithm ##
        # Recursion only needs to be processed at ``Forward`` elements, since they are
        # the only ones that can actually refer to themselves. The general idea is
        # to handle recursion stepwise: We start at no recursion, then recurse once,
        # recurse twice, ..., until more recursion offers no benefit (we hit the bound).
        #
        # The "trick" here is that each ``Forward`` gets evaluated in two contexts
        # - to *match* a specific recursion level, and
        # - to *search* the bounded recursion level
        # and the two run concurrently. The *search* must *match* each recursion level
        # to find the best possible match. This is handled by a memo table, which
        # provides the previous match to the next level match attempt.
        #
        # See also "Left Recursion in Parsing Expression Grammars", Medeiros et al.
        #
        # There is a complication since we not only *parse* but also *transform* via
        # actions: We do not want to run the actions too often while expanding. Thus,
        # we expand using `doActions=False` and only run `doActions=True` if the next
        # recursion level is acceptable.
        with ParserElement.recursion_lock:
            memo = ParserElement.recursion_memos
            try:
                # we are parsing at a specific recursion expansion - use it as-is
                prev_loc, prev_result = memo[loc, self, doActions]
                if isinstance(prev_result, Exception):
                    raise prev_result
                return prev_loc, prev_result.copy()
            except KeyError:
                act_key = (loc, self, True)
                peek_key = (loc, self, False)
                # we are searching for the best recursion expansion - keep on improving
                # both `doActions` cases must be tracked separately here!
                prev_loc, prev_peek = memo[peek_key] = (
                    loc - 1,
                    ParseException(
                        instring, loc, "Forward recursion without base case", self
                    ),
                )
                if doActions:
                    memo[act_key] = memo[peek_key]
                while True:
                    try:
                        new_loc, new_peek = super().parseImpl(instring, loc, False)
                    except ParseException:
                        # we failed before getting any match - do not hide the error
                        if isinstance(prev_peek, Exception):
                            raise
                        new_loc, new_peek = prev_loc, prev_peek
                    # the match did not get better: we are done
                    if new_loc <= prev_loc:
                        if doActions:
                            # replace the match for doActions=False as well,
                            # in case the action did backtrack
                            prev_loc, prev_result = memo[peek_key] = memo[act_key]
                            del memo[peek_key], memo[act_key]
                            return prev_loc, prev_result.copy()
                        del memo[peek_key]
                        return prev_loc, prev_peek.copy()
                    # the match did get better: see if we can improve further
                    else:
                        if doActions:
                            try:
                                memo[act_key] = super().parseImpl(instring, loc, True)
                            except ParseException as e:
                                memo[peek_key] = memo[act_key] = (new_loc, e)
                                raise
                        prev_loc, prev_peek = memo[peek_key] = new_loc, new_peek

    def leave_whitespace(self, recursive: bool = True) -> ParserElement:
        # only this element's own flag is changed; ``recursive`` is not used here
        self.skipWhitespace = False
        return self

    def ignore_whitespace(self, recursive: bool = True) -> ParserElement:
        # only this element's own flag is changed; ``recursive`` is not used here
        self.skipWhitespace = True
        return self

    def streamline(self) -> ParserElement:
        # the flag is set before descending, so a self-referencing grammar
        # does not recurse back into this element
        if not self.streamlined:
            self.streamlined = True
            if self.expr is not None:
                self.expr.streamline()
        return self

    def validate(self, validateTrace=None) -> None:
        warnings.warn(
            "ParserElement.validate() is deprecated, and should not be used to check for left recursion",
            DeprecationWarning,
            stacklevel=2,
        )
        if validateTrace is None:
            validateTrace = []

        if self not in validateTrace:
            tmp = validateTrace[:] + [self]
            if self.expr is not None:
                self.expr.validate(tmp)
        self._checkRecursion([])

    def _generateDefaultName(self) -> str:
        # Avoid infinite recursion by setting a temporary _defaultName
        self._defaultName = ": ..."

        # Use the string representation of main expression.
        ret_string = "..."
        try:
            if self.expr is not None:
                ret_string = str(self.expr)[:1000]
            else:
                ret_string = "None"
        except Exception:
            # Best effort: fall back to the "..." placeholder if the contained
            # expression cannot be rendered. This replaces a ``return`` inside
            # ``finally``, which also discarded KeyboardInterrupt/SystemExit.
            pass
        return self.__class__.__name__ + ": " + ret_string

    def copy(self) -> ParserElement:
        if self.expr is not None:
            return super().copy()
        else:
            # nothing attached yet: return a new Forward that refers to this
            # one, so a later assignment to ``self`` is seen through the copy
            ret = Forward()
            ret <<= self
            return ret

    def _setResultsName(self, name, list_all_matches=False):
        if (
            __diag__.warn_name_set_on_empty_Forward
            and Diagnostics.warn_name_set_on_empty_Forward
            not in self.suppress_warnings_
        ):
            if self.expr is None:
                warnings.warn(
                    "{}: setting results name {!r} on {} expression "
                    "that has no contained expression".format(
                        "warn_name_set_on_empty_Forward", name, type(self).__name__
                    ),
                    stacklevel=3,
                )

        return super()._setResultsName(name, list_all_matches)

    # Compatibility synonyms
    # fmt: off
    @replaced_by_pep8(leave_whitespace)
    def leaveWhitespace(self): ...

    @replaced_by_pep8(ignore_whitespace)
    def ignoreWhitespace(self): ...
    # fmt: on
5631 | ||
5632 | ||
class TokenConverter(ParseElementEnhance):
    """
    Abstract subclass of :class:`ParseElementEnhance`, for converting parsed results.
    """

    def __init__(self, expr: Union[ParserElement, str], savelist=False):
        # ``savelist`` is accepted but not forwarded to the base class;
        # ``saveAsList`` is always reset to False here
        super().__init__(expr)
        self.saveAsList = False
5641 | ||
5642 | ||
class Combine(TokenConverter):
    """Converter that joins all matched tokens into a single string.
    By default, the matching patterns must also be contiguous in the
    input string; this can be disabled by specifying
    ``'adjacent=False'`` in the constructor.

    Example::

        real = Word(nums) + '.' + Word(nums)
        print(real.parse_string('3.1416')) # -> ['3', '.', '1416']
        # will also erroneously match the following
        print(real.parse_string('3. 1416')) # -> ['3', '.', '1416']

        real = Combine(Word(nums) + '.' + Word(nums))
        print(real.parse_string('3.1416')) # -> ['3.1416']
        # no match when there are internal spaces
        print(real.parse_string('3. 1416')) # -> Exception: Expected W:(0123...)
    """

    def __init__(
        self,
        expr: ParserElement,
        join_string: str = "",
        adjacent: bool = True,
        *,
        joinString: typing.Optional[str] = None,
    ):
        super().__init__(expr)
        # the camelCase keyword wins whenever it is given explicitly
        if joinString is None:
            joinString = join_string
        # suppress whitespace-stripping in contained parse expressions, but re-enable it on the Combine itself
        if adjacent:
            self.leave_whitespace()
        self.adjacent = adjacent
        self.skipWhitespace = True
        self.joinString = joinString
        self.callPreparse = True

    def ignore(self, other) -> ParserElement:
        # when tokens must be adjacent, register the ignore expression via
        # ParserElement.ignore directly; otherwise use the inherited ignore()
        if not self.adjacent:
            super().ignore(other)
        else:
            ParserElement.ignore(self, other)
        return self

    def postParse(self, instring, loc, tokenlist):
        """
        Replace the matched tokens with one string built from them, keeping
        whatever the copied results object retains after its items are
        deleted.
        """
        joined = "".join(tokenlist._asStringList(self.joinString))

        combined = tokenlist.copy()
        del combined[:]
        combined += ParseResults([joined], modal=self.modalResults)

        if self.resultsName and combined.haskeys():
            return [combined]
        return combined
5698 | ||
5699 | ||
class Group(TokenConverter):
    """Converter that returns the matched tokens nested in their own list -
    useful for returning tokens of :class:`ZeroOrMore` and :class:`OneOrMore`
    expressions.

    The optional ``aslist`` argument when set to True will return the
    parsed tokens as a Python list instead of a pyparsing ParseResults.

    Example::

        ident = Word(alphas)
        num = Word(nums)
        term = ident | num
        func = ident + Opt(DelimitedList(term))
        print(func.parse_string("fn a, b, 100"))
        # -> ['fn', 'a', 'b', '100']

        func = ident + Group(Opt(DelimitedList(term)))
        print(func.parse_string("fn a, b, 100"))
        # -> ['fn', ['a', 'b', '100']]
    """

    def __init__(self, expr: ParserElement, aslist: bool = False):
        super().__init__(expr)
        self.saveAsList = True
        self._asPythonList = aslist

    def postParse(self, instring, loc, tokenlist):
        """
        Wrap the matched tokens in an enclosing list, or, when ``aslist`` was
        requested, convert them to a ``ParseResults.List``.
        """
        if not self._asPythonList:
            return [tokenlist]

        if isinstance(tokenlist, ParseResults):
            plain_items = tokenlist.asList()
        else:
            plain_items = list(tokenlist)
        return ParseResults.List(plain_items)
5735 | ||
5736 | ||
class Dict(TokenConverter):
    """Converter that returns a repetitive expression as a list, and in
    addition makes every element reachable by name, using the first token
    of that element as its key. Useful for tabular report scraping when
    the first column can be used as an item key.

    Passing ``asdict=True`` makes the parsed tokens come back as a Python
    dict rather than as a pyparsing ParseResults.

    Example::

        data_word = Word(alphas)
        label = data_word + FollowedBy(':')

        text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
        attr_expr = (label + Suppress(':') + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join))

        # print attributes as plain groups
        print(attr_expr[1, ...].parse_string(text).dump())

        # instead of OneOrMore(expr), parse using Dict(Group(expr)[1, ...]) - Dict will auto-assign names
        result = Dict(Group(attr_expr)[1, ...]).parse_string(text)
        print(result.dump())

        # access named fields as dict entries, or output as dict
        print(result['shape'])
        print(result.as_dict())

    prints::

        ['shape', 'SQUARE', 'posn', 'upper left', 'color', 'light blue', 'texture', 'burlap']
        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
        - color: 'light blue'
        - posn: 'upper left'
        - shape: 'SQUARE'
        - texture: 'burlap'
        SQUARE
        {'color': 'light blue', 'posn': 'upper left', 'texture': 'burlap', 'shape': 'SQUARE'}

    See more examples at :class:`ParseResults` of accessing fields by results name.
    """

    def __init__(self, expr: ParserElement, asdict: bool = False):
        super().__init__(expr)
        self.saveAsList = True
        # remember whether postParse should return a plain Python dict
        self._asPythonDict = asdict

    def postParse(self, instring, loc, tokenlist):
        """Register each entry of ``tokenlist`` under a key taken from its
        first token, then return the (possibly dict-converted) results.

        For every non-empty entry the value stored under the key is:

        - ``""`` when the entry holds only the key,
        - the second token when the entry has exactly two tokens and the
          second one is not a ParseResults,
        - otherwise a copy of the entry with the key removed - unwrapped to
          its single item when exactly one item remains and the copy has
          no keys of its own.

        Raises ``TypeError`` when an entry cannot be copied.
        """
        for position, entry in enumerate(tokenlist):
            if len(entry) == 0:
                continue

            key = entry[0]
            if isinstance(key, int):
                # integer keys are stored under their string form
                key = str(key).strip()

            if len(entry) == 1:
                tokenlist[key] = _ParseResultsWithOffset("", position)
                continue

            if len(entry) == 2 and not isinstance(entry[1], ParseResults):
                tokenlist[key] = _ParseResultsWithOffset(entry[1], position)
                continue

            try:
                remainder = entry.copy()
            except Exception:
                raise TypeError(
                    "could not extract dict values from parsed results"
                    " - Dict expression must contain Grouped expressions"
                ) from None

            # drop the key token; what is left is the value
            del remainder[0]

            keep_whole = len(remainder) != 1 or (
                isinstance(remainder, ParseResults) and remainder.haskeys()
            )
            value = remainder if keep_whole else remainder[0]
            tokenlist[key] = _ParseResultsWithOffset(value, position)

        converted = tokenlist.as_dict() if self._asPythonDict else tokenlist
        return [converted] if self.resultsName else converted
5822 | ||
5823 | ||
class Suppress(TokenConverter):
    """Converter that matches an expression but drops its tokens from the
    parsed results.

    Example::

        source = "a, b, c,d"
        wd = Word(alphas)
        wd_list1 = wd + (',' + wd)[...]
        print(wd_list1.parse_string(source))

        # often, delimiters that are useful during parsing are just in the
        # way afterward - use Suppress to keep them out of the parsed output
        wd_list2 = wd + (Suppress(',') + wd)[...]
        print(wd_list2.parse_string(source))

        # Skipped text (using '...') can be suppressed as well
        source = "lead in START relevant text END trailing text"
        start_marker = Keyword("START")
        end_marker = Keyword("END")
        find_body = Suppress(...) + start_marker + ... + end_marker
        print(find_body.parse_string(source))

    prints::

        ['a', ',', 'b', ',', 'c', ',', 'd']
        ['a', 'b', 'c', 'd']
        ['START', 'relevant text ', 'END']

    (See also :class:`DelimitedList`.)
    """

    def __init__(self, expr: Union[ParserElement, str], savelist: bool = False):
        # ``Suppress(...)`` is a placeholder: wrap a pending skip that is
        # resolved once the expression is combined with ``+`` or ``-``.
        # ``savelist`` is accepted but not used by this constructor.
        super().__init__(_PendingSkip(NoMatch()) if expr is ... else expr)

    def __add__(self, other) -> "ParserElement":
        if not isinstance(self.expr, _PendingSkip):
            return super().__add__(other)
        # pending skip: suppress everything up to ``other``, then match it
        return Suppress(SkipTo(other)) + other

    def __sub__(self, other) -> "ParserElement":
        if not isinstance(self.expr, _PendingSkip):
            return super().__sub__(other)
        # pending skip: suppress everything up to ``other``, then match it
        return Suppress(SkipTo(other)) - other

    def postParse(self, instring, loc, tokenlist):
        # discard whatever was matched
        return []

    def suppress(self) -> ParserElement:
        # already a Suppress - nothing further to wrap
        return self
5877 | ||
5878 | ||
def trace_parse_action(f: ParseAction) -> ParseAction:
    """Decorator for debugging parse actions.

    Each call of the decorated parse action writes
    ``">>entering name(line: <current_source_line>, <parse_location>, <matched_tokens>)"``
    to ``sys.stderr`` before running it, and afterwards writes
    ``"<<leaving name"`` followed by either the returned value or the
    exception that the parse action raised (the exception is re-raised).

    Example::

        wd = Word(alphas)

        @trace_parse_action
        def remove_duplicate_chars(tokens):
            return ''.join(sorted(set(''.join(tokens))))

        wds = wd[1, ...].set_parse_action(remove_duplicate_chars)
        print(wds.parse_string("slkdjs sld sldd sdlf sdljf"))

    prints::

        >>entering remove_duplicate_chars(line: 'slkdjs sld sldd sdlf sdljf', 0, (['slkdjs', 'sld', 'sldd', 'sdlf', 'sdljf'], {}))
        <<leaving remove_duplicate_chars (ret: 'dfjkls')
        ['dfjkls']
    """
    f = _trim_arity(f)

    def z(*paArgs):
        label = f.__name__
        # the last three positional arguments are (string, location, tokens)
        s, l, t = paArgs[-3:]
        if len(paArgs) > 3:
            # an extra leading argument is reported by its class name
            label = f"{paArgs[0].__class__.__name__}.{label}"
        sys.stderr.write(f">>entering {label}(line: {line(l, s)!r}, {l}, {t!r})\n")
        try:
            ret = f(*paArgs)
        except Exception as exc:
            sys.stderr.write(f"<<leaving {label} (exception: {exc})\n")
            raise
        else:
            sys.stderr.write(f"<<leaving {label} (ret: {ret!r})\n")
            return ret

    z.__name__ = f.__name__
    return z
5922 | ||
5923 | ||
# convenience constants for positional expressions
# Each one is a module-level instance of the corresponding expression class,
# created once here and given a name equal to the constant's own identifier.
empty = Empty().set_name("empty")
line_start = LineStart().set_name("line_start")
line_end = LineEnd().set_name("line_end")
string_start = StringStart().set_name("string_start")
string_end = StringEnd().set_name("string_end")
5930 | ||
# Mini-grammar for a regex-style ``[...]`` character set; the parsed result
# carries the optional "negate" marker and the "body" of single characters
# and character ranges.

# backslash-escaped punctuation: the parse action drops the backslash and
# keeps only the escaped character
_escapedPunc = Regex(r"\\[\\[\]\/\-\*\.\$\+\^\?()~ ]").set_parse_action(
    lambda s, l, t: t[0][1]
)
# hex escape, written as backslash + optional "0" + "x"/"X" + hex digits.
# The digits are everything after the first "x" (case-insensitive).
# str.lstrip(r"\0x") must not be used for this: it strips a *set* of
# characters, so it also removes leading zero digits (leaving nothing to
# convert for an all-zero value such as "\x00") and it never removes the
# upper-case "X" that the pattern accepts.
_escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").set_parse_action(
    lambda s, l, t: chr(int(t[0].lower().partition("x")[2], 16))
)
# octal escape, written as backslash + "0" + octal digits; everything after
# the backslash is converted base 8
_escapedOctChar = Regex(r"\\0[0-7]+").set_parse_action(
    lambda s, l, t: chr(int(t[0][1:], 8))
)
# one character of the set: any of the escapes above, or a single character
# that is neither a backslash nor a closing bracket
_singleChar = (
    _escapedPunc | _escapedHexChar | _escapedOctChar | CharsNotIn(r"\]", exact=1)
)
# "a-z" style range, grouped so it can be told apart from a single character
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
_reBracketExpr = (
    Literal("[")
    + Opt("^").set_results_name("negate")
    + Group(OneOrMore(_charRange | _singleChar)).set_results_name("body")
    + Literal("]")
)
5950 | ||
5951 | ||
def srange(s: str) -> str:
    r"""Helper for spelling out character sets for :class:`Word`
    construction, using the ``'[]'`` range notation known from regular
    expressions::

        srange("[0-9]")   -> "0123456789"
        srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
        srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"

    The input string must be enclosed in []'s; the return value is the
    expanded character set joined into a single string. Inside the []'s
    the following are accepted:

    - a single character
    - an escaped character with a leading backslash (such as ``\-``
      or ``\]``)
    - an escaped hex character with a leading ``'\x'``
      (``\x21``, which is a ``'!'`` character) (``\0x##``
      is also supported for backwards compatibility)
    - an escaped octal character with a leading ``'\0'``
      (``\041``, which is a ``'!'`` character)
    - a range of any of the above, separated by a dash (``'a-z'``,
      etc.)
    - any combination of the above (``'aeiouy'``,
      ``'a-zA-Z0-9_$'``, etc.)

    If the string cannot be parsed or expanded, an empty string is
    returned instead of raising.
    """

    def _expand(part):
        # a ParseResults is a (first, last) range; anything else is already
        # a single character and passes through unchanged
        if isinstance(part, ParseResults):
            return "".join(map(chr, range(ord(part[0]), ord(part[1]) + 1)))
        return part

    try:
        return "".join(map(_expand, _reBracketExpr.parse_string(s).body))
    except Exception:
        return ""
5987 | ||
5988 | ||
def token_map(func, *args) -> ParseAction:
    """Helper that builds a parse action applying a function to every
    element of a :class:`ParseResults` list. Any additional args are passed
    on to the function after the token, as in
    ``hex_integer = Word(hexnums).set_parse_action(token_map(int, 16))``,
    which converts the parsed data to an integer using base 16.

    The returned parse action takes the name of ``func`` (or of its class,
    when ``func`` has no ``__name__``).

    Example (compare the last to example in :class:`ParserElement.transform_string`)::

        hex_ints = Word(hexnums)[1, ...].set_parse_action(token_map(int, 16))
        hex_ints.run_tests('''
            00 11 22 aa FF 0a 0d 1a
            ''')

        upperword = Word(alphas).set_parse_action(token_map(str.upper))
        upperword[1, ...].run_tests('''
            my kingdom for a horse
            ''')

        wd = Word(alphas).set_parse_action(token_map(str.title))
        wd[1, ...].set_parse_action(' '.join).run_tests('''
            now is the winter of our discontent made glorious summer by this sun of york
            ''')

    prints::

        00 11 22 aa FF 0a 0d 1a
        [0, 17, 34, 170, 255, 10, 13, 26]

        my kingdom for a horse
        ['MY', 'KINGDOM', 'FOR', 'A', 'HORSE']

        now is the winter of our discontent made glorious summer by this sun of york
        ['Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York']
    """
    # callables without a __name__ fall back to the name of their class
    class_name = func.__class__.__name__
    action_name = getattr(func, "__name__", class_name)

    def pa(s, l, t):
        return [func(token, *args) for token in t]

    pa.__name__ = action_name
    return pa
6033 | ||
6034 | ||
def autoname_elements() -> None:
    """
    Utility to simplify mass-naming of parser elements, for
    generating railroad diagram with named subdiagrams.

    Looks at the local variables of the calling frame and, for every one
    that is a :class:`ParserElement` whose ``customName`` is not set, calls
    ``set_name`` with the variable's name. Does nothing when there is no
    calling frame.
    """
    # must be looked up here, directly in this function, so that f_back
    # refers to the caller of autoname_elements()
    caller = sys._getframe().f_back
    if caller is None:
        return
    caller = typing.cast(types.FrameType, caller)

    for var_name, candidate in caller.f_locals.items():
        if not isinstance(candidate, ParserElement) or candidate.customName:
            continue
        candidate.set_name(var_name)
6047 | ||
6048 | ||
# Ready-made expressions for quoted string literals.  Each is wrapped in
# Combine, and each alternative is a Regex for the opening quote plus the
# string body, followed by a literal closing quote.

# Double-quoted string.  The body may contain: any character other than a
# double quote, newline, carriage return or backslash; a doubled quote ("");
# or a backslash escape (backslash + any non-"x" character, or backslash +
# "x" + hex digits).
dbl_quoted_string = Combine(
    Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"'
).set_name("string enclosed in double quotes")

# Single-quoted counterpart of dbl_quoted_string (doubled quote is '').
sgl_quoted_string = Combine(
    Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'"
).set_name("string enclosed in single quotes")

# Either quoting style; the two alternatives repeat the patterns above and
# are given their own names.
quoted_string = Combine(
    (Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"').set_name(
        "double quoted string"
    )
    | (Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'").set_name(
        "single quoted string"
    )
).set_name("quoted string using single or double quotes")

# Python-style string literals: triple-quoted forms (whose bodies do not
# exclude newlines) plus single-line forms.  Unlike the expressions above,
# the single-line patterns here accept a backslash-escaped quote (\" or \')
# in place of a doubled quote, and the alternatives are combined with ``^``
# rather than ``|``.
python_quoted_string = Combine(
    # triple double-quoted: inside the body, one or two consecutive quotes
    # are allowed only when they do not complete a run of three
    (Regex(r'"""(?:[^"\\]|""(?!")|"(?!"")|\\.)*', flags=re.MULTILINE) + '"""').set_name(
        "multiline double quoted string"
    )
    # triple single-quoted: same structure with ' in place of "
    ^ (
        Regex(r"'''(?:[^'\\]|''(?!')|'(?!'')|\\.)*", flags=re.MULTILINE) + "'''"
    ).set_name("multiline single quoted string")
    ^ (Regex(r'"(?:[^"\n\r\\]|(?:\\")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"').set_name(
        "double quoted string"
    )
    ^ (Regex(r"'(?:[^'\n\r\\]|(?:\\')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'").set_name(
        "single quoted string"
    )
).set_name("Python quoted string")

# A quoted_string (copied, so the original expression is left untouched)
# preceded by a literal "u".
unicode_string = Combine("u" + quoted_string.copy()).set_name("unicode string literal")
6082 | ||
6083 | ||
# Character sets expanded via srange() from hex-escaped ranges:
# alphas8bit covers code points 0xC0-0xD6, 0xD8-0xF6 and 0xF8-0xFF;
# punc8bit covers 0xA1-0xBF plus the two code points skipped above (0xD7, 0xF7).
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")
6086 | ||
# build list of built-in expressions, for future reference if a global default value
# gets updated
# (snapshot of every ParserElement instance bound to a module-level name at
# this point in the module; names defined after this statement are not included)
_builtin_exprs: List[ParserElement] = [
    v for v in vars().values() if isinstance(v, ParserElement)
]
6092 | ||
# backward compatibility names
# The camelCase names below are plain aliases: each one is bound to the very
# same object as its snake_case counterpart.
# fmt: off
sglQuotedString = sgl_quoted_string
dblQuotedString = dbl_quoted_string
quotedString = quoted_string
unicodeString = unicode_string
lineStart = line_start
lineEnd = line_end
stringStart = string_start
stringEnd = string_end

# Legacy function names: each stub has an empty body and is passed through
# replaced_by_pep8() together with the snake_case function it stands for.
# NOTE(review): replaced_by_pep8 comes from .util and is not visible here -
# presumably it returns a wrapper that forwards to the snake_case function;
# confirm there.
@replaced_by_pep8(null_debug_action)
def nullDebugAction(): ...

@replaced_by_pep8(trace_parse_action)
def traceParseAction(): ...

@replaced_by_pep8(condition_as_parse_action)
def conditionAsParseAction(): ...

@replaced_by_pep8(token_map)
def tokenMap(): ...
# fmt: on