]>
Commit | Line | Data |
---|---|---|
e0df8241 JR |
1 | # module pyparsing.py |
2 | # | |
3 | # Copyright (c) 2003-2022 Paul T. McGuire | |
4 | # | |
5 | # Permission is hereby granted, free of charge, to any person obtaining | |
6 | # a copy of this software and associated documentation files (the | |
7 | # "Software"), to deal in the Software without restriction, including | |
8 | # without limitation the rights to use, copy, modify, merge, publish, | |
9 | # distribute, sublicense, and/or sell copies of the Software, and to | |
10 | # permit persons to whom the Software is furnished to do so, subject to | |
11 | # the following conditions: | |
12 | # | |
13 | # The above copyright notice and this permission notice shall be | |
14 | # included in all copies or substantial portions of the Software. | |
15 | # | |
16 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
17 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |
18 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. | |
19 | # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY | |
20 | # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | |
21 | # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | |
22 | # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | |
23 | # | |
24 | ||
25 | __doc__ = """ | |
26 | pyparsing module - Classes and methods to define and execute parsing grammars | |
27 | ============================================================================= | |
28 | ||
29 | The pyparsing module is an alternative approach to creating and | |
30 | executing simple grammars, vs. the traditional lex/yacc approach, or the | |
31 | use of regular expressions. With pyparsing, you don't need to learn | |
32 | a new syntax for defining grammars or matching expressions - the parsing | |
33 | module provides a library of classes that you use to construct the | |
34 | grammar directly in Python. | |
35 | ||
36 | Here is a program to parse "Hello, World!" (or any greeting of the form | |
37 | ``"<salutation>, <addressee>!"``), built up using :class:`Word`, | |
38 | :class:`Literal`, and :class:`And` elements | |
39 | (the :meth:`'+'<ParserElement.__add__>` operators create :class:`And` expressions, | |
40 | and the strings are auto-converted to :class:`Literal` expressions):: | |
41 | ||
42 | from pip._vendor.pyparsing import Word, alphas | |
43 | ||
44 | # define grammar of a greeting | |
45 | greet = Word(alphas) + "," + Word(alphas) + "!" | |
46 | ||
47 | hello = "Hello, World!" | |
48 | print(hello, "->", greet.parse_string(hello)) | |
49 | ||
50 | The program outputs the following:: | |
51 | ||
52 | Hello, World! -> ['Hello', ',', 'World', '!'] | |
53 | ||
54 | The Python representation of the grammar is quite readable, owing to the | |
55 | self-explanatory class names, and the use of :class:`'+'<And>`, | |
56 | :class:`'|'<MatchFirst>`, :class:`'^'<Or>` and :class:`'&'<Each>` operators. | |
57 | ||
58 | The :class:`ParseResults` object returned from | |
59 | :class:`ParserElement.parse_string` can be | |
60 | accessed as a nested list, a dictionary, or an object with named | |
61 | attributes. | |
62 | ||
63 | The pyparsing module handles some of the problems that are typically | |
64 | vexing when writing text parsers: | |
65 | ||
66 | - extra or missing whitespace (the above program will also handle | |
67 | "Hello,World!", "Hello , World !", etc.) | |
68 | - quoted strings | |
69 | - embedded comments | |
70 | ||
71 | ||
72 | Getting Started - | |
73 | ----------------- | |
74 | Visit the classes :class:`ParserElement` and :class:`ParseResults` to | |
75 | see the base classes that most other pyparsing | |
76 | classes inherit from. Use the docstrings for examples of how to: | |
77 | ||
78 | - construct literal match expressions from :class:`Literal` and | |
79 | :class:`CaselessLiteral` classes | |
80 | - construct character word-group expressions using the :class:`Word` | |
81 | class | |
82 | - see how to create repetitive expressions using :class:`ZeroOrMore` | |
83 | and :class:`OneOrMore` classes | |
84 | - use :class:`'+'<And>`, :class:`'|'<MatchFirst>`, :class:`'^'<Or>`, | |
85 | and :class:`'&'<Each>` operators to combine simple expressions into | |
86 | more complex ones | |
87 | - associate names with your parsed results using | |
88 | :class:`ParserElement.set_results_name` | |
89 | - access the parsed data, which is returned as a :class:`ParseResults` | |
90 | object | |
91 | - find some helpful expression short-cuts like :class:`DelimitedList` | |
92 | and :class:`one_of` | |
93 | - find more useful common expressions in the :class:`pyparsing_common` | |
94 | namespace class | |
95 | """ | |
96 | from typing import NamedTuple | |
97 | ||
98 | ||
class version_info(NamedTuple):
    """Five-field version record: major, minor, micro, releaselevel, serial.

    Besides behaving as a plain tuple, an instance can render itself as a
    dotted version string (``__version__``), as a human-readable banner
    (``str()``), and as a keyword-style constructor expression (``repr()``).
    """

    major: int
    minor: int
    micro: int
    releaselevel: str
    serial: int

    @property
    def __version__(self) -> str:
        """Return the dotted version string for this record.

        A ``"final"`` release level yields just ``"major.minor.micro"``.
        Any other level appends the first letter of the level followed by
        the serial number; a level starting with ``"c"`` is spelled ``"rc"``
        (e.g. ``"candidate"`` with serial 2 gives ``"3.1.0rc2"``).

        Raises:
            IndexError: if ``releaselevel`` is an empty string.
        """
        base = f"{self.major}.{self.minor}.{self.micro}"
        if self.releaselevel == "final":
            return base

        # Pre-release: tag with the level's initial, turning "c" into "rc".
        initial = self.releaselevel[0]
        prefix = "r" if initial == "c" else ""
        return f"{base}{prefix}{initial}{self.serial}"

    def __str__(self) -> str:
        """Return ``"<module name> <version> / <version time>"``.

        Reads the module-level ``__version_time__`` at call time.
        """
        return f"{__name__} {self.__version__} / {__version_time__}"

    def __repr__(self) -> str:
        """Return ``<module>.<class>(field=value, ...)`` listing every field."""
        fields = ", ".join(
            f"{name}={value!r}" for name, value in zip(self._fields, self)
        )
        return f"{__name__}.{type(self).__name__}({fields})"
121 | ||
122 | ||
# Release metadata for this copy of the package.
# ``__versionTime__`` is the pre-PEP8 spelling of ``__version_time__``;
# both names are bound to the same timestamp string.
__version_time__ = __versionTime__ = "18 Jun 2023 14:05 UTC"
__version_info__ = version_info(3, 1, 0, "final", 1)
# Plain-string form of the version, derived from the structured record.
__version__ = __version_info__.__version__
__author__ = "Paul McGuire <ptmcg.gm+pyparsing@gmail.com>"
128 | ||
129 | from .util import * | |
130 | from .exceptions import * | |
131 | from .actions import * | |
132 | from .core import __diag__, __compat__ | |
133 | from .results import * | |
134 | from .core import * # type: ignore[misc, assignment] | |
135 | from .core import _builtin_exprs as core_builtin_exprs | |
136 | from .helpers import * # type: ignore[misc, assignment] | |
137 | from .helpers import _builtin_exprs as helper_builtin_exprs | |
138 | ||
139 | from .unicode import unicode_set, UnicodeRangeList, pyparsing_unicode as unicode | |
140 | from .testing import pyparsing_test as testing | |
141 | from .common import ( | |
142 | pyparsing_common as common, | |
143 | _builtin_exprs as common_builtin_exprs, | |
144 | ) | |
145 | ||
# Backward-compatibility synonyms: make sure the legacy ``pyparsing_*``
# names exist in this module. A name that is already bound keeps its
# current object; otherwise it falls back to the short alias imported above.
pyparsing_unicode = globals().get("pyparsing_unicode", unicode)  # type: ignore[misc]
pyparsing_common = globals().get("pyparsing_common", common)  # type: ignore[misc]
pyparsing_test = globals().get("pyparsing_test", testing)  # type: ignore[misc]

# Fold the built-in expressions exported by ``common`` and ``helpers``
# into the collection imported from ``core``.
core_builtin_exprs += common_builtin_exprs + helper_builtin_exprs
155 | ||
156 | ||
157 | __all__ = [ | |
158 | "__version__", | |
159 | "__version_time__", | |
160 | "__author__", | |
161 | "__compat__", | |
162 | "__diag__", | |
163 | "And", | |
164 | "AtLineStart", | |
165 | "AtStringStart", | |
166 | "CaselessKeyword", | |
167 | "CaselessLiteral", | |
168 | "CharsNotIn", | |
169 | "CloseMatch", | |
170 | "Combine", | |
171 | "DelimitedList", | |
172 | "Dict", | |
173 | "Each", | |
174 | "Empty", | |
175 | "FollowedBy", | |
176 | "Forward", | |
177 | "GoToColumn", | |
178 | "Group", | |
179 | "IndentedBlock", | |
180 | "Keyword", | |
181 | "LineEnd", | |
182 | "LineStart", | |
183 | "Literal", | |
184 | "Located", | |
185 | "PrecededBy", | |
186 | "MatchFirst", | |
187 | "NoMatch", | |
188 | "NotAny", | |
189 | "OneOrMore", | |
190 | "OnlyOnce", | |
191 | "OpAssoc", | |
192 | "Opt", | |
193 | "Optional", | |
194 | "Or", | |
195 | "ParseBaseException", | |
196 | "ParseElementEnhance", | |
197 | "ParseException", | |
198 | "ParseExpression", | |
199 | "ParseFatalException", | |
200 | "ParseResults", | |
201 | "ParseSyntaxException", | |
202 | "ParserElement", | |
203 | "PositionToken", | |
204 | "QuotedString", | |
205 | "RecursiveGrammarException", | |
206 | "Regex", | |
207 | "SkipTo", | |
208 | "StringEnd", | |
209 | "StringStart", | |
210 | "Suppress", | |
211 | "Token", | |
212 | "TokenConverter", | |
213 | "White", | |
214 | "Word", | |
215 | "WordEnd", | |
216 | "WordStart", | |
217 | "ZeroOrMore", | |
218 | "Char", | |
219 | "alphanums", | |
220 | "alphas", | |
221 | "alphas8bit", | |
222 | "any_close_tag", | |
223 | "any_open_tag", | |
224 | "autoname_elements", | |
225 | "c_style_comment", | |
226 | "col", | |
227 | "common_html_entity", | |
228 | "condition_as_parse_action", | |
229 | "counted_array", | |
230 | "cpp_style_comment", | |
231 | "dbl_quoted_string", | |
232 | "dbl_slash_comment", | |
233 | "delimited_list", | |
234 | "dict_of", | |
235 | "empty", | |
236 | "hexnums", | |
237 | "html_comment", | |
238 | "identchars", | |
239 | "identbodychars", | |
240 | "infix_notation", | |
241 | "java_style_comment", | |
242 | "line", | |
243 | "line_end", | |
244 | "line_start", | |
245 | "lineno", | |
246 | "make_html_tags", | |
247 | "make_xml_tags", | |
248 | "match_only_at_col", | |
249 | "match_previous_expr", | |
250 | "match_previous_literal", | |
251 | "nested_expr", | |
252 | "null_debug_action", | |
253 | "nums", | |
254 | "one_of", | |
255 | "original_text_for", | |
256 | "printables", | |
257 | "punc8bit", | |
258 | "pyparsing_common", | |
259 | "pyparsing_test", | |
260 | "pyparsing_unicode", | |
261 | "python_style_comment", | |
262 | "quoted_string", | |
263 | "remove_quotes", | |
264 | "replace_with", | |
265 | "replace_html_entity", | |
266 | "rest_of_line", | |
267 | "sgl_quoted_string", | |
268 | "srange", | |
269 | "string_end", | |
270 | "string_start", | |
271 | "token_map", | |
272 | "trace_parse_action", | |
273 | "ungroup", | |
274 | "unicode_set", | |
275 | "unicode_string", | |
276 | "with_attribute", | |
277 | "with_class", | |
278 | # pre-PEP8 compatibility names | |
279 | "__versionTime__", | |
280 | "anyCloseTag", | |
281 | "anyOpenTag", | |
282 | "cStyleComment", | |
283 | "commonHTMLEntity", | |
284 | "conditionAsParseAction", | |
285 | "countedArray", | |
286 | "cppStyleComment", | |
287 | "dblQuotedString", | |
288 | "dblSlashComment", | |
289 | "delimitedList", | |
290 | "dictOf", | |
291 | "htmlComment", | |
292 | "indentedBlock", | |
293 | "infixNotation", | |
294 | "javaStyleComment", | |
295 | "lineEnd", | |
296 | "lineStart", | |
297 | "locatedExpr", | |
298 | "makeHTMLTags", | |
299 | "makeXMLTags", | |
300 | "matchOnlyAtCol", | |
301 | "matchPreviousExpr", | |
302 | "matchPreviousLiteral", | |
303 | "nestedExpr", | |
304 | "nullDebugAction", | |
305 | "oneOf", | |
306 | "opAssoc", | |
307 | "originalTextFor", | |
308 | "pythonStyleComment", | |
309 | "quotedString", | |
310 | "removeQuotes", | |
311 | "replaceHTMLEntity", | |
312 | "replaceWith", | |
313 | "restOfLine", | |
314 | "sglQuotedString", | |
315 | "stringEnd", | |
316 | "stringStart", | |
317 | "tokenMap", | |
318 | "traceParseAction", | |
319 | "unicodeString", | |
320 | "withAttribute", | |
321 | "withClass", | |
322 | ] |