]>
jfr.im git - dlqueue.git/blob - venv/lib/python3.11/site-packages/pip/_vendor/pyparsing/unicode.py
4 from itertools
import filterfalse
5 from typing
import List
, Tuple
, Union
8 class _lazyclassproperty
:
9 def __init__(self
, fn
):
11 self
.__doc
__ = fn
.__doc
__
12 self
.__name
__ = fn
.__name
__
14 def __get__(self
, obj
, cls
):
17 if not hasattr(cls
, "_intern") or any(
18 cls
._intern
is getattr(superclass
, "_intern", [])
19 for superclass
in cls
.__mro
__[1:]
22 attrname
= self
.fn
.__name
__
23 if attrname
not in cls
._intern
:
24 cls
._intern
[attrname
] = self
.fn(cls
)
25 return cls
._intern
[attrname
]
# A list of code point ranges: each entry is either a (first, last) 2-tuple
# or a single-code-point 1-tuple.
UnicodeRangeList = List[Union[Tuple[int, int], Tuple[int]]]
33 A set of Unicode characters, for language-specific strings for
34 ``alphas``, ``nums``, ``alphanums``, and ``printables``.
35 A unicode_set is defined by a list of ranges in the Unicode character
36 set, in a class attribute ``_ranges``. Ranges can be specified using
37 2-tuples or a 1-tuple, such as::
45 Ranges are left- and right-inclusive. A 1-tuple of (x,) is treated as (x, x).
47 A unicode set can also be defined using multiple inheritance of other unicode sets::
49 class CJK(Chinese, Japanese, Korean):
53 _ranges
: UnicodeRangeList
= []
56 def _chars_for_ranges(cls
):
58 for cc
in cls
.__mro
__:
61 for rr
in getattr(cc
, "_ranges", ()):
62 ret
.extend(range(rr
[0], rr
[-1] + 1))
63 return [chr(c
) for c
in sorted(set(ret
))]
67 """all non-whitespace characters in this range"""
68 return "".join(filterfalse(str.isspace
, cls
._chars
_for
_ranges
))
72 """all alphabetic characters in this range"""
73 return "".join(filter(str.isalpha
, cls
._chars
_for
_ranges
))
77 """all numeric digit characters in this range"""
78 return "".join(filter(str.isdigit
, cls
._chars
_for
_ranges
))
82 """all alphanumeric characters in this range"""
83 return cls
.alphas
+ cls
.nums
87 """all characters in this range that are valid identifier characters, plus underscore '_'"""
91 "".join(filter(str.isidentifier
, cls
._chars
_for
_ranges
))
92 + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzªµº"
93 + "ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ"
100 def identbodychars(cls
):
102 all characters in this range that are valid identifier body characters,
103 plus the digits 0-9, and · (Unicode MIDDLE DOT)
111 [c
for c
in cls
._chars
_for
_ranges
if ("_" + c
).isidentifier()]
120 a pyparsing Word expression for an identifier using this range's definitions for
121 identchars and identbodychars
123 from pip
._vendor
.pyparsing
import Word
125 return Word(cls
.identchars
, cls
.identbodychars
)
128 class pyparsing_unicode(unicode_set
):
130 A namespace class for defining common language unicode_sets.
135 # define ranges in language character sets
136 _ranges
: UnicodeRangeList
= [
137 (0x0020, sys
.maxunicode
),
140 class BasicMultilingualPlane(unicode_set
):
141 """Unicode set for the Basic Multilingual Plane"""
142 _ranges
: UnicodeRangeList
= [
146 class Latin1(unicode_set
):
147 """Unicode set for Latin-1 Unicode Character Range"""
148 _ranges
: UnicodeRangeList
= [
153 class LatinA(unicode_set
):
154 """Unicode set for Latin-A Unicode Character Range"""
155 _ranges
: UnicodeRangeList
= [
159 class LatinB(unicode_set
):
160 """Unicode set for Latin-B Unicode Character Range"""
161 _ranges
: UnicodeRangeList
= [
165 class Greek(unicode_set
):
166 """Unicode set for Greek Unicode Character Ranges"""
167 _ranges
: UnicodeRangeList
= [
205 class Cyrillic(unicode_set
):
206 """Unicode set for Cyrillic Unicode Character Range"""
207 _ranges
: UnicodeRangeList
= [
218 class Chinese(unicode_set
):
219 """Unicode set for Chinese Unicode Character Range"""
220 _ranges
: UnicodeRangeList
= [
241 class Japanese(unicode_set
):
242 """Unicode set for Japanese Unicode Character Range, combining Kanji, Hiragana, and Katakana ranges"""
244 class Kanji(unicode_set
):
245 "Unicode set for Kanji Unicode Character Range"
246 _ranges
: UnicodeRangeList
= [
251 class Hiragana(unicode_set
):
252 """Unicode set for Hiragana Unicode Character Range"""
253 _ranges
: UnicodeRangeList
= [
263 class Katakana(unicode_set
):
264 """Unicode set for Katakana Unicode Character Range"""
265 _ranges
: UnicodeRangeList
= [
287 class Hangul(unicode_set
):
288 """Unicode set for Hangul (Korean) Unicode Character Range"""
289 _ranges
: UnicodeRangeList
= [
309 class CJK(Chinese
, Japanese
, Hangul
):
310 """Unicode set for combined Chinese, Japanese, and Korean (CJK) Unicode Character Range"""
312 class Thai(unicode_set
):
313 """Unicode set for Thai Unicode Character Range"""
314 _ranges
: UnicodeRangeList
= [
319 class Arabic(unicode_set
):
320 """Unicode set for Arabic Unicode Character Range"""
321 _ranges
: UnicodeRangeList
= [
327 class Hebrew(unicode_set
):
328 """Unicode set for Hebrew Unicode Character Range"""
329 _ranges
: UnicodeRangeList
= [
341 class Devanagari(unicode_set
):
342 """Unicode set for Devanagari Unicode Character Range"""
343 _ranges
: UnicodeRangeList
= [
348 BMP
= BasicMultilingualPlane
350 # add language identifiers using language Unicode
359 देवनागरी
= Devanagari