summaryrefslogtreecommitdiff
path: root/.venv/lib/python3.12/site-packages/~ip/_vendor/pyparsing/unicode.py
diff options
context:
space:
mode:
authorblackhao <13851610112@163.com>2025-08-22 02:51:50 -0500
committerblackhao <13851610112@163.com>2025-08-22 02:51:50 -0500
commit4aab4087dc97906d0b9890035401175cdaab32d4 (patch)
tree4e2e9d88a711ec5b1cfa02e8ac72a55183b99123 /.venv/lib/python3.12/site-packages/~ip/_vendor/pyparsing/unicode.py
parentafa8f50d1d21c721dabcb31ad244610946ab65a3 (diff)
2.0
Diffstat (limited to '.venv/lib/python3.12/site-packages/~ip/_vendor/pyparsing/unicode.py')
-rw-r--r--.venv/lib/python3.12/site-packages/~ip/_vendor/pyparsing/unicode.py361
1 files changed, 361 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/~ip/_vendor/pyparsing/unicode.py b/.venv/lib/python3.12/site-packages/~ip/_vendor/pyparsing/unicode.py
new file mode 100644
index 0000000..ec0b3a4
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/~ip/_vendor/pyparsing/unicode.py
@@ -0,0 +1,361 @@
+# unicode.py
+
+import sys
+from itertools import filterfalse
+from typing import List, Tuple, Union
+
+
+class _lazyclassproperty:
+ def __init__(self, fn):
+ self.fn = fn
+ self.__doc__ = fn.__doc__
+ self.__name__ = fn.__name__
+
+ def __get__(self, obj, cls):
+ if cls is None:
+ cls = type(obj)
+ if not hasattr(cls, "_intern") or any(
+ cls._intern is getattr(superclass, "_intern", [])
+ for superclass in cls.__mro__[1:]
+ ):
+ cls._intern = {}
+ attrname = self.fn.__name__
+ if attrname not in cls._intern:
+ cls._intern[attrname] = self.fn(cls)
+ return cls._intern[attrname]
+
+
+UnicodeRangeList = List[Union[Tuple[int, int], Tuple[int]]]
+
+
+class unicode_set:
+ """
+ A set of Unicode characters, for language-specific strings for
+ ``alphas``, ``nums``, ``alphanums``, and ``printables``.
+ A unicode_set is defined by a list of ranges in the Unicode character
+ set, in a class attribute ``_ranges``. Ranges can be specified using
+ 2-tuples or a 1-tuple, such as::
+
+ _ranges = [
+ (0x0020, 0x007e),
+ (0x00a0, 0x00ff),
+ (0x0100,),
+ ]
+
+ Ranges are left- and right-inclusive. A 1-tuple of (x,) is treated as (x, x).
+
+ A unicode set can also be defined using multiple inheritance of other unicode sets::
+
+ class CJK(Chinese, Japanese, Korean):
+ pass
+ """
+
+ _ranges: UnicodeRangeList = []
+
+ @_lazyclassproperty
+ def _chars_for_ranges(cls):
+ ret = []
+ for cc in cls.__mro__:
+ if cc is unicode_set:
+ break
+ for rr in getattr(cc, "_ranges", ()):
+ ret.extend(range(rr[0], rr[-1] + 1))
+ return [chr(c) for c in sorted(set(ret))]
+
+ @_lazyclassproperty
+ def printables(cls):
+ """all non-whitespace characters in this range"""
+ return "".join(filterfalse(str.isspace, cls._chars_for_ranges))
+
+ @_lazyclassproperty
+ def alphas(cls):
+ """all alphabetic characters in this range"""
+ return "".join(filter(str.isalpha, cls._chars_for_ranges))
+
+ @_lazyclassproperty
+ def nums(cls):
+ """all numeric digit characters in this range"""
+ return "".join(filter(str.isdigit, cls._chars_for_ranges))
+
+ @_lazyclassproperty
+ def alphanums(cls):
+ """all alphanumeric characters in this range"""
+ return cls.alphas + cls.nums
+
+ @_lazyclassproperty
+ def identchars(cls):
+ """all characters in this range that are valid identifier characters, plus underscore '_'"""
+ return "".join(
+ sorted(
+ set(
+ "".join(filter(str.isidentifier, cls._chars_for_ranges))
+ + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzªµº"
+ + "ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ"
+ + "_"
+ )
+ )
+ )
+
+ @_lazyclassproperty
+ def identbodychars(cls):
+ """
+ all characters in this range that are valid identifier body characters,
+ plus the digits 0-9, and · (Unicode MIDDLE DOT)
+ """
+ return "".join(
+ sorted(
+ set(
+ cls.identchars
+ + "0123456789·"
+ + "".join(
+ [c for c in cls._chars_for_ranges if ("_" + c).isidentifier()]
+ )
+ )
+ )
+ )
+
+ @_lazyclassproperty
+ def identifier(cls):
+ """
+ a pyparsing Word expression for an identifier using this range's definitions for
+ identchars and identbodychars
+ """
+ from pip._vendor.pyparsing import Word
+
+ return Word(cls.identchars, cls.identbodychars)
+
+
+class pyparsing_unicode(unicode_set):
+ """
+ A namespace class for defining common language unicode_sets.
+ """
+
+ # fmt: off
+
+ # define ranges in language character sets
+ _ranges: UnicodeRangeList = [
+ (0x0020, sys.maxunicode),
+ ]
+
+ class BasicMultilingualPlane(unicode_set):
+ """Unicode set for the Basic Multilingual Plane"""
+ _ranges: UnicodeRangeList = [
+ (0x0020, 0xFFFF),
+ ]
+
+ class Latin1(unicode_set):
+ """Unicode set for Latin-1 Unicode Character Range"""
+ _ranges: UnicodeRangeList = [
+ (0x0020, 0x007E),
+ (0x00A0, 0x00FF),
+ ]
+
+ class LatinA(unicode_set):
+ """Unicode set for Latin-A Unicode Character Range"""
+ _ranges: UnicodeRangeList = [
+ (0x0100, 0x017F),
+ ]
+
+ class LatinB(unicode_set):
+ """Unicode set for Latin-B Unicode Character Range"""
+ _ranges: UnicodeRangeList = [
+ (0x0180, 0x024F),
+ ]
+
+ class Greek(unicode_set):
+ """Unicode set for Greek Unicode Character Ranges"""
+ _ranges: UnicodeRangeList = [
+ (0x0342, 0x0345),
+ (0x0370, 0x0377),
+ (0x037A, 0x037F),
+ (0x0384, 0x038A),
+ (0x038C,),
+ (0x038E, 0x03A1),
+ (0x03A3, 0x03E1),
+ (0x03F0, 0x03FF),
+ (0x1D26, 0x1D2A),
+ (0x1D5E,),
+ (0x1D60,),
+ (0x1D66, 0x1D6A),
+ (0x1F00, 0x1F15),
+ (0x1F18, 0x1F1D),
+ (0x1F20, 0x1F45),
+ (0x1F48, 0x1F4D),
+ (0x1F50, 0x1F57),
+ (0x1F59,),
+ (0x1F5B,),
+ (0x1F5D,),
+ (0x1F5F, 0x1F7D),
+ (0x1F80, 0x1FB4),
+ (0x1FB6, 0x1FC4),
+ (0x1FC6, 0x1FD3),
+ (0x1FD6, 0x1FDB),
+ (0x1FDD, 0x1FEF),
+ (0x1FF2, 0x1FF4),
+ (0x1FF6, 0x1FFE),
+ (0x2129,),
+ (0x2719, 0x271A),
+ (0xAB65,),
+ (0x10140, 0x1018D),
+ (0x101A0,),
+ (0x1D200, 0x1D245),
+ (0x1F7A1, 0x1F7A7),
+ ]
+
+ class Cyrillic(unicode_set):
+ """Unicode set for Cyrillic Unicode Character Range"""
+ _ranges: UnicodeRangeList = [
+ (0x0400, 0x052F),
+ (0x1C80, 0x1C88),
+ (0x1D2B,),
+ (0x1D78,),
+ (0x2DE0, 0x2DFF),
+ (0xA640, 0xA672),
+ (0xA674, 0xA69F),
+ (0xFE2E, 0xFE2F),
+ ]
+
+ class Chinese(unicode_set):
+ """Unicode set for Chinese Unicode Character Range"""
+ _ranges: UnicodeRangeList = [
+ (0x2E80, 0x2E99),
+ (0x2E9B, 0x2EF3),
+ (0x31C0, 0x31E3),
+ (0x3400, 0x4DB5),
+ (0x4E00, 0x9FEF),
+ (0xA700, 0xA707),
+ (0xF900, 0xFA6D),
+ (0xFA70, 0xFAD9),
+ (0x16FE2, 0x16FE3),
+ (0x1F210, 0x1F212),
+ (0x1F214, 0x1F23B),
+ (0x1F240, 0x1F248),
+ (0x20000, 0x2A6D6),
+ (0x2A700, 0x2B734),
+ (0x2B740, 0x2B81D),
+ (0x2B820, 0x2CEA1),
+ (0x2CEB0, 0x2EBE0),
+ (0x2F800, 0x2FA1D),
+ ]
+
+ class Japanese(unicode_set):
+ """Unicode set for Japanese Unicode Character Range, combining Kanji, Hiragana, and Katakana ranges"""
+
+ class Kanji(unicode_set):
+ "Unicode set for Kanji Unicode Character Range"
+ _ranges: UnicodeRangeList = [
+ (0x4E00, 0x9FBF),
+ (0x3000, 0x303F),
+ ]
+
+ class Hiragana(unicode_set):
+ """Unicode set for Hiragana Unicode Character Range"""
+ _ranges: UnicodeRangeList = [
+ (0x3041, 0x3096),
+ (0x3099, 0x30A0),
+ (0x30FC,),
+ (0xFF70,),
+ (0x1B001,),
+ (0x1B150, 0x1B152),
+ (0x1F200,),
+ ]
+
+ class Katakana(unicode_set):
+ """Unicode set for Katakana Unicode Character Range"""
+ _ranges: UnicodeRangeList = [
+ (0x3099, 0x309C),
+ (0x30A0, 0x30FF),
+ (0x31F0, 0x31FF),
+ (0x32D0, 0x32FE),
+ (0xFF65, 0xFF9F),
+ (0x1B000,),
+ (0x1B164, 0x1B167),
+ (0x1F201, 0x1F202),
+ (0x1F213,),
+ ]
+
+ 漢字 = Kanji
+ カタカナ = Katakana
+ ひらがな = Hiragana
+
+ _ranges = (
+ Kanji._ranges
+ + Hiragana._ranges
+ + Katakana._ranges
+ )
+
+ class Hangul(unicode_set):
+ """Unicode set for Hangul (Korean) Unicode Character Range"""
+ _ranges: UnicodeRangeList = [
+ (0x1100, 0x11FF),
+ (0x302E, 0x302F),
+ (0x3131, 0x318E),
+ (0x3200, 0x321C),
+ (0x3260, 0x327B),
+ (0x327E,),
+ (0xA960, 0xA97C),
+ (0xAC00, 0xD7A3),
+ (0xD7B0, 0xD7C6),
+ (0xD7CB, 0xD7FB),
+ (0xFFA0, 0xFFBE),
+ (0xFFC2, 0xFFC7),
+ (0xFFCA, 0xFFCF),
+ (0xFFD2, 0xFFD7),
+ (0xFFDA, 0xFFDC),
+ ]
+
+ Korean = Hangul
+
+ class CJK(Chinese, Japanese, Hangul):
+ """Unicode set for combined Chinese, Japanese, and Korean (CJK) Unicode Character Range"""
+
+ class Thai(unicode_set):
+ """Unicode set for Thai Unicode Character Range"""
+ _ranges: UnicodeRangeList = [
+ (0x0E01, 0x0E3A),
+ (0x0E3F, 0x0E5B)
+ ]
+
+ class Arabic(unicode_set):
+ """Unicode set for Arabic Unicode Character Range"""
+ _ranges: UnicodeRangeList = [
+ (0x0600, 0x061B),
+ (0x061E, 0x06FF),
+ (0x0700, 0x077F),
+ ]
+
+ class Hebrew(unicode_set):
+ """Unicode set for Hebrew Unicode Character Range"""
+ _ranges: UnicodeRangeList = [
+ (0x0591, 0x05C7),
+ (0x05D0, 0x05EA),
+ (0x05EF, 0x05F4),
+ (0xFB1D, 0xFB36),
+ (0xFB38, 0xFB3C),
+ (0xFB3E,),
+ (0xFB40, 0xFB41),
+ (0xFB43, 0xFB44),
+ (0xFB46, 0xFB4F),
+ ]
+
+ class Devanagari(unicode_set):
+ """Unicode set for Devanagari Unicode Character Range"""
+ _ranges: UnicodeRangeList = [
+ (0x0900, 0x097F),
+ (0xA8E0, 0xA8FF)
+ ]
+
+ BMP = BasicMultilingualPlane
+
+ # add language identifiers using language Unicode
+ العربية = Arabic
+ 中文 = Chinese
+ кириллица = Cyrillic
+ Ελληνικά = Greek
+ עִברִית = Hebrew
+ 日本語 = Japanese
+ 한국어 = Korean
+ ไทย = Thai
+ देवनागरी = Devanagari
+
+ # fmt: on