diff options
| author | blackhao <13851610112@163.com> | 2025-08-22 02:51:50 -0500 |
|---|---|---|
| committer | blackhao <13851610112@163.com> | 2025-08-22 02:51:50 -0500 |
| commit | 4aab4087dc97906d0b9890035401175cdaab32d4 (patch) | |
| tree | 4e2e9d88a711ec5b1cfa02e8ac72a55183b99123 /.venv/lib/python3.12/site-packages/~ip/_internal/utils/encoding.py | |
| parent | afa8f50d1d21c721dabcb31ad244610946ab65a3 (diff) | |
2.0
Diffstat (limited to '.venv/lib/python3.12/site-packages/~ip/_internal/utils/encoding.py')
| -rw-r--r-- | .venv/lib/python3.12/site-packages/~ip/_internal/utils/encoding.py | 36 |
1 files changed, 36 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/~ip/_internal/utils/encoding.py b/.venv/lib/python3.12/site-packages/~ip/_internal/utils/encoding.py new file mode 100644 index 0000000..008f06a --- /dev/null +++ b/.venv/lib/python3.12/site-packages/~ip/_internal/utils/encoding.py @@ -0,0 +1,36 @@ +import codecs +import locale +import re +import sys +from typing import List, Tuple + +BOMS: List[Tuple[bytes, str]] = [ + (codecs.BOM_UTF8, "utf-8"), + (codecs.BOM_UTF16, "utf-16"), + (codecs.BOM_UTF16_BE, "utf-16-be"), + (codecs.BOM_UTF16_LE, "utf-16-le"), + (codecs.BOM_UTF32, "utf-32"), + (codecs.BOM_UTF32_BE, "utf-32-be"), + (codecs.BOM_UTF32_LE, "utf-32-le"), +] + +ENCODING_RE = re.compile(rb"coding[:=]\s*([-\w.]+)") + + +def auto_decode(data: bytes) -> str: + """Check a bytes string for a BOM to correctly detect the encoding + + Fallback to locale.getpreferredencoding(False) like open() on Python3""" + for bom, encoding in BOMS: + if data.startswith(bom): + return data[len(bom) :].decode(encoding) + # Lets check the first two lines as in PEP263 + for line in data.split(b"\n")[:2]: + if line[0:1] == b"#" and ENCODING_RE.search(line): + result = ENCODING_RE.search(line) + assert result is not None + encoding = result.groups()[0].decode("ascii") + return data.decode(encoding) + return data.decode( + locale.getpreferredencoding(False) or sys.getdefaultencoding(), + ) |
