author     blackhao <13851610112@163.com>  2025-08-22 02:51:50 -0500
committer  blackhao <13851610112@163.com>  2025-08-22 02:51:50 -0500
commit     4aab4087dc97906d0b9890035401175cdaab32d4 (patch)
tree       4e2e9d88a711ec5b1cfa02e8ac72a55183b99123 /.venv/lib/python3.12/site-packages/charset_normalizer/cli
parent     afa8f50d1d21c721dabcb31ad244610946ab65a3 (diff)
2.0
Diffstat (limited to '.venv/lib/python3.12/site-packages/charset_normalizer/cli')
-rw-r--r--  .venv/lib/python3.12/site-packages/charset_normalizer/cli/__init__.py                              8
-rw-r--r--  .venv/lib/python3.12/site-packages/charset_normalizer/cli/__main__.py                              381
-rw-r--r--  .venv/lib/python3.12/site-packages/charset_normalizer/cli/__pycache__/__init__.cpython-312.pyc     bin 0 -> 340 bytes
-rw-r--r--  .venv/lib/python3.12/site-packages/charset_normalizer/cli/__pycache__/__main__.cpython-312.pyc     bin 0 -> 14401 bytes
4 files changed, 389 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/charset_normalizer/cli/__init__.py b/.venv/lib/python3.12/site-packages/charset_normalizer/cli/__init__.py
new file mode 100644
index 0000000..543a5a4
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/charset_normalizer/cli/__init__.py
@@ -0,0 +1,8 @@
+from __future__ import annotations
+
+from .__main__ import cli_detect, query_yes_no
+
+__all__ = (
+ "cli_detect",
+ "query_yes_no",
+)
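
Note: the new cli/__init__.py only re-exports the two public entry points defined in cli/__main__.py below. A minimal sketch of how that package-level API would be consumed, assuming charset_normalizer is importable from this environment (the wrapper script name is hypothetical):

    # detect_cli.py -- hypothetical wrapper around the vendored CLI package
    from charset_normalizer.cli import cli_detect, query_yes_no

    if __name__ == "__main__":
        # With no argv list, cli_detect() parses sys.argv itself and
        # returns 0 on success, non-zero on error.
        raise SystemExit(cli_detect())
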
diff --git a/.venv/lib/python3.12/site-packages/charset_normalizer/cli/__main__.py b/.venv/lib/python3.12/site-packages/charset_normalizer/cli/__main__.py
new file mode 100644
index 0000000..cb64156
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/charset_normalizer/cli/__main__.py
@@ -0,0 +1,381 @@
+from __future__ import annotations
+
+import argparse
+import sys
+import typing
+from json import dumps
+from os.path import abspath, basename, dirname, join, realpath
+from platform import python_version
+from unicodedata import unidata_version
+
+import charset_normalizer.md as md_module
+from charset_normalizer import from_fp
+from charset_normalizer.models import CliDetectionResult
+from charset_normalizer.version import __version__
+
+
+def query_yes_no(question: str, default: str = "yes") -> bool:
+ """Ask a yes/no question via input() and return their answer.
+
+ "question" is a string that is presented to the user.
+ "default" is the presumed answer if the user just hits <Enter>.
+ It must be "yes" (the default), "no" or None (meaning
+ an answer is required of the user).
+
+ The "answer" return value is True for "yes" or False for "no".
+
+ Credit goes to (c) https://stackoverflow.com/questions/3041986/apt-command-line-interface-like-yes-no-input
+ """
+ valid = {"yes": True, "y": True, "ye": True, "no": False, "n": False}
+ if default is None:
+ prompt = " [y/n] "
+ elif default == "yes":
+ prompt = " [Y/n] "
+ elif default == "no":
+ prompt = " [y/N] "
+ else:
+ raise ValueError("invalid default answer: '%s'" % default)
+
+ while True:
+ sys.stdout.write(question + prompt)
+ choice = input().lower()
+ if default is not None and choice == "":
+ return valid[default]
+ elif choice in valid:
+ return valid[choice]
+ else:
+ sys.stdout.write("Please respond with 'yes' or 'no' (or 'y' or 'n').\n")
+
+
+class FileType:
+ """Factory for creating file object types
+
+ Instances of FileType are typically passed as type= arguments to the
+ ArgumentParser add_argument() method.
+
+ Keyword Arguments:
+ - mode -- A string indicating how the file is to be opened. Accepts the
+ same values as the builtin open() function.
+ - bufsize -- The file's desired buffer size. Accepts the same values as
+ the builtin open() function.
+ - encoding -- The file's encoding. Accepts the same values as the
+ builtin open() function.
+ - errors -- A string indicating how encoding and decoding errors are to
+ be handled. Accepts the same value as the builtin open() function.
+
+ Backported from CPython 3.12
+ """
+
+ def __init__(
+ self,
+ mode: str = "r",
+ bufsize: int = -1,
+ encoding: str | None = None,
+ errors: str | None = None,
+ ):
+ self._mode = mode
+ self._bufsize = bufsize
+ self._encoding = encoding
+ self._errors = errors
+
+ def __call__(self, string: str) -> typing.IO: # type: ignore[type-arg]
+ # the special argument "-" means sys.std{in,out}
+ if string == "-":
+ if "r" in self._mode:
+ return sys.stdin.buffer if "b" in self._mode else sys.stdin
+ elif any(c in self._mode for c in "wax"):
+ return sys.stdout.buffer if "b" in self._mode else sys.stdout
+ else:
+ msg = f'argument "-" with mode {self._mode}'
+ raise ValueError(msg)
+
+ # all other arguments are used as file names
+ try:
+ return open(string, self._mode, self._bufsize, self._encoding, self._errors)
+ except OSError as e:
+ message = f"can't open '{string}': {e}"
+ raise argparse.ArgumentTypeError(message)
+
+ def __repr__(self) -> str:
+ args = self._mode, self._bufsize
+ kwargs = [("encoding", self._encoding), ("errors", self._errors)]
+ args_str = ", ".join(
+ [repr(arg) for arg in args if arg != -1]
+ + [f"{kw}={arg!r}" for kw, arg in kwargs if arg is not None]
+ )
+ return f"{type(self).__name__}({args_str})"
+
+
+def cli_detect(argv: list[str] | None = None) -> int:
+ """
+ CLI assistant using ARGV and ArgumentParser
+ :param argv:
+ :return: 0 if everything is fine, anything else equal trouble
+ """
+ parser = argparse.ArgumentParser(
+ description="The Real First Universal Charset Detector. "
+ "Discover originating encoding used on text file. "
+ "Normalize text to unicode."
+ )
+
+ parser.add_argument(
+ "files", type=FileType("rb"), nargs="+", help="File(s) to be analysed"
+ )
+ parser.add_argument(
+ "-v",
+ "--verbose",
+ action="store_true",
+ default=False,
+ dest="verbose",
+ help="Display complementary information about file if any. "
+ "Stdout will contain logs about the detection process.",
+ )
+ parser.add_argument(
+ "-a",
+ "--with-alternative",
+ action="store_true",
+ default=False,
+ dest="alternatives",
+ help="Output complementary possibilities if any. Top-level JSON WILL be a list.",
+ )
+ parser.add_argument(
+ "-n",
+ "--normalize",
+ action="store_true",
+ default=False,
+ dest="normalize",
+ help="Permit to normalize input file. If not set, program does not write anything.",
+ )
+ parser.add_argument(
+ "-m",
+ "--minimal",
+ action="store_true",
+ default=False,
+ dest="minimal",
+ help="Only output the charset detected to STDOUT. Disabling JSON output.",
+ )
+ parser.add_argument(
+ "-r",
+ "--replace",
+ action="store_true",
+ default=False,
+ dest="replace",
+ help="Replace file when trying to normalize it instead of creating a new one.",
+ )
+ parser.add_argument(
+ "-f",
+ "--force",
+ action="store_true",
+ default=False,
+ dest="force",
+ help="Replace file without asking if you are sure, use this flag with caution.",
+ )
+ parser.add_argument(
+ "-i",
+ "--no-preemptive",
+ action="store_true",
+ default=False,
+ dest="no_preemptive",
+ help="Disable looking at a charset declaration to hint the detector.",
+ )
+ parser.add_argument(
+ "-t",
+ "--threshold",
+ action="store",
+ default=0.2,
+ type=float,
+ dest="threshold",
+ help="Define a custom maximum amount of noise allowed in decoded content. 0. <= noise <= 1.",
+ )
+ parser.add_argument(
+ "--version",
+ action="version",
+ version="Charset-Normalizer {} - Python {} - Unicode {} - SpeedUp {}".format(
+ __version__,
+ python_version(),
+ unidata_version,
+ "OFF" if md_module.__file__.lower().endswith(".py") else "ON",
+ ),
+ help="Show version information and exit.",
+ )
+
+ args = parser.parse_args(argv)
+
+ if args.replace is True and args.normalize is False:
+ if args.files:
+ for my_file in args.files:
+ my_file.close()
+ print("Use --replace in addition of --normalize only.", file=sys.stderr)
+ return 1
+
+ if args.force is True and args.replace is False:
+ if args.files:
+ for my_file in args.files:
+ my_file.close()
+ print("Use --force in addition of --replace only.", file=sys.stderr)
+ return 1
+
+ if args.threshold < 0.0 or args.threshold > 1.0:
+ if args.files:
+ for my_file in args.files:
+ my_file.close()
+ print("--threshold VALUE should be between 0. AND 1.", file=sys.stderr)
+ return 1
+
+ x_ = []
+
+ for my_file in args.files:
+ matches = from_fp(
+ my_file,
+ threshold=args.threshold,
+ explain=args.verbose,
+ preemptive_behaviour=args.no_preemptive is False,
+ )
+
+ best_guess = matches.best()
+
+ if best_guess is None:
+ print(
+ 'Unable to identify originating encoding for "{}". {}'.format(
+ my_file.name,
+ (
+ "Maybe try increasing maximum amount of chaos."
+ if args.threshold < 1.0
+ else ""
+ ),
+ ),
+ file=sys.stderr,
+ )
+ x_.append(
+ CliDetectionResult(
+ abspath(my_file.name),
+ None,
+ [],
+ [],
+ "Unknown",
+ [],
+ False,
+ 1.0,
+ 0.0,
+ None,
+ True,
+ )
+ )
+ else:
+ x_.append(
+ CliDetectionResult(
+ abspath(my_file.name),
+ best_guess.encoding,
+ best_guess.encoding_aliases,
+ [
+ cp
+ for cp in best_guess.could_be_from_charset
+ if cp != best_guess.encoding
+ ],
+ best_guess.language,
+ best_guess.alphabets,
+ best_guess.bom,
+ best_guess.percent_chaos,
+ best_guess.percent_coherence,
+ None,
+ True,
+ )
+ )
+
+ if len(matches) > 1 and args.alternatives:
+ for el in matches:
+ if el != best_guess:
+ x_.append(
+ CliDetectionResult(
+ abspath(my_file.name),
+ el.encoding,
+ el.encoding_aliases,
+ [
+ cp
+ for cp in el.could_be_from_charset
+ if cp != el.encoding
+ ],
+ el.language,
+ el.alphabets,
+ el.bom,
+ el.percent_chaos,
+ el.percent_coherence,
+ None,
+ False,
+ )
+ )
+
+ if args.normalize is True:
+ if best_guess.encoding.startswith("utf") is True:
+ print(
+ '"{}" file does not need to be normalized, as it already came from unicode.'.format(
+ my_file.name
+ ),
+ file=sys.stderr,
+ )
+ if my_file.closed is False:
+ my_file.close()
+ continue
+
+ dir_path = dirname(realpath(my_file.name))
+ file_name = basename(realpath(my_file.name))
+
+ o_: list[str] = file_name.split(".")
+
+ if args.replace is False:
+ o_.insert(-1, best_guess.encoding)
+ if my_file.closed is False:
+ my_file.close()
+ elif (
+ args.force is False
+ and query_yes_no(
+ 'Are you sure to normalize "{}" by replacing it ?'.format(
+ my_file.name
+ ),
+ "no",
+ )
+ is False
+ ):
+ if my_file.closed is False:
+ my_file.close()
+ continue
+
+ try:
+ x_[0].unicode_path = join(dir_path, ".".join(o_))
+
+ with open(x_[0].unicode_path, "wb") as fp:
+ fp.write(best_guess.output())
+ except OSError as e:
+ print(str(e), file=sys.stderr)
+ if my_file.closed is False:
+ my_file.close()
+ return 2
+
+ if my_file.closed is False:
+ my_file.close()
+
+ if args.minimal is False:
+ print(
+ dumps(
+ [el.__dict__ for el in x_] if len(x_) > 1 else x_[0].__dict__,
+ ensure_ascii=True,
+ indent=4,
+ )
+ )
+ else:
+ for my_file in args.files:
+ print(
+ ", ".join(
+ [
+ el.encoding or "undefined"
+ for el in x_
+ if el.path == abspath(my_file.name)
+ ]
+ )
+ )
+
+ return 0
+
+
+if __name__ == "__main__":
+ cli_detect()
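
Note: besides the `python -m charset_normalizer.cli` entry point guarded above, cli_detect() can be driven programmatically with an explicit argv list built from the flags registered in the parser. A minimal sketch, assuming a hypothetical input file example.txt in the working directory:

    from charset_normalizer.cli import cli_detect

    # Equivalent to `python -m charset_normalizer.cli --minimal example.txt`:
    # prints only the detected charset to stdout and returns 0 on success.
    code = cli_detect(["--minimal", "example.txt"])

    # Full JSON report including alternative guesses; with --with-alternative
    # the top-level JSON becomes a list, as the help text above states.
    code = cli_detect(["--with-alternative", "--threshold", "0.2", "example.txt"])
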
diff --git a/.venv/lib/python3.12/site-packages/charset_normalizer/cli/__pycache__/__init__.cpython-312.pyc b/.venv/lib/python3.12/site-packages/charset_normalizer/cli/__pycache__/__init__.cpython-312.pyc
new file mode 100644
index 0000000..ddf3f5a
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/charset_normalizer/cli/__pycache__/__init__.cpython-312.pyc
Binary files differ
diff --git a/.venv/lib/python3.12/site-packages/charset_normalizer/cli/__pycache__/__main__.cpython-312.pyc b/.venv/lib/python3.12/site-packages/charset_normalizer/cli/__pycache__/__main__.cpython-312.pyc
new file mode 100644
index 0000000..589b85e
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/charset_normalizer/cli/__pycache__/__main__.cpython-312.pyc
Binary files differ