# Origin: scripts/debug_personamem_hash.py, added by commit
# e43b3f8aa36c198b95c1e46bea2eaf3893b13dc3 "Initial commit (clean history)"
# (YurenHao0426, Wed, 17 Dec 2025 04:29:37 -0600).
"""Debug helper that hashes the first line of a JSONL file.

The SHA-256 digest of the stripped first line is printed next to a target
digest so the two can be compared by eye.
"""

import hashlib
import json

# Defaults used when the script is run without arguments.
DEFAULT_JSONL_PATH = "data/raw_datasets/personamem/shared_contexts_32k.jsonl"
DEFAULT_TARGET_HASH = (
    "e898d03fec683b1cabf29f57287ff66f8a31842543ecef44b56766844c1c1301"
)


def get_line_hash(line_str: str) -> str:
    """Compute SHA256 hash of the line content to match shared_context_id.

    Leading and trailing whitespace (including the line terminator) is
    stripped before the text is encoded as UTF-8 and hashed.

    Args:
        line_str: One raw line of text.

    Returns:
        The lowercase hexadecimal SHA-256 digest of the stripped line.
    """
    return hashlib.sha256(line_str.strip().encode("utf-8")).hexdigest()


def debug_hash(
    jsonl_path: str = DEFAULT_JSONL_PATH,
    target_hash: str = DEFAULT_TARGET_HASH,
) -> bool:
    """Print the hash of the first line of a file next to a target hash.

    Args:
        jsonl_path: Path of the file whose first line is hashed.
        target_hash: Hex digest the computed hash is compared against.

    Returns:
        True when the computed digest equals ``target_hash``.

    Raises:
        OSError: If ``jsonl_path`` cannot be opened.
        UnicodeDecodeError: If the first line is not valid UTF-8.
    """
    # Decode explicitly as UTF-8: get_line_hash() re-encodes as UTF-8, so
    # relying on the platform default encoding could change the hashed bytes.
    with open(jsonl_path, "r", encoding="utf-8") as f:
        first_line = f.readline()

    computed_hash = get_line_hash(first_line)
    is_match = computed_hash == target_hash

    print(f"Computed: {computed_hash}")
    print(f"Target: {target_hash}")
    print(f"Match: {is_match}")
    return is_match


if __name__ == "__main__":
    debug_hash()