diff options
Diffstat (limited to 'scripts/debug_personamem_hash.py')
| -rw-r--r-- | scripts/debug_personamem_hash.py | 22 |
1 files changed, 22 insertions, 0 deletions
import hashlib
import json

# Defaults preserved from the original script so that calling debug_hash()
# with no arguments behaves exactly as before.
_DEFAULT_JSONL_PATH = "data/raw_datasets/personamem/shared_contexts_32k.jsonl"
_DEFAULT_TARGET_HASH = (
    "e898d03fec683b1cabf29f57287ff66f8a31842543ecef44b56766844c1c1301"
)


def get_line_hash(line_str: str) -> str:
    """Return the SHA-256 hex digest of a line of text.

    Leading and trailing whitespace (including the trailing newline) is
    stripped, and the remainder is encoded as UTF-8 before hashing. The
    digest is intended to be compared against a ``shared_context_id``.

    Args:
        line_str: The raw line, typically as read from a JSONL file.

    Returns:
        The 64-character lowercase hexadecimal SHA-256 digest.
    """
    return hashlib.sha256(line_str.strip().encode("utf-8")).hexdigest()


def debug_hash(
    jsonl_path: str = _DEFAULT_JSONL_PATH,
    target_hash: str = _DEFAULT_TARGET_HASH,
) -> None:
    """Hash the first line of a JSONL file and print it next to a target.

    Prints the computed digest, the target digest, and whether they match.

    Args:
        jsonl_path: Path of the JSONL file whose first line is hashed.
        target_hash: Hex digest the first line is expected to produce.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the first line is not valid UTF-8.
    """
    # Decode explicitly as UTF-8: get_line_hash() re-encodes as UTF-8, so
    # relying on the platform default encoding could change the digest (or
    # fail to decode) on systems whose locale is not UTF-8.
    with open(jsonl_path, "r", encoding="utf-8") as f:
        first_line = f.readline()

    computed_hash = get_line_hash(first_line)

    print(f"Computed: {computed_hash}")
    print(f"Target: {target_hash}")
    print(f"Match: {computed_hash == target_hash}")


if __name__ == "__main__":
    debug_hash()
