# blob: 7ef442d6c927e0fdccf6bafbecdbfbfd2b1a5bc6 (plain)
import hashlib
import json
def get_line_hash(line_str: str) -> str:
    """Return the SHA-256 hex digest of a line, used to match shared_context_id.

    Leading and trailing whitespace (including the trailing newline) is
    removed first, and the remaining text is encoded as UTF-8 before hashing.

    Args:
        line_str: The raw line text.

    Returns:
        The lowercase hexadecimal SHA-256 digest of the stripped line.
    """
    payload = line_str.strip().encode("utf-8")
    hasher = hashlib.sha256()
    hasher.update(payload)
    return hasher.hexdigest()
def debug_hash(
    jsonl_path: str = "data/raw_datasets/personamem/shared_contexts_32k.jsonl",
    target_hash: str = "e898d03fec683b1cabf29f57287ff66f8a31842543ecef44b56766844c1c1301",
) -> None:
    """Print whether the first line of a JSONL file hashes to an expected value.

    Reads only the first line of ``jsonl_path``, hashes it with
    ``get_line_hash`` and prints the computed hash, the target hash and
    whether the two are equal.

    Args:
        jsonl_path: Path of the JSONL file to inspect. Defaults to the
            32k shared-contexts file this script was written for.
        target_hash: The hex digest the first line is expected to produce.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the first line is not valid UTF-8.
    """
    # Decode explicitly as UTF-8: get_line_hash re-encodes the text as UTF-8,
    # so relying on the locale's default encoding could change the bytes that
    # get hashed and produce a spurious mismatch.
    with open(jsonl_path, "r", encoding="utf-8") as f:
        first_line = f.readline()

    computed_hash = get_line_hash(first_line)

    print(f"Computed: {computed_hash}")
    print(f"Target: {target_hash}")
    print(f"Match: {computed_hash == target_hash}")
# Script entry point: run the hash check only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    debug_hash()
|