import hashlib
import json

# Defaults used when the script is run directly with no arguments.
DEFAULT_JSONL_PATH = "data/raw_datasets/personamem/shared_contexts_32k.jsonl"
DEFAULT_TARGET_HASH = (
    "e898d03fec683b1cabf29f57287ff66f8a31842543ecef44b56766844c1c1301"
)


def get_line_hash(line_str: str) -> str:
    """Return the SHA-256 hex digest of *line_str* after stripping whitespace.

    Leading and trailing whitespace (including the trailing newline of a
    JSONL record) is removed, the remainder is encoded as UTF-8, and the
    digest is returned as a lowercase hex string.  The existing docstring
    stated this is meant to match ``shared_context_id``.
    """
    return hashlib.sha256(line_str.strip().encode("utf-8")).hexdigest()


def debug_hash(
    jsonl_path: str = DEFAULT_JSONL_PATH,
    target_hash: str = DEFAULT_TARGET_HASH,
) -> None:
    """Hash the first line of *jsonl_path* and print a comparison to *target_hash*.

    Args:
        jsonl_path: Path of the JSONL file whose first line is hashed.
        target_hash: Expected SHA-256 hex digest to compare against.

    Raises:
        FileNotFoundError: If *jsonl_path* does not exist (propagated from
            ``open``).

    An empty file yields an empty first line, so the digest of the empty
    string is reported.
    """
    # Decode explicitly as UTF-8: get_line_hash re-encodes the text as UTF-8,
    # so relying on the platform's default encoding could change the bytes
    # being hashed (or fail to decode) for non-ASCII content.
    with open(jsonl_path, "r", encoding="utf-8") as f:
        first_line = f.readline()

    computed_hash = get_line_hash(first_line)

    print(f"Computed: {computed_hash}")
    print(f"Target: {target_hash}")
    print(f"Match: {computed_hash == target_hash}")


if __name__ == "__main__":
    debug_hash()