# path: root/scripts/debug_personamem_hash.py
# blob: 7ef442d6c927e0fdccf6bafbecdbfbfd2b1a5bc6
import hashlib
import json

def get_line_hash(line_str: str) -> str:
    """Return the hex SHA-256 digest of a line, ignoring surrounding whitespace.

    Leading and trailing whitespace (including a trailing newline) is
    stripped, the remainder is encoded as UTF-8, and the digest is returned
    as a lowercase hexadecimal string. The result is meant to be compared
    against a ``shared_context_id`` value.
    """
    payload = line_str.strip().encode("utf-8")
    hasher = hashlib.sha256()
    hasher.update(payload)
    return hasher.hexdigest()

def debug_hash(
    jsonl_path: str = "data/raw_datasets/personamem/shared_contexts_32k.jsonl",
    target_hash: str = "e898d03fec683b1cabf29f57287ff66f8a31842543ecef44b56766844c1c1301",
) -> None:
    """Hash the first line of a JSONL file and print a comparison with a target.

    Args:
        jsonl_path: Path of the JSONL file whose first line is hashed.
            Defaults to the PersonaMem 32k shared-contexts file.
        target_hash: Hex SHA-256 digest the computed hash is compared with.

    Raises:
        OSError: If ``jsonl_path`` cannot be opened.
        UnicodeDecodeError: If the first line is not valid UTF-8.
    """
    # Decode explicitly as UTF-8: get_line_hash re-encodes the text as UTF-8
    # before hashing, so reading with the platform's default encoding could
    # change the hashed bytes (or fail outright) on non-UTF-8 locales.
    with open(jsonl_path, "r", encoding="utf-8") as f:
        # Only the first line is needed; an empty file yields "".
        first_line = f.readline()

    computed_hash = get_line_hash(first_line)

    print(f"Computed: {computed_hash}")
    print(f"Target:   {target_hash}")
    print(f"Match:    {computed_hash == target_hash}")

# Run the hash comparison only when this file is executed as a script,
# so importing the module has no side effects.
if __name__ == "__main__":
    debug_hash()