From e43b3f8aa36c198b95c1e46bea2eaf3893b13dc3 Mon Sep 17 00:00:00 2001
From: YurenHao0426 <blackhao0426@gmail.com>
Date: Wed, 17 Dec 2025 04:29:37 -0600
Subject: Initial commit (clean history)

---
 scripts/init_user_states.py | 86 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 86 insertions(+)
 create mode 100644 scripts/init_user_states.py

(limited to 'scripts/init_user_states.py')

diff --git a/scripts/init_user_states.py b/scripts/init_user_states.py
new file mode 100644
index 0000000..73c7435
--- /dev/null
+++ b/scripts/init_user_states.py
@@ -0,0 +1,86 @@
+#!/usr/bin/env python3
+"""
+Script to initialize User States (z_long) from Memory Embeddings.
+"""
+
+import sys
+import os
+import numpy as np
+import json
+from collections import defaultdict
+
+# Add src to sys.path
+sys.path.append(os.path.join(os.path.dirname(__file__), "../src"))
+
+from personalization.user_model.tensor_store import UserTensorStore, UserState
+from personalization.retrieval.preference_store.schemas import MemoryCard
+
+def main():
+    cards_path = "data/corpora/memory_cards.jsonl"
+    item_proj_path = "data/corpora/item_projection.npz"
+    user_store_path = "data/users/user_store.npz"
+    
+    # Ensure user dir
+    os.makedirs(os.path.dirname(user_store_path), exist_ok=True)
+    
+    # 1. Load data
+    print("Loading memory cards...")
+    cards = []
+    if os.path.exists(cards_path):
+        with open(cards_path, "r") as f:
+            for line in f:
+                cards.append(MemoryCard.model_validate_json(line))
+    else:
+        print("No memory cards found. Exiting.")
+        return
+
+    print("Loading item projection V...")
+    if not os.path.exists(item_proj_path):
+        print("Item projection not found. Run build_item_space.py first.")
+        return
+        
+    proj_data = np.load(item_proj_path)
+    V = proj_data["V"] # [M, k]
+    
+    if len(cards) != V.shape[0]:
+        print(f"Warning: Number of cards ({len(cards)}) != V rows ({V.shape[0]}). Mismatch?")
+        # If mismatch, we might need to be careful. For now assume aligned.
+    
+    k = V.shape[1]
+    
+    # 2. Group by user
+    user_indices = defaultdict(list)
+    for idx, card in enumerate(cards):
+        user_indices[card.user_id].append(idx)
+        
+    # 3. Initialize Store
+    print(f"Initializing UserStore at {user_store_path}...")
+    store = UserTensorStore(k=k, path=user_store_path)
+    
+    # 4. Compute z_long and save
+    print(f"Processing {len(user_indices)} users...")
+    for uid, indices in user_indices.items():
+        if not indices:
+            continue
+            
+        # Get item vectors for this user
+        # indices is list of int, V is numpy array
+        user_items = V[indices]
+        
+        # Mean pooling
+        z_long = np.mean(user_items, axis=0)
+        
+        # Get/Create state
+        state = store.get_state(uid)
+        state.z_long = z_long
+        state.z_short = np.zeros(k, dtype=np.float32)
+        state.reward_ma = 0.0
+        
+        store.save_state(state)
+        
+    store.persist()
+    print("Done. User states initialized.")
+
+if __name__ == "__main__":  # run main() only when executed as a script, not on import
+    main()
+
-- 
cgit v1.2.3