summaryrefslogtreecommitdiff
path: root/scripts/day1_demo.py
diff options
context:
space:
mode:
authorYurenHao0426 <blackhao0426@gmail.com>2025-12-17 04:29:37 -0600
committerYurenHao0426 <blackhao0426@gmail.com>2025-12-17 04:29:37 -0600
commite43b3f8aa36c198b95c1e46bea2eaf3893b13dc3 (patch)
tree6ce8a00d2f8b9ebd83c894a27ea01ac50cfb2ff5 /scripts/day1_demo.py
Initial commit (clean history)HEADmain
Diffstat (limited to 'scripts/day1_demo.py')
-rw-r--r--scripts/day1_demo.py120
1 files changed, 120 insertions, 0 deletions
diff --git a/scripts/day1_demo.py b/scripts/day1_demo.py
new file mode 100644
index 0000000..b201229
--- /dev/null
+++ b/scripts/day1_demo.py
@@ -0,0 +1,120 @@
+#!/usr/bin/env python3
+"""
+Day 1 Demo: End-to-end Minimal Memory RAG.
+1. Load MemoryCards + Embeddings.
+2. Receive a query.
+3. Retrieve top-k memories.
+4. Generate answer with QwenInstruct.
+"""
+
+import json
+import numpy as np
+import torch
+import sys
+import os
+
+# Add src to sys.path so we can import personalization
+sys.path.append(os.path.join(os.path.dirname(__file__), "../src"))
+
+from typing import List
+
+from personalization.config.settings import load_local_models_config
+from personalization.models.embedding.qwen3_8b import Qwen3Embedding8B
+from personalization.models.llm.qwen_instruct import QwenInstruct
+from personalization.retrieval.preference_store.schemas import MemoryCard
+
def load_memory_store(cards_path: str, embs_path: str):
    """Load persisted MemoryCards and their embedding matrix.

    Args:
        cards_path: Path to a JSONL file with one serialized MemoryCard per line.
        embs_path: Path to a ``.npy`` file holding the [M, d] embedding matrix.

    Returns:
        Tuple ``(cards, embs)`` where ``cards`` is ``list[MemoryCard]`` and
        ``embs`` is the loaded ``np.ndarray``. Row ``i`` of ``embs`` is assumed
        to correspond to ``cards[i]`` — TODO confirm against the writer script
        (scripts/migrate_preferences.py).

    Raises:
        FileNotFoundError: If either file is missing (caught by main()).
        ValueError: If the card count does not match the embedding row count.
    """
    print(f"Loading memory store from {cards_path}...")
    cards = []
    with open(cards_path, "r", encoding="utf-8") as f:
        for line in f:
            # Skip blank lines (e.g. stray empty lines in the JSONL) instead
            # of crashing inside pydantic's JSON validation.
            if line.strip():
                cards.append(MemoryCard.model_validate_json(line))

    embs = np.load(embs_path)

    # Fail fast on a corrupt/mismatched store: retrieval indexes cards by
    # embedding row, so a misalignment would silently return wrong memories.
    if len(cards) != embs.shape[0]:
        raise ValueError(
            f"Memory store mismatch: {len(cards)} cards vs {embs.shape[0]} embeddings"
        )
    return cards, embs
+
def cosine_similarity(E: np.ndarray, e_q: np.ndarray) -> np.ndarray:
    """Cosine similarity between each row of ``E`` ([M, d]) and ``e_q`` ([d]).

    Both inputs are assumed to be L2-normalized already, so a plain dot
    product is the cosine — no norm division is performed here.
    """
    return E @ e_q
+
def dense_retrieve(
    query: str,
    embedder: "Qwen3Embedding8B",
    cards: "List[MemoryCard]",
    E: np.ndarray,
    topk: int = 3,
) -> "List[MemoryCard]":
    """Return the top-k memory cards most similar to the query.

    Args:
        query: Natural-language query text.
        embedder: Encoder whose ``encode()`` maps texts to normalized vectors
            (returns ``list[list[float]]`` when ``return_tensor=False``).
        cards: Memory cards aligned row-for-row with ``E``.
        E: [M, d] matrix of L2-normalized card embeddings.
        topk: Maximum number of cards to return.

    Returns:
        Up to ``min(topk, len(cards))`` cards, ordered by descending similarity.
    """
    # Fix 1: bail out BEFORE encoding — the original ran the (expensive)
    # embedding model even when the store was empty.
    # Fix 2: guard topk <= 0 — the original's argsort(sims)[-0:] slices the
    # WHOLE array (since -0 == 0), returning every card instead of none.
    if not cards or topk <= 0:
        return []

    e_q = np.asarray(
        embedder.encode([query], normalize=True, return_tensor=False)[0],
        dtype=np.float32,
    )

    # Vectors are pre-normalized, so the dot product is the cosine similarity.
    sims = E @ e_q

    # argsort is ascending: take the last k indices and reverse for best-first.
    k = min(topk, len(cards))
    idx = np.argsort(sims)[-k:][::-1]
    return [cards[i] for i in idx]
+
def main():
    """Run the Day 1 end-to-end demo: load memories, retrieve, generate."""
    cards_path = "data/corpora/memory_cards.jsonl"
    embs_path = "data/corpora/memory_embeddings.npy"

    try:
        memory_cards, embedding_matrix = load_memory_store(cards_path, embs_path)
    except FileNotFoundError:
        print("Error: Memory store not found. Please run scripts/migrate_preferences.py first.")
        sys.exit(1)
    print(f"Loaded {len(memory_cards)} memory cards.")

    cfg = load_local_models_config()

    print("Initializing models...")
    embedder = Qwen3Embedding8B.from_config(cfg)
    llm = QwenInstruct.from_config(cfg)

    # A CLI argument overrides the built-in demo query. The default is meant
    # to trigger retrieval of any code/formatting preferences harvested from
    # the pilot study.
    query = (
        sys.argv[1]
        if len(sys.argv) > 1
        else "Please write a function to calculate fibonacci numbers. Remember my preferences."
    )

    print(f"\nQuery: {query}")

    retrieved = dense_retrieve(query, embedder, memory_cards, embedding_matrix, topk=3)

    print(f"\nRetrieved {len(retrieved)} memories:")
    for card in retrieved:
        print(f" - [{card.kind}] {card.note_text} (from user: {card.user_id})")
    note_texts = [card.note_text for card in retrieved]

    print("\nGenerating answer...")
    # Single-turn "history" containing only the current user query.
    chat_history = [{"role": "user", "content": query}]

    answer = llm.answer(chat_history, note_texts)

    separator = "-" * 40
    print(separator)
    print("Answer:")
    print(answer)
    print(separator)


if __name__ == "__main__":
    main()
+