#!/usr/bin/env python3
"""
Day 1 Demo: End-to-end Minimal Memory RAG.

1. Load MemoryCards + embeddings.
2. Receive a query.
3. Retrieve the top-k memories.
4. Generate an answer with QwenInstruct.
"""
import os
import sys

import numpy as np

# Add src to sys.path so we can import personalization
sys.path.append(os.path.join(os.path.dirname(__file__), "../src"))

from typing import List

from personalization.config.settings import load_local_models_config
from personalization.models.embedding.qwen3_8b import Qwen3Embedding8B
from personalization.models.llm.qwen_instruct import QwenInstruct
from personalization.retrieval.preference_store.schemas import MemoryCard


def load_memory_store(cards_path: str, embs_path: str):
    """Load MemoryCards from JSONL and their embedding matrix from .npy."""
    print(f"Loading memory store from {cards_path}...")
    cards = []
    with open(cards_path, "r", encoding="utf-8") as f:
        for line in f:
            if line.strip():
                cards.append(MemoryCard.model_validate_json(line))
    embs = np.load(embs_path)
    # Sanity check: the embedding matrix must have one row per card.
    assert len(cards) == embs.shape[0], "Card count and embedding rows do not match."
    return cards, embs


def cosine_similarity(E: np.ndarray, e_q: np.ndarray) -> np.ndarray:
    # E: [M, d], e_q: [d].
    # Assumes all vectors are L2-normalized, so the dot product equals cosine similarity.
    return np.dot(E, e_q)


def dense_retrieve(
    query: str,
    embedder: Qwen3Embedding8B,
    cards: List[MemoryCard],
    E: np.ndarray,
    topk: int = 3,
) -> List[MemoryCard]:
    # Nothing to retrieve from an empty store; skip encoding entirely.
    if len(cards) == 0:
        return []

    # Encode the query; encode returns list[list[float]] when return_tensor=False.
    e_q_list = embedder.encode([query], normalize=True, return_tensor=False)
    e_q = np.array(e_q_list[0], dtype=np.float32)

    # Score the query against every stored memory embedding.
    sims = cosine_similarity(E, e_q)

    # Top-k: argsort is ascending, so take the last k indices and reverse.
    k = min(topk, len(cards))
    idx = np.argsort(sims)[-k:][::-1]
    return [cards[i] for i in idx]


def main():
    cards_path = "data/corpora/memory_cards.jsonl"
    embs_path = "data/corpora/memory_embeddings.npy"

    try:
        cards, embs = load_memory_store(cards_path, embs_path)
        print(f"Loaded {len(cards)} memory cards.")
    except FileNotFoundError:
        print("Error: Memory store not found. Please run scripts/migrate_preferences.py first.")
        sys.exit(1)

    cfg = load_local_models_config()
    print("Initializing models...")
    embedder = Qwen3Embedding8B.from_config(cfg)
    llm = QwenInstruct.from_config(cfg)

    # Default demo query: chosen to trigger retrieval if the pilot_study migration
    # produced "python code" or "formatting" preferences; generic enough to still
    # exercise the pipeline if it did not.
    query = "Please write a function to calculate fibonacci numbers. Remember my preferences."
    # Allow overriding the query from the command line.
    if len(sys.argv) > 1:
        query = sys.argv[1]

    print(f"\nQuery: {query}")

    # Retrieve.
    hits = dense_retrieve(query, embedder, cards, embs, topk=3)
    print(f"\nRetrieved {len(hits)} memories:")
    notes = []
    for h in hits:
        print(f"  - [{h.kind}] {h.note_text} (from user: {h.user_id})")
        notes.append(h.note_text)

    # Generate. History is mocked as a single turn containing only the current query.
    print("\nGenerating answer...")
    history = [{"role": "user", "content": query}]
    answer = llm.answer(history, notes)

    print("-" * 40)
    print("Answer:")
    print(answer)
    print("-" * 40)


if __name__ == "__main__":
    main()
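
# Example invocation (a sketch; the script location scripts/day1_demo.py and the
# query string are assumptions, not fixed by this file). Assumes the memory store
# under data/corpora/ was built by scripts/migrate_preferences.py and the script
# is run from the repository root so the relative paths resolve:
#
#   python scripts/day1_demo.py "How do I like my Python code formatted?"
#
# Expected flow: the query is embedded, the top-3 memory cards are printed, and
# QwenInstruct generates an answer with the retrieved note_text entries as context.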