diff options
Diffstat (limited to 'scripts/day1_demo.py')
| -rw-r--r-- | scripts/day1_demo.py | 120 |
1 file changed, 120 insertions, 0 deletions
diff --git a/scripts/day1_demo.py b/scripts/day1_demo.py new file mode 100644 index 0000000..b201229 --- /dev/null +++ b/scripts/day1_demo.py @@ -0,0 +1,120 @@ +#!/usr/bin/env python3 +""" +Day 1 Demo: End-to-end Minimal Memory RAG. +1. Load MemoryCards + Embeddings. +2. Receive a query. +3. Retrieve top-k memories. +4. Generate answer with QwenInstruct. +""" + +import json +import numpy as np +import torch +import sys +import os + +# Add src to sys.path so we can import personalization +sys.path.append(os.path.join(os.path.dirname(__file__), "../src")) + +from typing import List + +from personalization.config.settings import load_local_models_config +from personalization.models.embedding.qwen3_8b import Qwen3Embedding8B +from personalization.models.llm.qwen_instruct import QwenInstruct +from personalization.retrieval.preference_store.schemas import MemoryCard + +def load_memory_store(cards_path: str, embs_path: str): + print(f"Loading memory store from {cards_path}...") + cards = [] + with open(cards_path, "r", encoding="utf-8") as f: + for line in f: + cards.append(MemoryCard.model_validate_json(line)) + + embs = np.load(embs_path) + return cards, embs + +def cosine_similarity(E: np.ndarray, e_q: np.ndarray) -> np.ndarray: + # E: [M, d], e_q: [d] + # Assumes vectors are normalized + return np.dot(E, e_q) + +def dense_retrieve( + query: str, + embedder: Qwen3Embedding8B, + cards: List[MemoryCard], + E: np.ndarray, + topk: int = 3 +) -> List[MemoryCard]: + + # Encode query + # encode returns list[list[float]] or tensor + e_q_list = embedder.encode([query], normalize=True, return_tensor=False) + e_q = np.array(e_q_list[0], dtype=np.float32) + + # Sim + sims = cosine_similarity(E, e_q) + + # Top-k + # argsort is ascending, so take last k and reverse + if len(cards) == 0: + return [] + + k = min(topk, len(cards)) + idx = np.argsort(sims)[-k:][::-1] + + results = [cards[i] for i in idx] + return results + +def main(): + cards_path = "data/corpora/memory_cards.jsonl" + 
embs_path = "data/corpora/memory_embeddings.npy" + + try: + cards, embs = load_memory_store(cards_path, embs_path) + print(f"Loaded {len(cards)} memory cards.") + except FileNotFoundError: + print("Error: Memory store not found. Please run scripts/migrate_preferences.py first.") + sys.exit(1) + + cfg = load_local_models_config() + + print("Initializing models...") + embedder = Qwen3Embedding8B.from_config(cfg) + llm = QwenInstruct.from_config(cfg) + + # Demo Query + # Let's try to pick a query that should trigger a retrieval if we have relevant memories. + # Since we processed pilot_study, let's assume we might have some "python code" or "formatting" prefs. + # If the pilot study didn't yield many prefs, we might just query something generic. + query = "Please write a function to calculate fibonacci numbers. Remember my preferences." + + # Or let's allow user input or command line arg + if len(sys.argv) > 1: + query = sys.argv[1] + + print(f"\nQuery: {query}") + + # Retrieve + hits = dense_retrieve(query, embedder, cards, embs, topk=3) + + print(f"\nRetrieved {len(hits)} memories:") + notes = [] + for h in hits: + print(f" - [{h.kind}] {h.note_text} (from user: {h.user_id})") + notes.append(h.note_text) + + # Generate + print("\nGenerating answer...") + # Mock history: just the current turn + history = [{"role": "user", "content": query}] + + answer = llm.answer(history, notes) + + print("-" * 40) + print("Answer:") + print(answer) + print("-" * 40) + +if __name__ == "__main__": + main() + |
