#!/usr/bin/env python3
"""
Day 1 Demo: End-to-end Minimal Memory RAG.
1. Load MemoryCards + Embeddings.
2. Receive a query.
3. Retrieve top-k memories.
4. Generate answer with QwenInstruct.
"""
import os
import sys
from typing import List

import numpy as np

# Add src to sys.path so we can import the personalization package.
sys.path.append(os.path.join(os.path.dirname(__file__), "../src"))

from personalization.config.settings import load_local_models_config
from personalization.models.embedding.qwen3_8b import Qwen3Embedding8B
from personalization.models.llm.qwen_instruct import QwenInstruct
from personalization.retrieval.preference_store.schemas import MemoryCard


def load_memory_store(cards_path: str, embs_path: str):
    """Load MemoryCards from a JSONL file and their embedding matrix from a .npy file."""
    print(f"Loading memory store from {cards_path}...")
    cards = []
    with open(cards_path, "r", encoding="utf-8") as f:
        for line in f:
            cards.append(MemoryCard.model_validate_json(line))
    embs = np.load(embs_path)
    return cards, embs


def cosine_similarity(E: np.ndarray, e_q: np.ndarray) -> np.ndarray:
    # E: [M, d], e_q: [d]. Vectors are assumed L2-normalized,
    # so the dot product equals cosine similarity.
    return np.dot(E, e_q)


def dense_retrieve(
    query: str,
    embedder: Qwen3Embedding8B,
    cards: List[MemoryCard],
    E: np.ndarray,
    topk: int = 3,
) -> List[MemoryCard]:
    """Return the top-k memory cards most similar to the query."""
    if len(cards) == 0:
        return []
    # Encode the query; encode() returns list[list[float]] when return_tensor=False.
    e_q_list = embedder.encode([query], normalize=True, return_tensor=False)
    e_q = np.array(e_q_list[0], dtype=np.float32)
    # Cosine similarity against all stored memory embeddings.
    sims = cosine_similarity(E, e_q)
    # argsort is ascending, so take the last k indices and reverse for descending order.
    k = min(topk, len(cards))
    idx = np.argsort(sims)[-k:][::-1]
    return [cards[i] for i in idx]


def main():
    cards_path = "data/corpora/memory_cards.jsonl"
    embs_path = "data/corpora/memory_embeddings.npy"

    try:
        cards, embs = load_memory_store(cards_path, embs_path)
        print(f"Loaded {len(cards)} memory cards.")
    except FileNotFoundError:
        print("Error: Memory store not found. Please run scripts/migrate_preferences.py first.")
        sys.exit(1)

    cfg = load_local_models_config()
    print("Initializing models...")
    embedder = Qwen3Embedding8B.from_config(cfg)
    llm = QwenInstruct.from_config(cfg)

    # Default demo query, chosen to hit coding/formatting preferences from the pilot-study
    # migration if any were extracted; a command-line argument overrides it.
    query = "Please write a function to calculate fibonacci numbers. Remember my preferences."
    if len(sys.argv) > 1:
        query = sys.argv[1]
    print(f"\nQuery: {query}")

    # Retrieve the top-k memories for the query.
    hits = dense_retrieve(query, embedder, cards, embs, topk=3)
    print(f"\nRetrieved {len(hits)} memories:")
    notes = []
    for h in hits:
        print(f" - [{h.kind}] {h.note_text} (from user: {h.user_id})")
        notes.append(h.note_text)

    # Generate an answer conditioned on the retrieved preference notes.
    print("\nGenerating answer...")
    # Minimal history: just the current user turn.
    history = [{"role": "user", "content": query}]
    answer = llm.answer(history, notes)
    print("-" * 40)
    print("Answer:")
    print(answer)
    print("-" * 40)


if __name__ == "__main__":
    main()