From f918fc90b8d71d1287590b016d926268be573de0 Mon Sep 17 00:00:00 2001
From: YurenHao0426 <blackhao0426@gmail.com>
Date: Tue, 27 Jan 2026 15:43:42 -0600
Subject: Add model wrapper modules (embedding, reranker, llm,
 preference_extractor)

Add Python wrappers for:
- Qwen3/Nemotron embedding models
- BGE/Qwen3 rerankers
- vLLM/Llama/Qwen LLM backends
- GPT-4o/LLM-based preference extractors

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 src/personalization/models/llm/base.py | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)
 create mode 100644 src/personalization/models/llm/base.py

(limited to 'src/personalization/models/llm/base.py')

diff --git a/src/personalization/models/llm/base.py b/src/personalization/models/llm/base.py
new file mode 100644
index 0000000..72b6ca8
--- /dev/null
+++ b/src/personalization/models/llm/base.py
@@ -0,0 +1,29 @@
+from typing import List, Protocol, Optional
+from personalization.types import ChatTurn
+
+class ChatModel(Protocol):
+    """Structural (typing.Protocol) interface for chat LLM backends; subclassing is not required."""
+    def answer(
+        self,
+        history: List[ChatTurn],
+        memory_notes: List[str],
+        max_new_tokens: int = 512,
+        temperature: float = 0.7,
+        top_p: float = 0.9,
+        top_k: Optional[int] = None,
+    ) -> str:
+        """Generate an assistant response given conversation history and memory notes.
+
+        Args:
+            history: The conversation history ending with the current user turn.
+            memory_notes: List of retrieved memory content strings.
+            max_new_tokens: Maximum number of tokens to generate (default 512).
+            temperature: Sampling temperature (default 0.7).
+            top_p: Top-p sampling threshold (default 0.9).
+            top_k: Top-k sampling limit; default None (presumably no top-k limit; confirm per backend).
+
+        Returns:
+            The generated assistant response text.
+        """
+        ...
+
-- 
cgit v1.2.3