author     YurenHao0426 <blackhao0426@gmail.com>  2026-01-27 12:15:45 -0600
committer  YurenHao0426 <blackhao0426@gmail.com>  2026-01-27 12:15:45 -0600
commit     680513b7771a29f27cbbb3ffb009a69a913de6f9 (patch)
tree       a0d60aef9ade1b2953b915f535b990c0de95e493 /src/personalization/serving/personalized_llm.py
parent     c06ec2f3b80f8968f09eb801b69237495b055ec1 (diff)
local reward model
Diffstat (limited to 'src/personalization/serving/personalized_llm.py')
-rw-r--r--  src/personalization/serving/personalized_llm.py | 20 +++++++++++++++-----
1 file changed, 15 insertions(+), 5 deletions(-)
diff --git a/src/personalization/serving/personalized_llm.py b/src/personalization/serving/personalized_llm.py
index 733ff87..45d002b 100644
--- a/src/personalization/serving/personalized_llm.py
+++ b/src/personalization/serving/personalized_llm.py
@@ -282,8 +282,9 @@ class PersonalizedLLM:
         use_shared_models: bool = False,  # Use shared singleton models for multi-threaded efficiency
         reranker_type: str = "qwen3",  # "qwen3" (8B) or "bge" (278M)
         best_of_n: int = 1,  # Generate N responses and pick best (for RAG methods)
-        reward_mode: str = "keyword",  # "keyword" (legacy heuristic) or "llm" (GPT-5-nano judge)
+        reward_mode: str = "keyword",  # "keyword", "llm" (GPT-4o-mini), or "llm_local" (local vLLM)
         llm_reward_config: Optional["LLMRewardConfig"] = None,  # Config for LLM judge
+        reward_vllm_url: Optional[str] = None,  # vLLM URL for local reward model (when reward_mode="llm_local")
     ):
         """
         Initialize the PersonalizedLLM.
@@ -317,12 +318,21 @@
         self.eval_mode = eval_mode  # True = greedy, False = sample
         self.reranker_type = reranker_type  # "qwen3" or "bge"
         self.best_of_n = best_of_n  # Generate N responses and pick best
-        self.reward_mode = reward_mode  # "keyword" or "llm"
+        self.reward_mode = reward_mode  # "keyword", "llm", or "llm_local"

         # Initialize LLM reward client if using LLM judge
-        self._llm_reward_client: Optional[LLMRewardClient] = None
+        self._llm_reward_client = None  # Can be LLMRewardClient or LocalLLMRewardClient
         if reward_mode == "llm":
             self._llm_reward_client = LLMRewardClient(llm_reward_config or LLMRewardConfig())
+        elif reward_mode == "llm_local":
+            from personalization.feedback.local_llm_reward import (
+                LocalLLMRewardClient,
+                LocalLLMRewardConfig,
+            )
+            local_config = LocalLLMRewardConfig(
+                vllm_url=reward_vllm_url or "http://localhost:8005/v1",
+            )
+            self._llm_reward_client = LocalLLMRewardClient(local_config)

         # Multi-GPU device assignment
         self._device_assignment = device_assignment or {
@@ -743,7 +753,7 @@
             }

             # Auto-compute reward via LLM judge if enabled
-            if self.reward_mode == "llm" and self._llm_reward_client is not None:
+            if self._llm_reward_client is not None:
                 import asyncio
                 try:
                     reward, gating = asyncio.run(eval_step_llm(
@@ -974,7 +984,7 @@
             }

             # Auto-compute reward via LLM judge if enabled
-            if self.reward_mode == "llm" and self._llm_reward_client is not None:
+            if self._llm_reward_client is not None:
                 import asyncio
                 try:
                     reward, gating = asyncio.run(eval_step_llm(
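
Usage note: both judge modes now share the same client attribute, and the two reward-gating checks above test self._llm_reward_client is not None rather than reward_mode == "llm", so the local judge reuses the existing eval_step_llm path unchanged. A minimal construction sketch follows; only reward_mode and reward_vllm_url are confirmed by this diff, and all other constructor arguments are left at their defaults:

    # Minimal sketch: enabling the local vLLM reward judge added in this commit.
    # Only reward_mode and reward_vllm_url appear in the diff above; everything
    # else stays at the constructor defaults.
    from personalization.serving.personalized_llm import PersonalizedLLM

    llm = PersonalizedLLM(
        reward_mode="llm_local",                     # route reward scoring to the local judge
        reward_vllm_url="http://localhost:8005/v1",  # OpenAI-compatible vLLM endpoint (diff default)
    )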
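
The llm_local branch imports LocalLLMRewardClient and LocalLLMRewardConfig from personalization.feedback.local_llm_reward, which is not part of this diff. As a rough sketch of what such a client could look like against a vLLM OpenAI-compatible server: everything here beyond the vllm_url field (the model name, the score method, its prompt, and the 0-10 scale) is an assumption, not the repository's actual API.

    # Hypothetical sketch of the imported client, assuming vLLM's
    # OpenAI-compatible server; the real local_llm_reward.py may differ
    # in names, fields, and scoring logic.
    from dataclasses import dataclass

    from openai import AsyncOpenAI


    @dataclass
    class LocalLLMRewardConfig:
        vllm_url: str = "http://localhost:8005/v1"  # matches the default in the diff
        model: str = "local-judge"                  # assumed model name on the server
        api_key: str = "EMPTY"                      # vLLM ignores the key by default


    class LocalLLMRewardClient:
        def __init__(self, config: LocalLLMRewardConfig):
            self.config = config
            self._client = AsyncOpenAI(base_url=config.vllm_url, api_key=config.api_key)

        async def score(self, prompt: str, response: str) -> float:
            """Ask the local judge for a 0-10 rating and map it to [0, 1]."""
            completion = await self._client.chat.completions.create(
                model=self.config.model,
                messages=[{
                    "role": "user",
                    "content": (
                        "Rate the response to the prompt on a 0-10 scale. "
                        "Reply with the number only.\n\n"
                        f"Prompt: {prompt}\nResponse: {response}"
                    ),
                }],
                temperature=0.0,
            )
            try:
                return float(completion.choices[0].message.content.strip()) / 10.0
            except (TypeError, ValueError):
                return 0.0  # unparseable judge output counts as zero reward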