From 680513b7771a29f27cbbb3ffb009a69a913de6f9 Mon Sep 17 00:00:00 2001 From: YurenHao0426 Date: Tue, 27 Jan 2026 12:15:45 -0600 Subject: local reward model --- src/personalization/serving/personalized_llm.py | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) (limited to 'src/personalization/serving/personalized_llm.py') diff --git a/src/personalization/serving/personalized_llm.py b/src/personalization/serving/personalized_llm.py index 733ff87..45d002b 100644 --- a/src/personalization/serving/personalized_llm.py +++ b/src/personalization/serving/personalized_llm.py @@ -282,8 +282,9 @@ class PersonalizedLLM: use_shared_models: bool = False, # Use shared singleton models for multi-threaded efficiency reranker_type: str = "qwen3", # "qwen3" (8B) or "bge" (278M) best_of_n: int = 1, # Generate N responses and pick best (for RAG methods) - reward_mode: str = "keyword", # "keyword" (legacy heuristic) or "llm" (GPT-5-nano judge) + reward_mode: str = "keyword", # "keyword", "llm" (GPT-4o-mini), or "llm_local" (local vLLM) llm_reward_config: Optional["LLMRewardConfig"] = None, # Config for LLM judge + reward_vllm_url: Optional[str] = None, # vLLM URL for local reward model (when reward_mode="llm_local") ): """ Initialize the PersonalizedLLM. @@ -317,12 +318,21 @@ class PersonalizedLLM: self.eval_mode = eval_mode # True = greedy, False = sample self.reranker_type = reranker_type # "qwen3" or "bge" self.best_of_n = best_of_n # Generate N responses and pick best - self.reward_mode = reward_mode # "keyword" or "llm" + self.reward_mode = reward_mode # "keyword", "llm", or "llm_local" # Initialize LLM reward client if using LLM judge - self._llm_reward_client: Optional[LLMRewardClient] = None + self._llm_reward_client = None # Can be LLMRewardClient or LocalLLMRewardClient if reward_mode == "llm": self._llm_reward_client = LLMRewardClient(llm_reward_config or LLMRewardConfig()) + elif reward_mode == "llm_local": + from personalization.feedback.local_llm_reward import ( + LocalLLMRewardClient, + LocalLLMRewardConfig, + ) + local_config = LocalLLMRewardConfig( + vllm_url=reward_vllm_url or "http://localhost:8005/v1", + ) + self._llm_reward_client = LocalLLMRewardClient(local_config) # Multi-GPU device assignment self._device_assignment = device_assignment or { @@ -743,7 +753,7 @@ class PersonalizedLLM: } # Auto-compute reward via LLM judge if enabled - if self.reward_mode == "llm" and self._llm_reward_client is not None: + if self._llm_reward_client is not None: import asyncio try: reward, gating = asyncio.run(eval_step_llm( @@ -974,7 +984,7 @@ class PersonalizedLLM: } # Auto-compute reward via LLM judge if enabled - if self.reward_mode == "llm" and self._llm_reward_client is not None: + if self._llm_reward_client is not None: import asyncio try: reward, gating = asyncio.run(eval_step_llm( -- cgit v1.2.3