diff options
| author | YurenHao0426 <blackhao0426@gmail.com> | 2026-01-27 12:15:45 -0600 |
|---|---|---|
| committer | YurenHao0426 <blackhao0426@gmail.com> | 2026-01-27 12:15:45 -0600 |
| commit | 680513b7771a29f27cbbb3ffb009a69a913de6f9 (patch) | |
| tree | a0d60aef9ade1b2953b915f535b990c0de95e493 /src/personalization/serving | |
| parent | c06ec2f3b80f8968f09eb801b69237495b055ec1 (diff) | |
local reward model
Diffstat (limited to 'src/personalization/serving')
| -rw-r--r-- | src/personalization/serving/personalized_llm.py | 20 |
1 file changed, 15 insertions, 5 deletions
diff --git a/src/personalization/serving/personalized_llm.py b/src/personalization/serving/personalized_llm.py
index 733ff87..45d002b 100644
--- a/src/personalization/serving/personalized_llm.py
+++ b/src/personalization/serving/personalized_llm.py
@@ -282,8 +282,9 @@ class PersonalizedLLM:
         use_shared_models: bool = False,  # Use shared singleton models for multi-threaded efficiency
         reranker_type: str = "qwen3",  # "qwen3" (8B) or "bge" (278M)
         best_of_n: int = 1,  # Generate N responses and pick best (for RAG methods)
-        reward_mode: str = "keyword",  # "keyword" (legacy heuristic) or "llm" (GPT-5-nano judge)
+        reward_mode: str = "keyword",  # "keyword", "llm" (GPT-4o-mini), or "llm_local" (local vLLM)
         llm_reward_config: Optional["LLMRewardConfig"] = None,  # Config for LLM judge
+        reward_vllm_url: Optional[str] = None,  # vLLM URL for local reward model (when reward_mode="llm_local")
     ):
         """
         Initialize the PersonalizedLLM.
@@ -317,12 +318,21 @@ class PersonalizedLLM:
         self.eval_mode = eval_mode  # True = greedy, False = sample
         self.reranker_type = reranker_type  # "qwen3" or "bge"
         self.best_of_n = best_of_n  # Generate N responses and pick best
-        self.reward_mode = reward_mode  # "keyword" or "llm"
+        self.reward_mode = reward_mode  # "keyword", "llm", or "llm_local"

         # Initialize LLM reward client if using LLM judge
-        self._llm_reward_client: Optional[LLMRewardClient] = None
+        self._llm_reward_client = None  # Can be LLMRewardClient or LocalLLMRewardClient
         if reward_mode == "llm":
             self._llm_reward_client = LLMRewardClient(llm_reward_config or LLMRewardConfig())
+        elif reward_mode == "llm_local":
+            from personalization.feedback.local_llm_reward import (
+                LocalLLMRewardClient,
+                LocalLLMRewardConfig,
+            )
+            local_config = LocalLLMRewardConfig(
+                vllm_url=reward_vllm_url or "http://localhost:8005/v1",
+            )
+            self._llm_reward_client = LocalLLMRewardClient(local_config)

         # Multi-GPU device assignment
         self._device_assignment = device_assignment or {
@@ -743,7 +753,7 @@ class PersonalizedLLM:
         }

         # Auto-compute reward via LLM judge if enabled
-        if self.reward_mode == "llm" and self._llm_reward_client is not None:
+        if self._llm_reward_client is not None:
             import asyncio
             try:
                 reward, gating = asyncio.run(eval_step_llm(
@@ -974,7 +984,7 @@ class PersonalizedLLM:
         }

         # Auto-compute reward via LLM judge if enabled
-        if self.reward_mode == "llm" and self._llm_reward_client is not None:
+        if self._llm_reward_client is not None:
             import asyncio
             try:
                 reward, gating = asyncio.run(eval_step_llm(
