Diffstat (limited to 'collaborativeagents/adapters')
-rw-r--r--  collaborativeagents/adapters/personalized_llm_adapter.py  13
1 file changed, 10 insertions(+), 3 deletions(-)
diff --git a/collaborativeagents/adapters/personalized_llm_adapter.py b/collaborativeagents/adapters/personalized_llm_adapter.py
index c2d4727..b476272 100644
--- a/collaborativeagents/adapters/personalized_llm_adapter.py
+++ b/collaborativeagents/adapters/personalized_llm_adapter.py
@@ -58,9 +58,12 @@ class AdapterConfig:
     # Best-of-N sampling: generate N responses and pick best (for RAG methods)
     best_of_n: int = 1
 
-    # Reward mode: "keyword" (legacy heuristic) or "llm" (GPT-5-nano judge)
+    # Reward mode: "keyword" (legacy heuristic), "llm" (GPT-4o-mini), or "llm_local" (local vLLM)
     reward_mode: str = "keyword"
 
+    # vLLM URL for local reward model (only used when reward_mode="llm_local")
+    reward_vllm_url: str = "http://localhost:8005/v1"
+
     # Reward mapping for user behavior
     preference_enforcement_reward: float = -0.8  # Negative reward when user enforces
     disappointment_expression_reward: float = -0.4  # Milder negative for disappointment
@@ -116,6 +119,7 @@ class PersonalizedLLMAdapter:
             reranker_type=self.config.reranker_type,
             best_of_n=self.config.best_of_n,
             reward_mode=self.config.reward_mode,
+            reward_vllm_url=self.config.reward_vllm_url,
         )
         self._initialized = True
         print("[Adapter] Initialization complete.")
@@ -423,6 +427,7 @@ def create_baseline_adapter(
     use_vllm: bool = False,
     use_shared_models: bool = False,
     reward_mode: str = "keyword",
+    reward_vllm_url: str = "http://localhost:8005/v1",
 ) -> PersonalizedLLMAdapter:
     """
     Create an adapter configured for a specific baseline.
@@ -438,7 +443,8 @@ def create_baseline_adapter(
             - "rag_vector": Full personalization (Extractor + RAG + User Vector)
         device_assignment: GPU assignment dict
         use_vllm: If True, use vLLM HTTP API for LLM inference (much faster)
-        reward_mode: Global reward mode ("keyword" or "llm") applied to all methods
+        reward_mode: Global reward mode ("keyword", "llm", or "llm_local")
+        reward_vllm_url: vLLM URL for local reward model (when reward_mode="llm_local")
         use_shared_models: If True, share embedding/reranker models across parallel
             workers. ESSENTIAL for parallel profile processing to avoid OOM.
 
@@ -592,8 +598,9 @@ def create_baseline_adapter(
 
     if device_assignment:
         config.device_assignment = device_assignment
 
-    # Apply global reward_mode to all methods (overrides per-method defaults)
+    # Apply global reward settings to all methods (overrides per-method defaults)
     config.reward_mode = reward_mode
+    config.reward_vllm_url = reward_vllm_url
 
     return PersonalizedLLMAdapter(config)
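
Usage note: a minimal sketch of how the factory could be called with the new local reward mode after this commit. The "rag_vector" baseline name is taken from the docstring above; that the baseline is passed as the first positional argument is an assumption, since the leading parameters of create_baseline_adapter are not shown in this diff.

from collaborativeagents.adapters.personalized_llm_adapter import create_baseline_adapter

# Hypothetical call: the positional baseline argument is assumed from the docstring.
adapter = create_baseline_adapter(
    "rag_vector",                                # Extractor + RAG + User Vector
    use_vllm=True,                               # LLM inference over the vLLM HTTP API
    use_shared_models=True,                      # share embedder/reranker across workers
    reward_mode="llm_local",                     # new mode introduced by this commit
    reward_vllm_url="http://localhost:8005/v1",  # matches the new config default
)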
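For reference, a sketch of the kind of request the "llm_local" judge behind reward_vllm_url would serve. vLLM's OpenAI-compatible server does expose /v1/models and /v1/chat/completions; the judging prompt, the served model, and the single-number score format are assumptions, since the actual reward logic is outside this diff.

import requests

VLLM_URL = "http://localhost:8005/v1"  # default reward_vllm_url from this commit

# Discover the served model name from the OpenAI-compatible /models endpoint.
model = requests.get(f"{VLLM_URL}/models", timeout=10).json()["data"][0]["id"]

# Ask the local judge for a scalar preference score (prompt format is illustrative).
resp = requests.post(
    f"{VLLM_URL}/chat/completions",
    json={
        "model": model,
        "messages": [
            {"role": "system",
             "content": "Rate how well the assistant reply satisfies the user's "
                        "stated preference. Respond with one number in [0, 1]."},
            {"role": "user",
             "content": "Preference: concise answers.\nReply: <assistant reply here>"},
        ],
        "temperature": 0.0,
    },
    timeout=60,
)
score = float(resp.json()["choices"][0]["message"]["content"].strip())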