Diffstat (limited to 'collaborativeagents/scripts/run_experiments.py')
-rw-r--r--  collaborativeagents/scripts/run_experiments.py | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)
diff --git a/collaborativeagents/scripts/run_experiments.py b/collaborativeagents/scripts/run_experiments.py
index 0ba0ba0..e04680c 100644
--- a/collaborativeagents/scripts/run_experiments.py
+++ b/collaborativeagents/scripts/run_experiments.py
@@ -89,10 +89,13 @@ class ExperimentConfig:
use_openai_user: bool = False
openai_user_model: str = "gpt-5" # Model name for OpenAI user agent
- # Reward mode: "keyword" (implicit user signals) or "llm" (GPT-5-nano judge)
+ # Reward mode: "keyword" (implicit user signals), "llm" (GPT-4o-mini), or "llm_local" (local vLLM)
# This is a global option applied to ALL methods that use RL updates
reward_mode: str = "keyword"
+ # vLLM URL for local reward model (only used when reward_mode="llm_local")
+ reward_vllm_url: str = "http://localhost:8005/v1"
+
# Parallel/Batch processing
parallel_profiles: int = 50 # Number of profiles to process in parallel
use_batch_processing: bool = True # Use turn-synchronous batch processing for vanilla/all_memory
@@ -248,6 +251,7 @@ class ExperimentRunner:
use_vllm=self.config.use_vllm,
use_shared_models=use_shared_models,
reward_mode=self.config.reward_mode,
+ reward_vllm_url=self.config.reward_vllm_url,
)
# Profile will be passed to start_session() when the conversation begins
return adapter
@@ -1264,8 +1268,10 @@ def main():
help="Use OpenAI API (GPT-5) for user simulation instead of vLLM")
parser.add_argument("--openai-user-model", type=str, default="gpt-5",
help="OpenAI model name for user simulator (default: gpt-5)")
- parser.add_argument("--reward-mode", type=str, default="keyword", choices=["keyword", "llm"],
- help="Reward mode for RL updates: 'keyword' (user signals) or 'llm' (GPT-5-nano judge)")
+ parser.add_argument("--reward-mode", type=str, default="keyword", choices=["keyword", "llm", "llm_local"],
+ help="Reward mode: 'keyword' (user signals), 'llm' (GPT-4o-mini), or 'llm_local' (local vLLM)")
+ parser.add_argument("--reward-vllm-url", type=str, default="http://localhost:8005/v1",
+ help="vLLM server URL for local reward model (when --reward-mode=llm_local)")
parser.add_argument("--parallel-profiles", type=int, default=50,
help="Number of profiles to process in parallel (requires --use-vllm)")
@@ -1302,6 +1308,7 @@ def main():
use_openai_user=args.use_openai_user,
openai_user_model=args.openai_user_model,
reward_mode=args.reward_mode,
+ reward_vllm_url=args.reward_vllm_url,
parallel_profiles=args.parallel_profiles,
use_batch_processing=args.use_batch_processing,
batch_size_conversations=args.batch_size,
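
Usage sketch for the option added in this diff (illustrative only; the CLI flags and config fields come from the diff above, but the keyword-argument construction of ExperimentConfig and the import path are assumptions, and a vLLM server must already be serving the reward model at the given URL):

    # Assumed command-line invocation using the new flags:
    #   python collaborativeagents/scripts/run_experiments.py \
    #       --reward-mode llm_local --reward-vllm-url http://localhost:8005/v1

    # Hypothetical programmatic equivalent (assumes ExperimentConfig is a dataclass-style
    # config accepting its fields as keyword arguments):
    from run_experiments import ExperimentConfig  # import path is an assumption

    config = ExperimentConfig(
        reward_mode="llm_local",                     # local vLLM judge instead of keyword/OpenAI rewards
        reward_vllm_url="http://localhost:8005/v1",  # URL of the locally served reward model
    )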