#!/bin/bash
# Run vanilla and contextual baselines with matching datasets
# (math-hard, math-500, bigcodebench).
# Same setup as rag/reflection/rag_vector 60s experiments for fair comparison.
#
# Usage: run from the directory that contains collaborativeagents/ — every
# path below is relative to the current working directory.
#
# Exit status: 0 if both baselines succeed, 1 if either run (or the log
# directory setup) fails.

# pipefail: without it, `python ... | tee` reports tee's status and a crashed
# experiment would be announced as "completed".
# No `set -e` on purpose: the second baseline is still attempted when the
# first one fails, matching the original run-both behaviour.
set -u -o pipefail

readonly RESULTS_DIR="collaborativeagents/results"

# Print the current local time as YYYY-MM-DD HH:MM:SS.
timestamp() {
  date '+%Y-%m-%d %H:%M:%S'
}

#######################################
# Run one baseline method with the shared 60-profile / 60-session setup.
# Globals:   RESULTS_DIR (read)
# Arguments: $1 - method name passed to --methods (vanilla | contextual);
#                 also used to derive the output dir and log file names
# Outputs:   experiment stdout+stderr, copied to
#            ${RESULTS_DIR}/<method>_60s.log via tee
# Returns:   status of the python | tee pipeline (non-zero if either fails)
#######################################
run_baseline() {
  local method=$1

  echo "Starting ${method}..."
  python collaborativeagents/scripts/run_experiments.py \
    --methods "${method}" \
    --datasets math-hard,math-500,bigcodebench \
    --n-profiles 60 \
    --n-sessions 60 \
    --max-turns 10 \
    --use-vllm \
    --vllm-agent-url http://localhost:8003/v1 \
    --vllm-user-url http://localhost:8004/v1 \
    --use-batch-processing \
    --batch-size 4 \
    --parallel-profiles 20 \
    --profile-path collaborativeagents/data/complex_profiles_v2/profiles_200.jsonl \
    --output-dir "${RESULTS_DIR}/${method}_60s" \
    2>&1 | tee "${RESULTS_DIR}/${method}_60s.log"
}

main() {
  local failed=0
  local rc=0

  echo "Starting baseline experiments (vanilla + contextual)..."
  timestamp

  # tee cannot create its log file if the directory is missing; create it up
  # front so the log is never silently lost.
  if ! mkdir -p -- "${RESULTS_DIR}"; then
    echo "Cannot create results directory: ${RESULTS_DIR}" >&2
    return 1
  fi

  # Vanilla (no personalization)
  rc=0
  run_baseline vanilla || rc=$?
  if (( rc == 0 )); then
    echo "Vanilla completed at $(timestamp)"
  else
    echo "Vanilla FAILED (exit ${rc}) at $(timestamp)" >&2
    failed=1
  fi

  # Contextual (full history in context)
  rc=0
  run_baseline contextual || rc=$?
  if (( rc != 0 )); then
    echo "Contextual FAILED (exit ${rc}) at $(timestamp)" >&2
    failed=1
  fi

  if (( failed )); then
    echo "Baseline experiments finished with failures at $(timestamp)" >&2
    return 1
  fi

  echo "Both baselines completed at $(timestamp)"
}

main "$@"