#!/bin/bash
# queue_baselines_60s.sh
#
# Provenance (recovered from the patch header this script was extracted from):
#   commit   da40fccaa2176349482581bb0f7fb610e168f1b5
#   author   YurenHao0426
#   date     Wed, 11 Feb 2026 01:13:48 +0000
#   subject  Add all-methods comparison table with dataset caveat
#     - Add vanilla/contextual/all_memory from fullscale (different datasets)
#     - Flag dataset mismatch (5-dataset vs 3-dataset)
#     - Queue matching-dataset vanilla/contextual experiment
#     - Same-dataset comparison: rag_vector leads in success (54.2%)
#   Co-Authored-By: Claude Opus 4.6
#   path     collaborativeagents/scripts/queue_baselines_60s.sh (new, mode 100755)
#
# Run vanilla and contextual baselines with matching datasets
# (math-hard, math-500, bigcodebench).
# Same setup as rag/reflection/rag_vector 60s experiments for fair comparison.
#
# Usage:
#   Invoke from the repository root; every path below is relative to the
#   current working directory.
#
# Exit status:
#   0  both run_experiments.py invocations exited 0
#   1  at least one invocation failed, or the results directory could not
#      be created
#
# The two baselines run sequentially. The second one is still attempted when
# the first fails, so one bad run does not waste the rest of the queue.

# Fail loudly on typos in variable names. `set -e` is deliberately NOT used:
# a failed baseline must not prevent the next one from starting.
set -u

# Directory that receives both the per-method output dirs and the tee'd logs.
readonly RESULTS_DIR="collaborativeagents/results"

#######################################
# Print the current local time as "YYYY-MM-DD HH:MM:SS".
# Outputs: timestamp on stdout
#######################################
timestamp() {
  date '+%Y-%m-%d %H:%M:%S'
}

#######################################
# Run one baseline method through run_experiments.py, mirroring its combined
# stdout/stderr to the terminal and to ${RESULTS_DIR}/<method>_60s.log.
# Globals:   RESULTS_DIR (read)
# Arguments: $1 - method name passed to --methods (e.g. vanilla, contextual)
# Outputs:   the experiment's output on stdout; a warning on stderr if tee
#            itself fails
# Returns:   the exit status of the python process (not of tee)
#######################################
run_baseline() {
  local method=$1
  local -a stage_rc

  python collaborativeagents/scripts/run_experiments.py \
    --methods "${method}" \
    --datasets math-hard,math-500,bigcodebench \
    --n-profiles 60 \
    --n-sessions 60 \
    --max-turns 10 \
    --use-vllm \
    --vllm-agent-url http://localhost:8003/v1 \
    --vllm-user-url http://localhost:8004/v1 \
    --use-batch-processing \
    --batch-size 4 \
    --parallel-profiles 20 \
    --profile-path collaborativeagents/data/complex_profiles_v2/profiles_200.jsonl \
    --output-dir "${RESULTS_DIR}/${method}_60s" \
    2>&1 | tee "${RESULTS_DIR}/${method}_60s.log"
  # Must be captured immediately: any further command overwrites PIPESTATUS.
  # Without this, `$?` would be tee's status and a crashed run would look OK.
  stage_rc=("${PIPESTATUS[@]}")

  if (( stage_rc[1] != 0 )); then
    echo "warning: tee exited ${stage_rc[1]};" \
      "${RESULTS_DIR}/${method}_60s.log may be incomplete" >&2
  fi
  return "${stage_rc[0]}"
}

#######################################
# Queue the vanilla and contextual baselines back to back.
# Globals:   RESULTS_DIR (read)
# Outputs:   progress messages on stdout, failure messages on stderr
# Returns:   0 if both baselines succeeded, 1 otherwise
#######################################
main() {
  local failed=0
  local rc

  echo "Starting baseline experiments (vanilla + contextual)..."
  timestamp

  # tee cannot create missing parent directories; without this the log of a
  # long run would be lost on a fresh checkout.
  if ! mkdir -p -- "${RESULTS_DIR}"; then
    echo "error: cannot create ${RESULTS_DIR}" >&2
    return 1
  fi

  # Vanilla (no personalization)
  echo "Starting vanilla..."
  run_baseline vanilla
  rc=$?
  if (( rc == 0 )); then
    echo "Vanilla completed at $(timestamp)"
  else
    echo "Vanilla FAILED (exit ${rc}) at $(timestamp)" >&2
    failed=1
  fi

  # Contextual (full history in context)
  echo "Starting contextual..."
  run_baseline contextual
  rc=$?
  if (( rc != 0 )); then
    echo "Contextual FAILED (exit ${rc}) at $(timestamp)" >&2
    failed=1
  fi

  if (( failed )); then
    echo "Baseline queue finished with failures at $(timestamp)" >&2
    return 1
  fi
  echo "Both baselines completed at $(timestamp)"
}

main