From dc801c07cf38b0c495686463e6ca6f871a64440e Mon Sep 17 00:00:00 2001 From: YurenHao0426 Date: Tue, 27 Jan 2026 09:57:37 -0600 Subject: Add collaborativeagents module and update gitignore - Add collaborativeagents subproject with adapters, agents, and evaluation modules - Update .gitignore to exclude large binary files (.whl, .tar), wandb logs, and results Co-Authored-By: Claude Opus 4.5 --- .../slurm/fullscale/continue_to_30sess.sh | 76 ++++ .../slurm/fullscale/generate_jobs.sh | 89 ++++ .../slurm/fullscale/run_all_memory_p0.sh | 66 +++ .../slurm/fullscale/run_all_memory_p100.sh | 66 +++ .../slurm/fullscale/run_all_memory_p150.sh | 66 +++ .../slurm/fullscale/run_all_memory_p50.sh | 66 +++ .../slurm/fullscale/run_contextual_p0.sh | 66 +++ .../slurm/fullscale/run_contextual_p100.sh | 66 +++ .../slurm/fullscale/run_contextual_p150.sh | 66 +++ .../slurm/fullscale/run_contextual_p50.sh | 66 +++ collaborativeagents/slurm/fullscale/run_rag_p0.sh | 66 +++ .../slurm/fullscale/run_rag_p100.sh | 66 +++ .../slurm/fullscale/run_rag_p150.sh | 66 +++ collaborativeagents/slurm/fullscale/run_rag_p50.sh | 66 +++ .../slurm/fullscale/run_rag_vector_p0.sh | 66 +++ .../slurm/fullscale/run_rag_vector_p100.sh | 66 +++ .../slurm/fullscale/run_rag_vector_p150.sh | 66 +++ .../slurm/fullscale/run_rag_vector_p50.sh | 66 +++ .../slurm/fullscale/run_reflection_p0.sh | 66 +++ .../slurm/fullscale/run_reflection_p100.sh | 66 +++ .../slurm/fullscale/run_reflection_p150.sh | 66 +++ .../slurm/fullscale/run_reflection_p50.sh | 66 +++ .../slurm/fullscale/run_vanilla_p0.sh | 66 +++ .../slurm/fullscale/run_vanilla_p100.sh | 66 +++ .../slurm/fullscale/run_vanilla_p150.sh | 66 +++ .../slurm/fullscale/run_vanilla_p50.sh | 66 +++ collaborativeagents/slurm/fullscale/submit_all.sh | 29 ++ .../slurm/fullscale/test_25parallel.sh | 66 +++ .../slurm/fullscale/test_25parallel_15649074.err | 386 ++++++++++++++++ .../slurm/fullscale/test_50parallel.sh | 66 +++ .../slurm/fullscale/test_50parallel_15649149.err | 504 
+++++++++++++++++++++ .../slurm/fullscale/test_batch_fix.sh | 69 +++ .../slurm/fullscale/test_batch_fix_15651956.err | 165 +++++++ .../slurm/fullscale/test_local_user.sh | 94 ++++ .../slurm/fullscale/test_local_user_15652698.err | 215 +++++++++ collaborativeagents/slurm/fullscale/test_run.sh | 70 +++ 36 files changed, 3413 insertions(+) create mode 100644 collaborativeagents/slurm/fullscale/continue_to_30sess.sh create mode 100644 collaborativeagents/slurm/fullscale/generate_jobs.sh create mode 100755 collaborativeagents/slurm/fullscale/run_all_memory_p0.sh create mode 100755 collaborativeagents/slurm/fullscale/run_all_memory_p100.sh create mode 100755 collaborativeagents/slurm/fullscale/run_all_memory_p150.sh create mode 100755 collaborativeagents/slurm/fullscale/run_all_memory_p50.sh create mode 100755 collaborativeagents/slurm/fullscale/run_contextual_p0.sh create mode 100755 collaborativeagents/slurm/fullscale/run_contextual_p100.sh create mode 100755 collaborativeagents/slurm/fullscale/run_contextual_p150.sh create mode 100755 collaborativeagents/slurm/fullscale/run_contextual_p50.sh create mode 100755 collaborativeagents/slurm/fullscale/run_rag_p0.sh create mode 100755 collaborativeagents/slurm/fullscale/run_rag_p100.sh create mode 100755 collaborativeagents/slurm/fullscale/run_rag_p150.sh create mode 100755 collaborativeagents/slurm/fullscale/run_rag_p50.sh create mode 100755 collaborativeagents/slurm/fullscale/run_rag_vector_p0.sh create mode 100755 collaborativeagents/slurm/fullscale/run_rag_vector_p100.sh create mode 100755 collaborativeagents/slurm/fullscale/run_rag_vector_p150.sh create mode 100755 collaborativeagents/slurm/fullscale/run_rag_vector_p50.sh create mode 100755 collaborativeagents/slurm/fullscale/run_reflection_p0.sh create mode 100755 collaborativeagents/slurm/fullscale/run_reflection_p100.sh create mode 100755 collaborativeagents/slurm/fullscale/run_reflection_p150.sh create mode 100755 
collaborativeagents/slurm/fullscale/run_reflection_p50.sh create mode 100755 collaborativeagents/slurm/fullscale/run_vanilla_p0.sh create mode 100755 collaborativeagents/slurm/fullscale/run_vanilla_p100.sh create mode 100755 collaborativeagents/slurm/fullscale/run_vanilla_p150.sh create mode 100755 collaborativeagents/slurm/fullscale/run_vanilla_p50.sh create mode 100644 collaborativeagents/slurm/fullscale/submit_all.sh create mode 100644 collaborativeagents/slurm/fullscale/test_25parallel.sh create mode 100644 collaborativeagents/slurm/fullscale/test_25parallel_15649074.err create mode 100644 collaborativeagents/slurm/fullscale/test_50parallel.sh create mode 100644 collaborativeagents/slurm/fullscale/test_50parallel_15649149.err create mode 100644 collaborativeagents/slurm/fullscale/test_batch_fix.sh create mode 100644 collaborativeagents/slurm/fullscale/test_batch_fix_15651956.err create mode 100644 collaborativeagents/slurm/fullscale/test_local_user.sh create mode 100644 collaborativeagents/slurm/fullscale/test_local_user_15652698.err create mode 100644 collaborativeagents/slurm/fullscale/test_run.sh (limited to 'collaborativeagents/slurm/fullscale') diff --git a/collaborativeagents/slurm/fullscale/continue_to_30sess.sh b/collaborativeagents/slurm/fullscale/continue_to_30sess.sh new file mode 100644 index 0000000..93ddecc --- /dev/null +++ b/collaborativeagents/slurm/fullscale/continue_to_30sess.sh @@ -0,0 +1,76 @@ +#!/bin/bash +#SBATCH --job-name=continue_30sess +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA100x4 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=16 +#SBATCH --gres=gpu:nvidia_a100:2 +#SBATCH --mem=128G +#SBATCH --time=16:00:00 +#SBATCH --output=continue_30sess_%j.out +#SBATCH --error=continue_30sess_%j.err + +# Continue experiment from 15 to 30 sessions +# This will pick up from existing checkpoints and only run remaining sessions + +cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model +source 
/u/yurenh2/miniforge3/etc/profile.d/conda.sh +conda activate eval + +export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface +export PYTHONPATH="${PWD}/src:${PWD}/collaborativeagents:${PYTHONPATH}" +export NCCL_P2P_DISABLE=1 + +set -a +source .env +set +a + +pip install --quiet openai python-dotenv json-repair + +MODEL_8B="/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/models/llama-3.1-8b-instruct" + +pkill -f "vllm.entrypoints" 2>/dev/null || true +sleep 2 + +CUDA_VISIBLE_DEVICES=0 python -m vllm.entrypoints.openai.api_server \ + --model $MODEL_8B --port 8003 --tensor-parallel-size 1 \ + --gpu-memory-utilization 0.5 --max-model-len 8192 \ + --dtype bfloat16 --disable-log-requests & + +for i in $(seq 1 90); do + curl -s http://localhost:8003/health > /dev/null 2>&1 && break + sleep 2 +done +echo "vLLM ready." + +cd collaborativeagents/scripts + +# Find the existing results directory +RESULTS_DIR=$(ls -td ../results/fullscale_15sess/*/ 2>/dev/null | head -1) + +if [ -z "$RESULTS_DIR" ]; then + echo "ERROR: No existing results directory found in fullscale_15sess/" + exit 1 +fi + +echo "Continuing from: $RESULTS_DIR" + +# Continue with 30 sessions (the checkpoint system will skip already-completed sessions) +python run_experiments.py \ + --methods vanilla,contextual,reflection,all_memory,rag,rag_vector \ + --datasets math-hard \ + --n-profiles 200 \ + --n-sessions 30 \ + --max-turns 8 \ + --use-vllm \ + --use-openai-user \ + --openai-user-model gpt-5-mini \ + --reward-mode llm \ + --vllm-agent-url http://localhost:8003/v1 \ + --parallel-profiles 25 \ + --profile-path ../data/complex_profiles_v2/profiles_200.jsonl \ + --continue-from "$RESULTS_DIR" + +echo "Continue to 30 sessions complete!" 
+pkill -f "vllm.entrypoints" 2>/dev/null || true diff --git a/collaborativeagents/slurm/fullscale/generate_jobs.sh b/collaborativeagents/slurm/fullscale/generate_jobs.sh new file mode 100644 index 0000000..0bc5c0b --- /dev/null +++ b/collaborativeagents/slurm/fullscale/generate_jobs.sh @@ -0,0 +1,89 @@ +#!/bin/bash +# Generate all job scripts (6 methods × 4 profile ranges = 24 jobs) +# Each job: 50 profiles × 15 sessions = 750 sessions ≈ 7-8 hours + +METHODS="vanilla contextual reflection all_memory rag rag_vector" +RANGES="0:50 50:100 100:150 150:200" + +for method in $METHODS; do + for range in $RANGES; do + start=${range%:*} + end=${range#*:} + + cat > run_${method}_p${start}.sh << EOF +#!/bin/bash +#SBATCH --job-name=exp_${method}_p${start} +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA100x4 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=16 +#SBATCH --gres=gpu:nvidia_a100:2 +#SBATCH --mem=128G +#SBATCH --time=12:00:00 +#SBATCH --output=exp_${method}_p${start}_%j.out +#SBATCH --error=exp_${method}_p${start}_%j.err + +# Full run: ${method} method, profiles ${start}-${end} (50 profiles × 15 sessions) + +cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model +source /u/yurenh2/miniforge3/etc/profile.d/conda.sh +conda activate eval + +export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface +export PYTHONPATH="\${PWD}/src:\${PWD}/collaborativeagents:\${PYTHONPATH}" +export NCCL_P2P_DISABLE=1 + +set -a +source .env +set +a + +pip install --quiet openai python-dotenv json-repair + +MODEL_8B="/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/models/llama-3.1-8b-instruct" + +pkill -f "vllm.entrypoints" 2>/dev/null || true +sleep 2 + +CUDA_VISIBLE_DEVICES=0 python -m vllm.entrypoints.openai.api_server \\ + --model \$MODEL_8B --port 8003 --tensor-parallel-size 1 \\ + --gpu-memory-utilization 0.5 --max-model-len 8192 \\ + --dtype bfloat16 --disable-log-requests & + +for i in \$(seq 1 90); do + curl -s 
http://localhost:8003/health > /dev/null 2>&1 && break + sleep 2 +done +echo "vLLM ready." + +cd collaborativeagents/scripts + +python run_experiments.py \\ + --methods ${method} \\ + --datasets math-hard \\ + --n-profiles 200 \\ + --start-profile ${start} \\ + --end-profile ${end} \\ + --n-sessions 15 \\ + --max-turns 8 \\ + --use-vllm \\ + --use-openai-user \\ + --openai-user-model gpt-5-mini \\ + --reward-mode llm \\ + --vllm-agent-url http://localhost:8003/v1 \\ + --parallel-profiles 25 \\ + --profile-path ../data/complex_profiles_v2/profiles_200.jsonl \\ + --output-dir ../results/fullscale_15sess + +echo "${method} p${start}-${end} complete!" +pkill -f "vllm.entrypoints" 2>/dev/null || true +EOF + chmod +x run_${method}_p${start}.sh + echo "Created run_${method}_p${start}.sh" + done +done + +echo "" +echo "Generated 24 job scripts (6 methods × 4 profile ranges)" +echo "Each job: 50 profiles × 15 sessions = 750 sessions" +echo "Estimated time per job: ~7-8 hours" diff --git a/collaborativeagents/slurm/fullscale/run_all_memory_p0.sh b/collaborativeagents/slurm/fullscale/run_all_memory_p0.sh new file mode 100755 index 0000000..bb7968b --- /dev/null +++ b/collaborativeagents/slurm/fullscale/run_all_memory_p0.sh @@ -0,0 +1,66 @@ +#!/bin/bash +#SBATCH --job-name=exp_all_memory_p0 +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA100x4 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=16 +#SBATCH --gres=gpu:nvidia_a100:2 +#SBATCH --mem=128G +#SBATCH --time=12:00:00 +#SBATCH --output=exp_all_memory_p0_%j.out +#SBATCH --error=exp_all_memory_p0_%j.err + +# Full run: all_memory method, profiles 0-50 (50 profiles × 15 sessions) + +cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model +source /u/yurenh2/miniforge3/etc/profile.d/conda.sh +conda activate eval + +export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface +export PYTHONPATH="${PWD}/src:${PWD}/collaborativeagents:${PYTHONPATH}" +export NCCL_P2P_DISABLE=1 + +set -a 
+source .env +set +a + +pip install --quiet openai python-dotenv json-repair + +MODEL_8B="/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/models/llama-3.1-8b-instruct" + +pkill -f "vllm.entrypoints" 2>/dev/null || true +sleep 2 + +CUDA_VISIBLE_DEVICES=0 python -m vllm.entrypoints.openai.api_server \ + --model $MODEL_8B --port 8003 --tensor-parallel-size 1 \ + --gpu-memory-utilization 0.5 --max-model-len 8192 \ + --dtype bfloat16 --disable-log-requests & + +for i in $(seq 1 90); do + curl -s http://localhost:8003/health > /dev/null 2>&1 && break + sleep 2 +done +echo "vLLM ready." + +cd collaborativeagents/scripts + +python run_experiments.py \ + --methods all_memory \ + --datasets math-hard \ + --n-profiles 200 \ + --start-profile 0 \ + --end-profile 50 \ + --n-sessions 15 \ + --max-turns 8 \ + --use-vllm \ + --use-openai-user \ + --openai-user-model gpt-5-mini \ + --reward-mode llm \ + --vllm-agent-url http://localhost:8003/v1 \ + --parallel-profiles 25 \ + --profile-path ../data/complex_profiles_v2/profiles_200.jsonl \ + --output-dir ../results/fullscale_15sess + +echo "all_memory p0-50 complete!" 
+pkill -f "vllm.entrypoints" 2>/dev/null || true diff --git a/collaborativeagents/slurm/fullscale/run_all_memory_p100.sh b/collaborativeagents/slurm/fullscale/run_all_memory_p100.sh new file mode 100755 index 0000000..21db6de --- /dev/null +++ b/collaborativeagents/slurm/fullscale/run_all_memory_p100.sh @@ -0,0 +1,66 @@ +#!/bin/bash +#SBATCH --job-name=exp_all_memory_p100 +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA100x4 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=16 +#SBATCH --gres=gpu:nvidia_a100:2 +#SBATCH --mem=128G +#SBATCH --time=12:00:00 +#SBATCH --output=exp_all_memory_p100_%j.out +#SBATCH --error=exp_all_memory_p100_%j.err + +# Full run: all_memory method, profiles 100-150 (50 profiles × 15 sessions) + +cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model +source /u/yurenh2/miniforge3/etc/profile.d/conda.sh +conda activate eval + +export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface +export PYTHONPATH="${PWD}/src:${PWD}/collaborativeagents:${PYTHONPATH}" +export NCCL_P2P_DISABLE=1 + +set -a +source .env +set +a + +pip install --quiet openai python-dotenv json-repair + +MODEL_8B="/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/models/llama-3.1-8b-instruct" + +pkill -f "vllm.entrypoints" 2>/dev/null || true +sleep 2 + +CUDA_VISIBLE_DEVICES=0 python -m vllm.entrypoints.openai.api_server \ + --model $MODEL_8B --port 8003 --tensor-parallel-size 1 \ + --gpu-memory-utilization 0.5 --max-model-len 8192 \ + --dtype bfloat16 --disable-log-requests & + +for i in $(seq 1 90); do + curl -s http://localhost:8003/health > /dev/null 2>&1 && break + sleep 2 +done +echo "vLLM ready." 
+ +cd collaborativeagents/scripts + +python run_experiments.py \ + --methods all_memory \ + --datasets math-hard \ + --n-profiles 200 \ + --start-profile 100 \ + --end-profile 150 \ + --n-sessions 15 \ + --max-turns 8 \ + --use-vllm \ + --use-openai-user \ + --openai-user-model gpt-5-mini \ + --reward-mode llm \ + --vllm-agent-url http://localhost:8003/v1 \ + --parallel-profiles 25 \ + --profile-path ../data/complex_profiles_v2/profiles_200.jsonl \ + --output-dir ../results/fullscale_15sess + +echo "all_memory p100-150 complete!" +pkill -f "vllm.entrypoints" 2>/dev/null || true diff --git a/collaborativeagents/slurm/fullscale/run_all_memory_p150.sh b/collaborativeagents/slurm/fullscale/run_all_memory_p150.sh new file mode 100755 index 0000000..da7a729 --- /dev/null +++ b/collaborativeagents/slurm/fullscale/run_all_memory_p150.sh @@ -0,0 +1,66 @@ +#!/bin/bash +#SBATCH --job-name=exp_all_memory_p150 +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA100x4 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=16 +#SBATCH --gres=gpu:nvidia_a100:2 +#SBATCH --mem=128G +#SBATCH --time=12:00:00 +#SBATCH --output=exp_all_memory_p150_%j.out +#SBATCH --error=exp_all_memory_p150_%j.err + +# Full run: all_memory method, profiles 150-200 (50 profiles × 15 sessions) + +cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model +source /u/yurenh2/miniforge3/etc/profile.d/conda.sh +conda activate eval + +export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface +export PYTHONPATH="${PWD}/src:${PWD}/collaborativeagents:${PYTHONPATH}" +export NCCL_P2P_DISABLE=1 + +set -a +source .env +set +a + +pip install --quiet openai python-dotenv json-repair + +MODEL_8B="/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/models/llama-3.1-8b-instruct" + +pkill -f "vllm.entrypoints" 2>/dev/null || true +sleep 2 + +CUDA_VISIBLE_DEVICES=0 python -m vllm.entrypoints.openai.api_server \ + --model $MODEL_8B --port 8003 --tensor-parallel-size 1 \ + 
--gpu-memory-utilization 0.5 --max-model-len 8192 \ + --dtype bfloat16 --disable-log-requests & + +for i in $(seq 1 90); do + curl -s http://localhost:8003/health > /dev/null 2>&1 && break + sleep 2 +done +echo "vLLM ready." + +cd collaborativeagents/scripts + +python run_experiments.py \ + --methods all_memory \ + --datasets math-hard \ + --n-profiles 200 \ + --start-profile 150 \ + --end-profile 200 \ + --n-sessions 15 \ + --max-turns 8 \ + --use-vllm \ + --use-openai-user \ + --openai-user-model gpt-5-mini \ + --reward-mode llm \ + --vllm-agent-url http://localhost:8003/v1 \ + --parallel-profiles 25 \ + --profile-path ../data/complex_profiles_v2/profiles_200.jsonl \ + --output-dir ../results/fullscale_15sess + +echo "all_memory p150-200 complete!" +pkill -f "vllm.entrypoints" 2>/dev/null || true diff --git a/collaborativeagents/slurm/fullscale/run_all_memory_p50.sh b/collaborativeagents/slurm/fullscale/run_all_memory_p50.sh new file mode 100755 index 0000000..60bc9ee --- /dev/null +++ b/collaborativeagents/slurm/fullscale/run_all_memory_p50.sh @@ -0,0 +1,66 @@ +#!/bin/bash +#SBATCH --job-name=exp_all_memory_p50 +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA100x4 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=16 +#SBATCH --gres=gpu:nvidia_a100:2 +#SBATCH --mem=128G +#SBATCH --time=12:00:00 +#SBATCH --output=exp_all_memory_p50_%j.out +#SBATCH --error=exp_all_memory_p50_%j.err + +# Full run: all_memory method, profiles 50-100 (50 profiles × 15 sessions) + +cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model +source /u/yurenh2/miniforge3/etc/profile.d/conda.sh +conda activate eval + +export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface +export PYTHONPATH="${PWD}/src:${PWD}/collaborativeagents:${PYTHONPATH}" +export NCCL_P2P_DISABLE=1 + +set -a +source .env +set +a + +pip install --quiet openai python-dotenv json-repair + 
+MODEL_8B="/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/models/llama-3.1-8b-instruct" + +pkill -f "vllm.entrypoints" 2>/dev/null || true +sleep 2 + +CUDA_VISIBLE_DEVICES=0 python -m vllm.entrypoints.openai.api_server \ + --model $MODEL_8B --port 8003 --tensor-parallel-size 1 \ + --gpu-memory-utilization 0.5 --max-model-len 8192 \ + --dtype bfloat16 --disable-log-requests & + +for i in $(seq 1 90); do + curl -s http://localhost:8003/health > /dev/null 2>&1 && break + sleep 2 +done +echo "vLLM ready." + +cd collaborativeagents/scripts + +python run_experiments.py \ + --methods all_memory \ + --datasets math-hard \ + --n-profiles 200 \ + --start-profile 50 \ + --end-profile 100 \ + --n-sessions 15 \ + --max-turns 8 \ + --use-vllm \ + --use-openai-user \ + --openai-user-model gpt-5-mini \ + --reward-mode llm \ + --vllm-agent-url http://localhost:8003/v1 \ + --parallel-profiles 25 \ + --profile-path ../data/complex_profiles_v2/profiles_200.jsonl \ + --output-dir ../results/fullscale_15sess + +echo "all_memory p50-100 complete!" 
+pkill -f "vllm.entrypoints" 2>/dev/null || true diff --git a/collaborativeagents/slurm/fullscale/run_contextual_p0.sh b/collaborativeagents/slurm/fullscale/run_contextual_p0.sh new file mode 100755 index 0000000..6fa0211 --- /dev/null +++ b/collaborativeagents/slurm/fullscale/run_contextual_p0.sh @@ -0,0 +1,66 @@ +#!/bin/bash +#SBATCH --job-name=exp_contextual_p0 +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA100x4 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=16 +#SBATCH --gres=gpu:nvidia_a100:2 +#SBATCH --mem=128G +#SBATCH --time=12:00:00 +#SBATCH --output=exp_contextual_p0_%j.out +#SBATCH --error=exp_contextual_p0_%j.err + +# Full run: contextual method, profiles 0-50 (50 profiles × 15 sessions) + +cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model +source /u/yurenh2/miniforge3/etc/profile.d/conda.sh +conda activate eval + +export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface +export PYTHONPATH="${PWD}/src:${PWD}/collaborativeagents:${PYTHONPATH}" +export NCCL_P2P_DISABLE=1 + +set -a +source .env +set +a + +pip install --quiet openai python-dotenv json-repair + +MODEL_8B="/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/models/llama-3.1-8b-instruct" + +pkill -f "vllm.entrypoints" 2>/dev/null || true +sleep 2 + +CUDA_VISIBLE_DEVICES=0 python -m vllm.entrypoints.openai.api_server \ + --model $MODEL_8B --port 8003 --tensor-parallel-size 1 \ + --gpu-memory-utilization 0.5 --max-model-len 8192 \ + --dtype bfloat16 --disable-log-requests & + +for i in $(seq 1 90); do + curl -s http://localhost:8003/health > /dev/null 2>&1 && break + sleep 2 +done +echo "vLLM ready." 
+ +cd collaborativeagents/scripts + +python run_experiments.py \ + --methods contextual \ + --datasets math-hard \ + --n-profiles 200 \ + --start-profile 0 \ + --end-profile 50 \ + --n-sessions 15 \ + --max-turns 8 \ + --use-vllm \ + --use-openai-user \ + --openai-user-model gpt-5-mini \ + --reward-mode llm \ + --vllm-agent-url http://localhost:8003/v1 \ + --parallel-profiles 25 \ + --profile-path ../data/complex_profiles_v2/profiles_200.jsonl \ + --output-dir ../results/fullscale_15sess + +echo "contextual p0-50 complete!" +pkill -f "vllm.entrypoints" 2>/dev/null || true diff --git a/collaborativeagents/slurm/fullscale/run_contextual_p100.sh b/collaborativeagents/slurm/fullscale/run_contextual_p100.sh new file mode 100755 index 0000000..8250c19 --- /dev/null +++ b/collaborativeagents/slurm/fullscale/run_contextual_p100.sh @@ -0,0 +1,66 @@ +#!/bin/bash +#SBATCH --job-name=exp_contextual_p100 +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA100x4 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=16 +#SBATCH --gres=gpu:nvidia_a100:2 +#SBATCH --mem=128G +#SBATCH --time=12:00:00 +#SBATCH --output=exp_contextual_p100_%j.out +#SBATCH --error=exp_contextual_p100_%j.err + +# Full run: contextual method, profiles 100-150 (50 profiles × 15 sessions) + +cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model +source /u/yurenh2/miniforge3/etc/profile.d/conda.sh +conda activate eval + +export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface +export PYTHONPATH="${PWD}/src:${PWD}/collaborativeagents:${PYTHONPATH}" +export NCCL_P2P_DISABLE=1 + +set -a +source .env +set +a + +pip install --quiet openai python-dotenv json-repair + +MODEL_8B="/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/models/llama-3.1-8b-instruct" + +pkill -f "vllm.entrypoints" 2>/dev/null || true +sleep 2 + +CUDA_VISIBLE_DEVICES=0 python -m vllm.entrypoints.openai.api_server \ + --model $MODEL_8B --port 8003 --tensor-parallel-size 1 \ + 
--gpu-memory-utilization 0.5 --max-model-len 8192 \ + --dtype bfloat16 --disable-log-requests & + +for i in $(seq 1 90); do + curl -s http://localhost:8003/health > /dev/null 2>&1 && break + sleep 2 +done +echo "vLLM ready." + +cd collaborativeagents/scripts + +python run_experiments.py \ + --methods contextual \ + --datasets math-hard \ + --n-profiles 200 \ + --start-profile 100 \ + --end-profile 150 \ + --n-sessions 15 \ + --max-turns 8 \ + --use-vllm \ + --use-openai-user \ + --openai-user-model gpt-5-mini \ + --reward-mode llm \ + --vllm-agent-url http://localhost:8003/v1 \ + --parallel-profiles 25 \ + --profile-path ../data/complex_profiles_v2/profiles_200.jsonl \ + --output-dir ../results/fullscale_15sess + +echo "contextual p100-150 complete!" +pkill -f "vllm.entrypoints" 2>/dev/null || true diff --git a/collaborativeagents/slurm/fullscale/run_contextual_p150.sh b/collaborativeagents/slurm/fullscale/run_contextual_p150.sh new file mode 100755 index 0000000..fb14058 --- /dev/null +++ b/collaborativeagents/slurm/fullscale/run_contextual_p150.sh @@ -0,0 +1,66 @@ +#!/bin/bash +#SBATCH --job-name=exp_contextual_p150 +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA100x4 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=16 +#SBATCH --gres=gpu:nvidia_a100:2 +#SBATCH --mem=128G +#SBATCH --time=12:00:00 +#SBATCH --output=exp_contextual_p150_%j.out +#SBATCH --error=exp_contextual_p150_%j.err + +# Full run: contextual method, profiles 150-200 (50 profiles × 15 sessions) + +cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model +source /u/yurenh2/miniforge3/etc/profile.d/conda.sh +conda activate eval + +export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface +export PYTHONPATH="${PWD}/src:${PWD}/collaborativeagents:${PYTHONPATH}" +export NCCL_P2P_DISABLE=1 + +set -a +source .env +set +a + +pip install --quiet openai python-dotenv json-repair + 
+MODEL_8B="/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/models/llama-3.1-8b-instruct" + +pkill -f "vllm.entrypoints" 2>/dev/null || true +sleep 2 + +CUDA_VISIBLE_DEVICES=0 python -m vllm.entrypoints.openai.api_server \ + --model $MODEL_8B --port 8003 --tensor-parallel-size 1 \ + --gpu-memory-utilization 0.5 --max-model-len 8192 \ + --dtype bfloat16 --disable-log-requests & + +for i in $(seq 1 90); do + curl -s http://localhost:8003/health > /dev/null 2>&1 && break + sleep 2 +done +echo "vLLM ready." + +cd collaborativeagents/scripts + +python run_experiments.py \ + --methods contextual \ + --datasets math-hard \ + --n-profiles 200 \ + --start-profile 150 \ + --end-profile 200 \ + --n-sessions 15 \ + --max-turns 8 \ + --use-vllm \ + --use-openai-user \ + --openai-user-model gpt-5-mini \ + --reward-mode llm \ + --vllm-agent-url http://localhost:8003/v1 \ + --parallel-profiles 25 \ + --profile-path ../data/complex_profiles_v2/profiles_200.jsonl \ + --output-dir ../results/fullscale_15sess + +echo "contextual p150-200 complete!" 
+pkill -f "vllm.entrypoints" 2>/dev/null || true diff --git a/collaborativeagents/slurm/fullscale/run_contextual_p50.sh b/collaborativeagents/slurm/fullscale/run_contextual_p50.sh new file mode 100755 index 0000000..8b1788e --- /dev/null +++ b/collaborativeagents/slurm/fullscale/run_contextual_p50.sh @@ -0,0 +1,66 @@ +#!/bin/bash +#SBATCH --job-name=exp_contextual_p50 +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA100x4 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=16 +#SBATCH --gres=gpu:nvidia_a100:2 +#SBATCH --mem=128G +#SBATCH --time=12:00:00 +#SBATCH --output=exp_contextual_p50_%j.out +#SBATCH --error=exp_contextual_p50_%j.err + +# Full run: contextual method, profiles 50-100 (50 profiles × 15 sessions) + +cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model +source /u/yurenh2/miniforge3/etc/profile.d/conda.sh +conda activate eval + +export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface +export PYTHONPATH="${PWD}/src:${PWD}/collaborativeagents:${PYTHONPATH}" +export NCCL_P2P_DISABLE=1 + +set -a +source .env +set +a + +pip install --quiet openai python-dotenv json-repair + +MODEL_8B="/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/models/llama-3.1-8b-instruct" + +pkill -f "vllm.entrypoints" 2>/dev/null || true +sleep 2 + +CUDA_VISIBLE_DEVICES=0 python -m vllm.entrypoints.openai.api_server \ + --model $MODEL_8B --port 8003 --tensor-parallel-size 1 \ + --gpu-memory-utilization 0.5 --max-model-len 8192 \ + --dtype bfloat16 --disable-log-requests & + +for i in $(seq 1 90); do + curl -s http://localhost:8003/health > /dev/null 2>&1 && break + sleep 2 +done +echo "vLLM ready." 
+ +cd collaborativeagents/scripts + +python run_experiments.py \ + --methods contextual \ + --datasets math-hard \ + --n-profiles 200 \ + --start-profile 50 \ + --end-profile 100 \ + --n-sessions 15 \ + --max-turns 8 \ + --use-vllm \ + --use-openai-user \ + --openai-user-model gpt-5-mini \ + --reward-mode llm \ + --vllm-agent-url http://localhost:8003/v1 \ + --parallel-profiles 25 \ + --profile-path ../data/complex_profiles_v2/profiles_200.jsonl \ + --output-dir ../results/fullscale_15sess + +echo "contextual p50-100 complete!" +pkill -f "vllm.entrypoints" 2>/dev/null || true diff --git a/collaborativeagents/slurm/fullscale/run_rag_p0.sh b/collaborativeagents/slurm/fullscale/run_rag_p0.sh new file mode 100755 index 0000000..de4f038 --- /dev/null +++ b/collaborativeagents/slurm/fullscale/run_rag_p0.sh @@ -0,0 +1,66 @@ +#!/bin/bash +#SBATCH --job-name=exp_rag_p0 +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA100x4 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=16 +#SBATCH --gres=gpu:nvidia_a100:2 +#SBATCH --mem=128G +#SBATCH --time=12:00:00 +#SBATCH --output=exp_rag_p0_%j.out +#SBATCH --error=exp_rag_p0_%j.err + +# Full run: rag method, profiles 0-50 (50 profiles × 15 sessions) + +cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model +source /u/yurenh2/miniforge3/etc/profile.d/conda.sh +conda activate eval + +export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface +export PYTHONPATH="${PWD}/src:${PWD}/collaborativeagents:${PYTHONPATH}" +export NCCL_P2P_DISABLE=1 + +set -a +source .env +set +a + +pip install --quiet openai python-dotenv json-repair + +MODEL_8B="/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/models/llama-3.1-8b-instruct" + +pkill -f "vllm.entrypoints" 2>/dev/null || true +sleep 2 + +CUDA_VISIBLE_DEVICES=0 python -m vllm.entrypoints.openai.api_server \ + --model $MODEL_8B --port 8003 --tensor-parallel-size 1 \ + --gpu-memory-utilization 0.5 --max-model-len 8192 \ + --dtype 
bfloat16 --disable-log-requests & + +for i in $(seq 1 90); do + curl -s http://localhost:8003/health > /dev/null 2>&1 && break + sleep 2 +done +echo "vLLM ready." + +cd collaborativeagents/scripts + +python run_experiments.py \ + --methods rag \ + --datasets math-hard \ + --n-profiles 200 \ + --start-profile 0 \ + --end-profile 50 \ + --n-sessions 15 \ + --max-turns 8 \ + --use-vllm \ + --use-openai-user \ + --openai-user-model gpt-5-mini \ + --reward-mode llm \ + --vllm-agent-url http://localhost:8003/v1 \ + --parallel-profiles 25 \ + --profile-path ../data/complex_profiles_v2/profiles_200.jsonl \ + --output-dir ../results/fullscale_15sess + +echo "rag p0-50 complete!" +pkill -f "vllm.entrypoints" 2>/dev/null || true diff --git a/collaborativeagents/slurm/fullscale/run_rag_p100.sh b/collaborativeagents/slurm/fullscale/run_rag_p100.sh new file mode 100755 index 0000000..c9b9d7e --- /dev/null +++ b/collaborativeagents/slurm/fullscale/run_rag_p100.sh @@ -0,0 +1,66 @@ +#!/bin/bash +#SBATCH --job-name=exp_rag_p100 +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA100x4 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=16 +#SBATCH --gres=gpu:nvidia_a100:2 +#SBATCH --mem=128G +#SBATCH --time=12:00:00 +#SBATCH --output=exp_rag_p100_%j.out +#SBATCH --error=exp_rag_p100_%j.err + +# Full run: rag method, profiles 100-150 (50 profiles × 15 sessions) + +cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model +source /u/yurenh2/miniforge3/etc/profile.d/conda.sh +conda activate eval + +export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface +export PYTHONPATH="${PWD}/src:${PWD}/collaborativeagents:${PYTHONPATH}" +export NCCL_P2P_DISABLE=1 + +set -a +source .env +set +a + +pip install --quiet openai python-dotenv json-repair + +MODEL_8B="/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/models/llama-3.1-8b-instruct" + +pkill -f "vllm.entrypoints" 2>/dev/null || true +sleep 2 + +CUDA_VISIBLE_DEVICES=0 python -m 
vllm.entrypoints.openai.api_server \ + --model $MODEL_8B --port 8003 --tensor-parallel-size 1 \ + --gpu-memory-utilization 0.5 --max-model-len 8192 \ + --dtype bfloat16 --disable-log-requests & + +for i in $(seq 1 90); do + curl -s http://localhost:8003/health > /dev/null 2>&1 && break + sleep 2 +done +echo "vLLM ready." + +cd collaborativeagents/scripts + +python run_experiments.py \ + --methods rag \ + --datasets math-hard \ + --n-profiles 200 \ + --start-profile 100 \ + --end-profile 150 \ + --n-sessions 15 \ + --max-turns 8 \ + --use-vllm \ + --use-openai-user \ + --openai-user-model gpt-5-mini \ + --reward-mode llm \ + --vllm-agent-url http://localhost:8003/v1 \ + --parallel-profiles 25 \ + --profile-path ../data/complex_profiles_v2/profiles_200.jsonl \ + --output-dir ../results/fullscale_15sess + +echo "rag p100-150 complete!" +pkill -f "vllm.entrypoints" 2>/dev/null || true diff --git a/collaborativeagents/slurm/fullscale/run_rag_p150.sh b/collaborativeagents/slurm/fullscale/run_rag_p150.sh new file mode 100755 index 0000000..0ec5e4f --- /dev/null +++ b/collaborativeagents/slurm/fullscale/run_rag_p150.sh @@ -0,0 +1,66 @@ +#!/bin/bash +#SBATCH --job-name=exp_rag_p150 +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA100x4 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=16 +#SBATCH --gres=gpu:nvidia_a100:2 +#SBATCH --mem=128G +#SBATCH --time=12:00:00 +#SBATCH --output=exp_rag_p150_%j.out +#SBATCH --error=exp_rag_p150_%j.err + +# Full run: rag method, profiles 150-200 (50 profiles × 15 sessions) + +cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model +source /u/yurenh2/miniforge3/etc/profile.d/conda.sh +conda activate eval + +export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface +export PYTHONPATH="${PWD}/src:${PWD}/collaborativeagents:${PYTHONPATH}" +export NCCL_P2P_DISABLE=1 + +set -a +source .env +set +a + +pip install --quiet openai python-dotenv json-repair + 
+MODEL_8B="/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/models/llama-3.1-8b-instruct" + +pkill -f "vllm.entrypoints" 2>/dev/null || true +sleep 2 + +CUDA_VISIBLE_DEVICES=0 python -m vllm.entrypoints.openai.api_server \ + --model $MODEL_8B --port 8003 --tensor-parallel-size 1 \ + --gpu-memory-utilization 0.5 --max-model-len 8192 \ + --dtype bfloat16 --disable-log-requests & + +for i in $(seq 1 90); do + curl -s http://localhost:8003/health > /dev/null 2>&1 && break + sleep 2 +done +echo "vLLM ready." + +cd collaborativeagents/scripts + +python run_experiments.py \ + --methods rag \ + --datasets math-hard \ + --n-profiles 200 \ + --start-profile 150 \ + --end-profile 200 \ + --n-sessions 15 \ + --max-turns 8 \ + --use-vllm \ + --use-openai-user \ + --openai-user-model gpt-5-mini \ + --reward-mode llm \ + --vllm-agent-url http://localhost:8003/v1 \ + --parallel-profiles 25 \ + --profile-path ../data/complex_profiles_v2/profiles_200.jsonl \ + --output-dir ../results/fullscale_15sess + +echo "rag p150-200 complete!" 
+pkill -f "vllm.entrypoints" 2>/dev/null || true diff --git a/collaborativeagents/slurm/fullscale/run_rag_p50.sh b/collaborativeagents/slurm/fullscale/run_rag_p50.sh new file mode 100755 index 0000000..b625300 --- /dev/null +++ b/collaborativeagents/slurm/fullscale/run_rag_p50.sh @@ -0,0 +1,66 @@ +#!/bin/bash +#SBATCH --job-name=exp_rag_p50 +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA100x4 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=16 +#SBATCH --gres=gpu:nvidia_a100:2 +#SBATCH --mem=128G +#SBATCH --time=12:00:00 +#SBATCH --output=exp_rag_p50_%j.out +#SBATCH --error=exp_rag_p50_%j.err + +# Full run: rag method, profiles 50-100 (50 profiles × 15 sessions) + +cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model +source /u/yurenh2/miniforge3/etc/profile.d/conda.sh +conda activate eval + +export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface +export PYTHONPATH="${PWD}/src:${PWD}/collaborativeagents:${PYTHONPATH}" +export NCCL_P2P_DISABLE=1 + +set -a +source .env +set +a + +pip install --quiet openai python-dotenv json-repair + +MODEL_8B="/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/models/llama-3.1-8b-instruct" + +pkill -f "vllm.entrypoints" 2>/dev/null || true +sleep 2 + +CUDA_VISIBLE_DEVICES=0 python -m vllm.entrypoints.openai.api_server \ + --model $MODEL_8B --port 8003 --tensor-parallel-size 1 \ + --gpu-memory-utilization 0.5 --max-model-len 8192 \ + --dtype bfloat16 --disable-log-requests & + +for i in $(seq 1 90); do + curl -s http://localhost:8003/health > /dev/null 2>&1 && break + sleep 2 +done +echo "vLLM ready." 
+ +cd collaborativeagents/scripts + +python run_experiments.py \ + --methods rag \ + --datasets math-hard \ + --n-profiles 200 \ + --start-profile 50 \ + --end-profile 100 \ + --n-sessions 15 \ + --max-turns 8 \ + --use-vllm \ + --use-openai-user \ + --openai-user-model gpt-5-mini \ + --reward-mode llm \ + --vllm-agent-url http://localhost:8003/v1 \ + --parallel-profiles 25 \ + --profile-path ../data/complex_profiles_v2/profiles_200.jsonl \ + --output-dir ../results/fullscale_15sess + +echo "rag p50-100 complete!" +pkill -f "vllm.entrypoints" 2>/dev/null || true diff --git a/collaborativeagents/slurm/fullscale/run_rag_vector_p0.sh b/collaborativeagents/slurm/fullscale/run_rag_vector_p0.sh new file mode 100755 index 0000000..1f28f8f --- /dev/null +++ b/collaborativeagents/slurm/fullscale/run_rag_vector_p0.sh @@ -0,0 +1,66 @@ +#!/bin/bash +#SBATCH --job-name=exp_rag_vector_p0 +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA100x4 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=16 +#SBATCH --gres=gpu:nvidia_a100:2 +#SBATCH --mem=128G +#SBATCH --time=12:00:00 +#SBATCH --output=exp_rag_vector_p0_%j.out +#SBATCH --error=exp_rag_vector_p0_%j.err + +# Full run: rag_vector method, profiles 0-50 (50 profiles × 15 sessions) + +cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model +source /u/yurenh2/miniforge3/etc/profile.d/conda.sh +conda activate eval + +export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface +export PYTHONPATH="${PWD}/src:${PWD}/collaborativeagents:${PYTHONPATH}" +export NCCL_P2P_DISABLE=1 + +set -a +source .env +set +a + +pip install --quiet openai python-dotenv json-repair + +MODEL_8B="/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/models/llama-3.1-8b-instruct" + +pkill -f "vllm.entrypoints" 2>/dev/null || true +sleep 2 + +CUDA_VISIBLE_DEVICES=0 python -m vllm.entrypoints.openai.api_server \ + --model $MODEL_8B --port 8003 --tensor-parallel-size 1 \ + --gpu-memory-utilization 0.5 
--max-model-len 8192 \ + --dtype bfloat16 --disable-log-requests & + +for i in $(seq 1 90); do + curl -s http://localhost:8003/health > /dev/null 2>&1 && break + sleep 2 +done +echo "vLLM ready." + +cd collaborativeagents/scripts + +python run_experiments.py \ + --methods rag_vector \ + --datasets math-hard \ + --n-profiles 200 \ + --start-profile 0 \ + --end-profile 50 \ + --n-sessions 15 \ + --max-turns 8 \ + --use-vllm \ + --use-openai-user \ + --openai-user-model gpt-5-mini \ + --reward-mode llm \ + --vllm-agent-url http://localhost:8003/v1 \ + --parallel-profiles 25 \ + --profile-path ../data/complex_profiles_v2/profiles_200.jsonl \ + --output-dir ../results/fullscale_15sess + +echo "rag_vector p0-50 complete!" +pkill -f "vllm.entrypoints" 2>/dev/null || true diff --git a/collaborativeagents/slurm/fullscale/run_rag_vector_p100.sh b/collaborativeagents/slurm/fullscale/run_rag_vector_p100.sh new file mode 100755 index 0000000..b658bab --- /dev/null +++ b/collaborativeagents/slurm/fullscale/run_rag_vector_p100.sh @@ -0,0 +1,66 @@ +#!/bin/bash +#SBATCH --job-name=exp_rag_vector_p100 +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA100x4 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=16 +#SBATCH --gres=gpu:nvidia_a100:2 +#SBATCH --mem=128G +#SBATCH --time=12:00:00 +#SBATCH --output=exp_rag_vector_p100_%j.out +#SBATCH --error=exp_rag_vector_p100_%j.err + +# Full run: rag_vector method, profiles 100-150 (50 profiles × 15 sessions) + +cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model +source /u/yurenh2/miniforge3/etc/profile.d/conda.sh +conda activate eval + +export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface +export PYTHONPATH="${PWD}/src:${PWD}/collaborativeagents:${PYTHONPATH}" +export NCCL_P2P_DISABLE=1 + +set -a +source .env +set +a + +pip install --quiet openai python-dotenv json-repair + +MODEL_8B="/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/models/llama-3.1-8b-instruct" + 
+pkill -f "vllm.entrypoints" 2>/dev/null || true +sleep 2 + +CUDA_VISIBLE_DEVICES=0 python -m vllm.entrypoints.openai.api_server \ + --model $MODEL_8B --port 8003 --tensor-parallel-size 1 \ + --gpu-memory-utilization 0.5 --max-model-len 8192 \ + --dtype bfloat16 --disable-log-requests & + +for i in $(seq 1 90); do + curl -s http://localhost:8003/health > /dev/null 2>&1 && break + sleep 2 +done +echo "vLLM ready." + +cd collaborativeagents/scripts + +python run_experiments.py \ + --methods rag_vector \ + --datasets math-hard \ + --n-profiles 200 \ + --start-profile 100 \ + --end-profile 150 \ + --n-sessions 15 \ + --max-turns 8 \ + --use-vllm \ + --use-openai-user \ + --openai-user-model gpt-5-mini \ + --reward-mode llm \ + --vllm-agent-url http://localhost:8003/v1 \ + --parallel-profiles 25 \ + --profile-path ../data/complex_profiles_v2/profiles_200.jsonl \ + --output-dir ../results/fullscale_15sess + +echo "rag_vector p100-150 complete!" +pkill -f "vllm.entrypoints" 2>/dev/null || true diff --git a/collaborativeagents/slurm/fullscale/run_rag_vector_p150.sh b/collaborativeagents/slurm/fullscale/run_rag_vector_p150.sh new file mode 100755 index 0000000..8c2458f --- /dev/null +++ b/collaborativeagents/slurm/fullscale/run_rag_vector_p150.sh @@ -0,0 +1,66 @@ +#!/bin/bash +#SBATCH --job-name=exp_rag_vector_p150 +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA100x4 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=16 +#SBATCH --gres=gpu:nvidia_a100:2 +#SBATCH --mem=128G +#SBATCH --time=12:00:00 +#SBATCH --output=exp_rag_vector_p150_%j.out +#SBATCH --error=exp_rag_vector_p150_%j.err + +# Full run: rag_vector method, profiles 150-200 (50 profiles × 15 sessions) + +cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model +source /u/yurenh2/miniforge3/etc/profile.d/conda.sh +conda activate eval + +export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface +export PYTHONPATH="${PWD}/src:${PWD}/collaborativeagents:${PYTHONPATH}" 
+export NCCL_P2P_DISABLE=1 + +set -a +source .env +set +a + +pip install --quiet openai python-dotenv json-repair + +MODEL_8B="/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/models/llama-3.1-8b-instruct" + +pkill -f "vllm.entrypoints" 2>/dev/null || true +sleep 2 + +CUDA_VISIBLE_DEVICES=0 python -m vllm.entrypoints.openai.api_server \ + --model $MODEL_8B --port 8003 --tensor-parallel-size 1 \ + --gpu-memory-utilization 0.5 --max-model-len 8192 \ + --dtype bfloat16 --disable-log-requests & + +for i in $(seq 1 90); do + curl -s http://localhost:8003/health > /dev/null 2>&1 && break + sleep 2 +done +echo "vLLM ready." + +cd collaborativeagents/scripts + +python run_experiments.py \ + --methods rag_vector \ + --datasets math-hard \ + --n-profiles 200 \ + --start-profile 150 \ + --end-profile 200 \ + --n-sessions 15 \ + --max-turns 8 \ + --use-vllm \ + --use-openai-user \ + --openai-user-model gpt-5-mini \ + --reward-mode llm \ + --vllm-agent-url http://localhost:8003/v1 \ + --parallel-profiles 25 \ + --profile-path ../data/complex_profiles_v2/profiles_200.jsonl \ + --output-dir ../results/fullscale_15sess + +echo "rag_vector p150-200 complete!" 
+pkill -f "vllm.entrypoints" 2>/dev/null || true diff --git a/collaborativeagents/slurm/fullscale/run_rag_vector_p50.sh b/collaborativeagents/slurm/fullscale/run_rag_vector_p50.sh new file mode 100755 index 0000000..afb0164 --- /dev/null +++ b/collaborativeagents/slurm/fullscale/run_rag_vector_p50.sh @@ -0,0 +1,66 @@ +#!/bin/bash +#SBATCH --job-name=exp_rag_vector_p50 +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA100x4 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=16 +#SBATCH --gres=gpu:nvidia_a100:2 +#SBATCH --mem=128G +#SBATCH --time=12:00:00 +#SBATCH --output=exp_rag_vector_p50_%j.out +#SBATCH --error=exp_rag_vector_p50_%j.err + +# Full run: rag_vector method, profiles 50-100 (50 profiles × 15 sessions) + +cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model +source /u/yurenh2/miniforge3/etc/profile.d/conda.sh +conda activate eval + +export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface +export PYTHONPATH="${PWD}/src:${PWD}/collaborativeagents:${PYTHONPATH}" +export NCCL_P2P_DISABLE=1 + +set -a +source .env +set +a + +pip install --quiet openai python-dotenv json-repair + +MODEL_8B="/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/models/llama-3.1-8b-instruct" + +pkill -f "vllm.entrypoints" 2>/dev/null || true +sleep 2 + +CUDA_VISIBLE_DEVICES=0 python -m vllm.entrypoints.openai.api_server \ + --model $MODEL_8B --port 8003 --tensor-parallel-size 1 \ + --gpu-memory-utilization 0.5 --max-model-len 8192 \ + --dtype bfloat16 --disable-log-requests & + +for i in $(seq 1 90); do + curl -s http://localhost:8003/health > /dev/null 2>&1 && break + sleep 2 +done +echo "vLLM ready." 
+ +cd collaborativeagents/scripts + +python run_experiments.py \ + --methods rag_vector \ + --datasets math-hard \ + --n-profiles 200 \ + --start-profile 50 \ + --end-profile 100 \ + --n-sessions 15 \ + --max-turns 8 \ + --use-vllm \ + --use-openai-user \ + --openai-user-model gpt-5-mini \ + --reward-mode llm \ + --vllm-agent-url http://localhost:8003/v1 \ + --parallel-profiles 25 \ + --profile-path ../data/complex_profiles_v2/profiles_200.jsonl \ + --output-dir ../results/fullscale_15sess + +echo "rag_vector p50-100 complete!" +pkill -f "vllm.entrypoints" 2>/dev/null || true diff --git a/collaborativeagents/slurm/fullscale/run_reflection_p0.sh b/collaborativeagents/slurm/fullscale/run_reflection_p0.sh new file mode 100755 index 0000000..f5d5649 --- /dev/null +++ b/collaborativeagents/slurm/fullscale/run_reflection_p0.sh @@ -0,0 +1,66 @@ +#!/bin/bash +#SBATCH --job-name=exp_reflection_p0 +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA100x4 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=16 +#SBATCH --gres=gpu:nvidia_a100:2 +#SBATCH --mem=128G +#SBATCH --time=12:00:00 +#SBATCH --output=exp_reflection_p0_%j.out +#SBATCH --error=exp_reflection_p0_%j.err + +# Full run: reflection method, profiles 0-50 (50 profiles × 15 sessions) + +cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model +source /u/yurenh2/miniforge3/etc/profile.d/conda.sh +conda activate eval + +export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface +export PYTHONPATH="${PWD}/src:${PWD}/collaborativeagents:${PYTHONPATH}" +export NCCL_P2P_DISABLE=1 + +set -a +source .env +set +a + +pip install --quiet openai python-dotenv json-repair + +MODEL_8B="/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/models/llama-3.1-8b-instruct" + +pkill -f "vllm.entrypoints" 2>/dev/null || true +sleep 2 + +CUDA_VISIBLE_DEVICES=0 python -m vllm.entrypoints.openai.api_server \ + --model $MODEL_8B --port 8003 --tensor-parallel-size 1 \ + 
--gpu-memory-utilization 0.5 --max-model-len 8192 \ + --dtype bfloat16 --disable-log-requests & + +for i in $(seq 1 90); do + curl -s http://localhost:8003/health > /dev/null 2>&1 && break + sleep 2 +done +echo "vLLM ready." + +cd collaborativeagents/scripts + +python run_experiments.py \ + --methods reflection \ + --datasets math-hard \ + --n-profiles 200 \ + --start-profile 0 \ + --end-profile 50 \ + --n-sessions 15 \ + --max-turns 8 \ + --use-vllm \ + --use-openai-user \ + --openai-user-model gpt-5-mini \ + --reward-mode llm \ + --vllm-agent-url http://localhost:8003/v1 \ + --parallel-profiles 25 \ + --profile-path ../data/complex_profiles_v2/profiles_200.jsonl \ + --output-dir ../results/fullscale_15sess + +echo "reflection p0-50 complete!" +pkill -f "vllm.entrypoints" 2>/dev/null || true diff --git a/collaborativeagents/slurm/fullscale/run_reflection_p100.sh b/collaborativeagents/slurm/fullscale/run_reflection_p100.sh new file mode 100755 index 0000000..68f7047 --- /dev/null +++ b/collaborativeagents/slurm/fullscale/run_reflection_p100.sh @@ -0,0 +1,66 @@ +#!/bin/bash +#SBATCH --job-name=exp_reflection_p100 +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA100x4 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=16 +#SBATCH --gres=gpu:nvidia_a100:2 +#SBATCH --mem=128G +#SBATCH --time=12:00:00 +#SBATCH --output=exp_reflection_p100_%j.out +#SBATCH --error=exp_reflection_p100_%j.err + +# Full run: reflection method, profiles 100-150 (50 profiles × 15 sessions) + +cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model +source /u/yurenh2/miniforge3/etc/profile.d/conda.sh +conda activate eval + +export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface +export PYTHONPATH="${PWD}/src:${PWD}/collaborativeagents:${PYTHONPATH}" +export NCCL_P2P_DISABLE=1 + +set -a +source .env +set +a + +pip install --quiet openai python-dotenv json-repair + 
+MODEL_8B="/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/models/llama-3.1-8b-instruct" + +pkill -f "vllm.entrypoints" 2>/dev/null || true +sleep 2 + +CUDA_VISIBLE_DEVICES=0 python -m vllm.entrypoints.openai.api_server \ + --model $MODEL_8B --port 8003 --tensor-parallel-size 1 \ + --gpu-memory-utilization 0.5 --max-model-len 8192 \ + --dtype bfloat16 --disable-log-requests & + +for i in $(seq 1 90); do + curl -s http://localhost:8003/health > /dev/null 2>&1 && break + sleep 2 +done +echo "vLLM ready." + +cd collaborativeagents/scripts + +python run_experiments.py \ + --methods reflection \ + --datasets math-hard \ + --n-profiles 200 \ + --start-profile 100 \ + --end-profile 150 \ + --n-sessions 15 \ + --max-turns 8 \ + --use-vllm \ + --use-openai-user \ + --openai-user-model gpt-5-mini \ + --reward-mode llm \ + --vllm-agent-url http://localhost:8003/v1 \ + --parallel-profiles 25 \ + --profile-path ../data/complex_profiles_v2/profiles_200.jsonl \ + --output-dir ../results/fullscale_15sess + +echo "reflection p100-150 complete!" 
+pkill -f "vllm.entrypoints" 2>/dev/null || true diff --git a/collaborativeagents/slurm/fullscale/run_reflection_p150.sh b/collaborativeagents/slurm/fullscale/run_reflection_p150.sh new file mode 100755 index 0000000..a451e49 --- /dev/null +++ b/collaborativeagents/slurm/fullscale/run_reflection_p150.sh @@ -0,0 +1,66 @@ +#!/bin/bash +#SBATCH --job-name=exp_reflection_p150 +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA100x4 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=16 +#SBATCH --gres=gpu:nvidia_a100:2 +#SBATCH --mem=128G +#SBATCH --time=12:00:00 +#SBATCH --output=exp_reflection_p150_%j.out +#SBATCH --error=exp_reflection_p150_%j.err + +# Full run: reflection method, profiles 150-200 (50 profiles × 15 sessions) + +cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model +source /u/yurenh2/miniforge3/etc/profile.d/conda.sh +conda activate eval + +export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface +export PYTHONPATH="${PWD}/src:${PWD}/collaborativeagents:${PYTHONPATH}" +export NCCL_P2P_DISABLE=1 + +set -a +source .env +set +a + +pip install --quiet openai python-dotenv json-repair + +MODEL_8B="/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/models/llama-3.1-8b-instruct" + +pkill -f "vllm.entrypoints" 2>/dev/null || true +sleep 2 + +CUDA_VISIBLE_DEVICES=0 python -m vllm.entrypoints.openai.api_server \ + --model $MODEL_8B --port 8003 --tensor-parallel-size 1 \ + --gpu-memory-utilization 0.5 --max-model-len 8192 \ + --dtype bfloat16 --disable-log-requests & + +for i in $(seq 1 90); do + curl -s http://localhost:8003/health > /dev/null 2>&1 && break + sleep 2 +done +echo "vLLM ready." 
+ +cd collaborativeagents/scripts + +python run_experiments.py \ + --methods reflection \ + --datasets math-hard \ + --n-profiles 200 \ + --start-profile 150 \ + --end-profile 200 \ + --n-sessions 15 \ + --max-turns 8 \ + --use-vllm \ + --use-openai-user \ + --openai-user-model gpt-5-mini \ + --reward-mode llm \ + --vllm-agent-url http://localhost:8003/v1 \ + --parallel-profiles 25 \ + --profile-path ../data/complex_profiles_v2/profiles_200.jsonl \ + --output-dir ../results/fullscale_15sess + +echo "reflection p150-200 complete!" +pkill -f "vllm.entrypoints" 2>/dev/null || true diff --git a/collaborativeagents/slurm/fullscale/run_reflection_p50.sh b/collaborativeagents/slurm/fullscale/run_reflection_p50.sh new file mode 100755 index 0000000..dc977d7 --- /dev/null +++ b/collaborativeagents/slurm/fullscale/run_reflection_p50.sh @@ -0,0 +1,66 @@ +#!/bin/bash +#SBATCH --job-name=exp_reflection_p50 +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA100x4 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=16 +#SBATCH --gres=gpu:nvidia_a100:2 +#SBATCH --mem=128G +#SBATCH --time=12:00:00 +#SBATCH --output=exp_reflection_p50_%j.out +#SBATCH --error=exp_reflection_p50_%j.err + +# Full run: reflection method, profiles 50-100 (50 profiles × 15 sessions) + +cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model +source /u/yurenh2/miniforge3/etc/profile.d/conda.sh +conda activate eval + +export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface +export PYTHONPATH="${PWD}/src:${PWD}/collaborativeagents:${PYTHONPATH}" +export NCCL_P2P_DISABLE=1 + +set -a +source .env +set +a + +pip install --quiet openai python-dotenv json-repair + +MODEL_8B="/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/models/llama-3.1-8b-instruct" + +pkill -f "vllm.entrypoints" 2>/dev/null || true +sleep 2 + +CUDA_VISIBLE_DEVICES=0 python -m vllm.entrypoints.openai.api_server \ + --model $MODEL_8B --port 8003 --tensor-parallel-size 1 \ + 
--gpu-memory-utilization 0.5 --max-model-len 8192 \ + --dtype bfloat16 --disable-log-requests & + +for i in $(seq 1 90); do + curl -s http://localhost:8003/health > /dev/null 2>&1 && break + sleep 2 +done +echo "vLLM ready." + +cd collaborativeagents/scripts + +python run_experiments.py \ + --methods reflection \ + --datasets math-hard \ + --n-profiles 200 \ + --start-profile 50 \ + --end-profile 100 \ + --n-sessions 15 \ + --max-turns 8 \ + --use-vllm \ + --use-openai-user \ + --openai-user-model gpt-5-mini \ + --reward-mode llm \ + --vllm-agent-url http://localhost:8003/v1 \ + --parallel-profiles 25 \ + --profile-path ../data/complex_profiles_v2/profiles_200.jsonl \ + --output-dir ../results/fullscale_15sess + +echo "reflection p50-100 complete!" +pkill -f "vllm.entrypoints" 2>/dev/null || true diff --git a/collaborativeagents/slurm/fullscale/run_vanilla_p0.sh b/collaborativeagents/slurm/fullscale/run_vanilla_p0.sh new file mode 100755 index 0000000..f5706c8 --- /dev/null +++ b/collaborativeagents/slurm/fullscale/run_vanilla_p0.sh @@ -0,0 +1,66 @@ +#!/bin/bash +#SBATCH --job-name=exp_vanilla_p0 +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA100x4 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=16 +#SBATCH --gres=gpu:nvidia_a100:2 +#SBATCH --mem=128G +#SBATCH --time=12:00:00 +#SBATCH --output=exp_vanilla_p0_%j.out +#SBATCH --error=exp_vanilla_p0_%j.err + +# Full run: vanilla method, profiles 0-50 (50 profiles × 15 sessions) + +cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model +source /u/yurenh2/miniforge3/etc/profile.d/conda.sh +conda activate eval + +export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface +export PYTHONPATH="${PWD}/src:${PWD}/collaborativeagents:${PYTHONPATH}" +export NCCL_P2P_DISABLE=1 + +set -a +source .env +set +a + +pip install --quiet openai python-dotenv json-repair + +MODEL_8B="/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/models/llama-3.1-8b-instruct" + +pkill 
-f "vllm.entrypoints" 2>/dev/null || true +sleep 2 + +CUDA_VISIBLE_DEVICES=0 python -m vllm.entrypoints.openai.api_server \ + --model $MODEL_8B --port 8003 --tensor-parallel-size 1 \ + --gpu-memory-utilization 0.5 --max-model-len 8192 \ + --dtype bfloat16 --disable-log-requests & + +for i in $(seq 1 90); do + curl -s http://localhost:8003/health > /dev/null 2>&1 && break + sleep 2 +done +echo "vLLM ready." + +cd collaborativeagents/scripts + +python run_experiments.py \ + --methods vanilla \ + --datasets math-hard \ + --n-profiles 200 \ + --start-profile 0 \ + --end-profile 50 \ + --n-sessions 15 \ + --max-turns 8 \ + --use-vllm \ + --use-openai-user \ + --openai-user-model gpt-5-mini \ + --reward-mode llm \ + --vllm-agent-url http://localhost:8003/v1 \ + --parallel-profiles 25 \ + --profile-path ../data/complex_profiles_v2/profiles_200.jsonl \ + --output-dir ../results/fullscale_15sess + +echo "vanilla p0-50 complete!" +pkill -f "vllm.entrypoints" 2>/dev/null || true diff --git a/collaborativeagents/slurm/fullscale/run_vanilla_p100.sh b/collaborativeagents/slurm/fullscale/run_vanilla_p100.sh new file mode 100755 index 0000000..8ca9ce1 --- /dev/null +++ b/collaborativeagents/slurm/fullscale/run_vanilla_p100.sh @@ -0,0 +1,66 @@ +#!/bin/bash +#SBATCH --job-name=exp_vanilla_p100 +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA100x4 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=16 +#SBATCH --gres=gpu:nvidia_a100:2 +#SBATCH --mem=128G +#SBATCH --time=12:00:00 +#SBATCH --output=exp_vanilla_p100_%j.out +#SBATCH --error=exp_vanilla_p100_%j.err + +# Full run: vanilla method, profiles 100-150 (50 profiles × 15 sessions) + +cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model +source /u/yurenh2/miniforge3/etc/profile.d/conda.sh +conda activate eval + +export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface +export PYTHONPATH="${PWD}/src:${PWD}/collaborativeagents:${PYTHONPATH}" +export NCCL_P2P_DISABLE=1 + +set -a 
+source .env +set +a + +pip install --quiet openai python-dotenv json-repair + +MODEL_8B="/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/models/llama-3.1-8b-instruct" + +pkill -f "vllm.entrypoints" 2>/dev/null || true +sleep 2 + +CUDA_VISIBLE_DEVICES=0 python -m vllm.entrypoints.openai.api_server \ + --model $MODEL_8B --port 8003 --tensor-parallel-size 1 \ + --gpu-memory-utilization 0.5 --max-model-len 8192 \ + --dtype bfloat16 --disable-log-requests & + +for i in $(seq 1 90); do + curl -s http://localhost:8003/health > /dev/null 2>&1 && break + sleep 2 +done +echo "vLLM ready." + +cd collaborativeagents/scripts + +python run_experiments.py \ + --methods vanilla \ + --datasets math-hard \ + --n-profiles 200 \ + --start-profile 100 \ + --end-profile 150 \ + --n-sessions 15 \ + --max-turns 8 \ + --use-vllm \ + --use-openai-user \ + --openai-user-model gpt-5-mini \ + --reward-mode llm \ + --vllm-agent-url http://localhost:8003/v1 \ + --parallel-profiles 25 \ + --profile-path ../data/complex_profiles_v2/profiles_200.jsonl \ + --output-dir ../results/fullscale_15sess + +echo "vanilla p100-150 complete!" 
+pkill -f "vllm.entrypoints" 2>/dev/null || true diff --git a/collaborativeagents/slurm/fullscale/run_vanilla_p150.sh b/collaborativeagents/slurm/fullscale/run_vanilla_p150.sh new file mode 100755 index 0000000..07ff6d3 --- /dev/null +++ b/collaborativeagents/slurm/fullscale/run_vanilla_p150.sh @@ -0,0 +1,66 @@ +#!/bin/bash +#SBATCH --job-name=exp_vanilla_p150 +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA100x4 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=16 +#SBATCH --gres=gpu:nvidia_a100:2 +#SBATCH --mem=128G +#SBATCH --time=12:00:00 +#SBATCH --output=exp_vanilla_p150_%j.out +#SBATCH --error=exp_vanilla_p150_%j.err + +# Full run: vanilla method, profiles 150-200 (50 profiles × 15 sessions) + +cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model +source /u/yurenh2/miniforge3/etc/profile.d/conda.sh +conda activate eval + +export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface +export PYTHONPATH="${PWD}/src:${PWD}/collaborativeagents:${PYTHONPATH}" +export NCCL_P2P_DISABLE=1 + +set -a +source .env +set +a + +pip install --quiet openai python-dotenv json-repair + +MODEL_8B="/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/models/llama-3.1-8b-instruct" + +pkill -f "vllm.entrypoints" 2>/dev/null || true +sleep 2 + +CUDA_VISIBLE_DEVICES=0 python -m vllm.entrypoints.openai.api_server \ + --model $MODEL_8B --port 8003 --tensor-parallel-size 1 \ + --gpu-memory-utilization 0.5 --max-model-len 8192 \ + --dtype bfloat16 --disable-log-requests & + +for i in $(seq 1 90); do + curl -s http://localhost:8003/health > /dev/null 2>&1 && break + sleep 2 +done +echo "vLLM ready." 
+ +cd collaborativeagents/scripts + +python run_experiments.py \ + --methods vanilla \ + --datasets math-hard \ + --n-profiles 200 \ + --start-profile 150 \ + --end-profile 200 \ + --n-sessions 15 \ + --max-turns 8 \ + --use-vllm \ + --use-openai-user \ + --openai-user-model gpt-5-mini \ + --reward-mode llm \ + --vllm-agent-url http://localhost:8003/v1 \ + --parallel-profiles 25 \ + --profile-path ../data/complex_profiles_v2/profiles_200.jsonl \ + --output-dir ../results/fullscale_15sess + +echo "vanilla p150-200 complete!" +pkill -f "vllm.entrypoints" 2>/dev/null || true diff --git a/collaborativeagents/slurm/fullscale/run_vanilla_p50.sh b/collaborativeagents/slurm/fullscale/run_vanilla_p50.sh new file mode 100755 index 0000000..d77b881 --- /dev/null +++ b/collaborativeagents/slurm/fullscale/run_vanilla_p50.sh @@ -0,0 +1,66 @@ +#!/bin/bash +#SBATCH --job-name=exp_vanilla_p50 +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA100x4 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=16 +#SBATCH --gres=gpu:nvidia_a100:2 +#SBATCH --mem=128G +#SBATCH --time=12:00:00 +#SBATCH --output=exp_vanilla_p50_%j.out +#SBATCH --error=exp_vanilla_p50_%j.err + +# Full run: vanilla method, profiles 50-100 (50 profiles × 15 sessions) + +cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model +source /u/yurenh2/miniforge3/etc/profile.d/conda.sh +conda activate eval + +export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface +export PYTHONPATH="${PWD}/src:${PWD}/collaborativeagents:${PYTHONPATH}" +export NCCL_P2P_DISABLE=1 + +set -a +source .env +set +a + +pip install --quiet openai python-dotenv json-repair + +MODEL_8B="/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/models/llama-3.1-8b-instruct" + +pkill -f "vllm.entrypoints" 2>/dev/null || true +sleep 2 + +CUDA_VISIBLE_DEVICES=0 python -m vllm.entrypoints.openai.api_server \ + --model $MODEL_8B --port 8003 --tensor-parallel-size 1 \ + --gpu-memory-utilization 0.5 
--max-model-len 8192 \ + --dtype bfloat16 --disable-log-requests & + +for i in $(seq 1 90); do + curl -s http://localhost:8003/health > /dev/null 2>&1 && break + sleep 2 +done +echo "vLLM ready." + +cd collaborativeagents/scripts + +python run_experiments.py \ + --methods vanilla \ + --datasets math-hard \ + --n-profiles 200 \ + --start-profile 50 \ + --end-profile 100 \ + --n-sessions 15 \ + --max-turns 8 \ + --use-vllm \ + --use-openai-user \ + --openai-user-model gpt-5-mini \ + --reward-mode llm \ + --vllm-agent-url http://localhost:8003/v1 \ + --parallel-profiles 25 \ + --profile-path ../data/complex_profiles_v2/profiles_200.jsonl \ + --output-dir ../results/fullscale_15sess + +echo "vanilla p50-100 complete!" +pkill -f "vllm.entrypoints" 2>/dev/null || true diff --git a/collaborativeagents/slurm/fullscale/submit_all.sh b/collaborativeagents/slurm/fullscale/submit_all.sh new file mode 100644 index 0000000..5b76169 --- /dev/null +++ b/collaborativeagents/slurm/fullscale/submit_all.sh @@ -0,0 +1,29 @@ +#!/bin/bash +# Submit all 24 jobs for full-scale experiment +# Total: 200 profiles × 6 methods × 15 sessions = 18,000 sessions +# Split: 6 methods × 4 profile ranges (50 each) = 24 jobs +# Per job: 50 profiles × 15 sessions = 750 sessions ≈ 7-8 hours + +echo "Submitting all 24 jobs for full-scale experiment..." +echo "Total: 200 profiles × 6 methods × 15 sessions = 18,000 sessions" +echo "Split: 24 jobs (6 methods × 4 profile ranges of 50)" +echo "" + +cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/collaborativeagents/slurm/fullscale + +JOBS="" + +for script in run_*.sh; do + if [[ "$script" != "generate_jobs.sh" && "$script" != "submit_all.sh" && "$script" != "test_run.sh" ]]; then + JOB_ID=$(sbatch "$script" | awk '{print $4}') + JOBS="$JOBS $JOB_ID" + echo "Submitted $script -> Job ID: $JOB_ID" + fi +done + +echo "" +echo "All jobs submitted!" 
+echo "Job IDs:$JOBS" +echo "" +echo "Monitor with: squeue -u \$USER" +echo "Check results in: collaborativeagents/results/fullscale_15sess/" diff --git a/collaborativeagents/slurm/fullscale/test_25parallel.sh b/collaborativeagents/slurm/fullscale/test_25parallel.sh new file mode 100644 index 0000000..09d5ddb --- /dev/null +++ b/collaborativeagents/slurm/fullscale/test_25parallel.sh @@ -0,0 +1,66 @@ +#!/bin/bash +#SBATCH --job-name=test_25parallel +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA100x4 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=16 +#SBATCH --gres=gpu:nvidia_a100:2 +#SBATCH --mem=128G +#SBATCH --time=1:00:00 +#SBATCH --output=test_25parallel_%j.out +#SBATCH --error=test_25parallel_%j.err + +# Quick test: 25 profiles × 2 sessions × 1 method (vanilla) = 50 sessions +# With 25 parallel profiles to measure realistic throughput + +cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model +source /u/yurenh2/miniforge3/etc/profile.d/conda.sh +conda activate eval + +export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface +export PYTHONPATH="${PWD}/src:${PWD}/collaborativeagents:${PYTHONPATH}" +export NCCL_P2P_DISABLE=1 + +set -a +source .env +set +a + +pip install --quiet openai python-dotenv json-repair + +MODEL_8B="/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/models/llama-3.1-8b-instruct" + +pkill -f "vllm.entrypoints" 2>/dev/null || true +sleep 2 + +CUDA_VISIBLE_DEVICES=0 python -m vllm.entrypoints.openai.api_server \ + --model $MODEL_8B --port 8003 --tensor-parallel-size 1 \ + --gpu-memory-utilization 0.5 --max-model-len 8192 \ + --dtype bfloat16 --disable-log-requests & + +for i in $(seq 1 90); do + curl -s http://localhost:8003/health > /dev/null 2>&1 && break + sleep 2 +done +echo "vLLM ready." 
+
+cd collaborativeagents/scripts || exit 1
+
+# Test with 25 parallel profiles
+python run_experiments.py \
+    --methods vanilla \
+    --datasets math-hard \
+    --n-profiles 25 \
+    --n-sessions 2 \
+    --max-turns 8 \
+    --use-vllm \
+    --use-openai-user \
+    --openai-user-model gpt-5-mini \
+    --reward-mode llm \
+    --vllm-agent-url http://localhost:8003/v1 \
+    --parallel-profiles 25 \
+    --profile-path ../data/complex_profiles_v2/profiles_200.jsonl \
+    --output-dir ../results/test_25parallel_$(date +%Y%m%d_%H%M%S)
+
+echo "Test complete!"
+pkill -f "vllm.entrypoints" 2>/dev/null || true
diff --git a/collaborativeagents/slurm/fullscale/test_25parallel_15649074.err b/collaborativeagents/slurm/fullscale/test_25parallel_15649074.err
new file mode 100644
index 0000000..96ed829
--- /dev/null
+++ b/collaborativeagents/slurm/fullscale/test_25parallel_15649074.err
@@ -0,0 +1,386 @@
+/u/yurenh2/miniforge3/envs/eval/lib/python3.11/site-packages/transformers/utils/hub.py:110: FutureWarning: Using `TRANSFORMERS_CACHE` is deprecated and will be removed in v5 of Transformers. Use `HF_HOME` instead.
+  warnings.warn(
+/u/yurenh2/miniforge3/envs/eval/lib/python3.11/site-packages/transformers/utils/hub.py:110: FutureWarning: Using `TRANSFORMERS_CACHE` is deprecated and will be removed in v5 of Transformers. Use `HF_HOME` instead.
+  warnings.warn(
+(EngineCore_DP0 pid=2749050) Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00/dev/null || true
+sleep 2
+
+# Increase GPU utilization to 60% for higher throughput
+CUDA_VISIBLE_DEVICES=0 python -m vllm.entrypoints.openai.api_server \
+    --model "$MODEL_8B" --port 8003 --tensor-parallel-size 1 \
+    --gpu-memory-utilization 0.6 --max-model-len 8192 \
+    --dtype bfloat16 --disable-log-requests &
+
+# Poll the health endpoint (90 tries, 2 s apart) and abort if the server never answers.
+VLLM_READY=0
+for i in $(seq 1 90); do
+    if curl -sf http://localhost:8003/health > /dev/null 2>&1; then
+        VLLM_READY=1
+        break
+    fi
+    sleep 2
+done
+if [[ "$VLLM_READY" -ne 1 ]]; then
+    echo "ERROR: vLLM server on port 8003 did not become healthy" >&2
+    pkill -f "vllm.entrypoints" 2>/dev/null || true
+    exit 1
+fi
+echo "vLLM ready."
+
+cd collaborativeagents/scripts || exit 1
+
+python run_experiments.py \
+    --methods vanilla \
+    --datasets math-hard \
+    --n-profiles 50 \
+    --n-sessions 2 \
+    --max-turns 8 \
+    --use-vllm \
+    --use-openai-user \
+    --openai-user-model gpt-5-mini \
+    --reward-mode llm \
+    --vllm-agent-url http://localhost:8003/v1 \
+    --parallel-profiles 50 \
+    --profile-path ../data/complex_profiles_v2/profiles_200.jsonl \
+    --output-dir ../results/test_50parallel_$(date +%Y%m%d_%H%M%S)
+
+echo "Test complete!"
+pkill -f "vllm.entrypoints" 2>/dev/null || true
diff --git a/collaborativeagents/slurm/fullscale/test_50parallel_15649149.err b/collaborativeagents/slurm/fullscale/test_50parallel_15649149.err
new file mode 100644
index 0000000..358fd24
--- /dev/null
+++ b/collaborativeagents/slurm/fullscale/test_50parallel_15649149.err
@@ -0,0 +1,504 @@
+/u/yurenh2/miniforge3/envs/eval/lib/python3.11/site-packages/transformers/utils/hub.py:110: FutureWarning: Using `TRANSFORMERS_CACHE` is deprecated and will be removed in v5 of Transformers. Use `HF_HOME` instead.
+  warnings.warn(
+/u/yurenh2/miniforge3/envs/eval/lib/python3.11/site-packages/transformers/utils/hub.py:110: FutureWarning: Using `TRANSFORMERS_CACHE` is deprecated and will be removed in v5 of Transformers. Use `HF_HOME` instead.
+  warnings.warn(
+(EngineCore_DP0 pid=2003864) Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00/dev/null || true
+sleep 2
+
+# Start vLLM server
+CUDA_VISIBLE_DEVICES=0 python -m vllm.entrypoints.openai.api_server \
+    --model "$MODEL_8B" --port 8003 --tensor-parallel-size 1 \
+    --gpu-memory-utilization 0.5 --max-model-len 8192 \
+    --dtype bfloat16 --disable-log-requests &
+
+# Poll the health endpoint (90 tries, 2 s apart) and abort if the server never answers.
+VLLM_READY=0
+for i in $(seq 1 90); do
+    if curl -sf http://localhost:8003/health > /dev/null 2>&1; then
+        VLLM_READY=1
+        break
+    fi
+    sleep 2
+done
+if [[ "$VLLM_READY" -ne 1 ]]; then
+    echo "ERROR: vLLM server on port 8003 did not become healthy" >&2
+    pkill -f "vllm.entrypoints" 2>/dev/null || true
+    exit 1
+fi
+echo "vLLM ready."
+
+cd collaborativeagents/scripts || exit 1
+
+# Test with vanilla (simplest method)
+echo "=== Testing batched agent calls ==="
+python run_experiments.py \
+    --methods vanilla \
+    --datasets math-hard \
+    --n-profiles 10 \
+    --n-sessions 2 \
+    --max-turns 8 \
+    --use-vllm \
+    --use-openai-user \
+    --openai-user-model gpt-5-mini \
+    --reward-mode llm \
+    --vllm-agent-url http://localhost:8003/v1 \
+    --parallel-profiles 10 \
+    --profile-path ../data/complex_profiles_v2/profiles_200.jsonl \
+    --output-dir ../results/test_batch_fix_$(date +%Y%m%d_%H%M%S)
+
+echo "Test complete!"
+pkill -f "vllm.entrypoints" 2>/dev/null || true
diff --git a/collaborativeagents/slurm/fullscale/test_batch_fix_15651956.err b/collaborativeagents/slurm/fullscale/test_batch_fix_15651956.err
new file mode 100644
index 0000000..a7574bf
--- /dev/null
+++ b/collaborativeagents/slurm/fullscale/test_batch_fix_15651956.err
@@ -0,0 +1,165 @@
+/u/yurenh2/miniforge3/envs/eval/lib/python3.11/site-packages/transformers/utils/hub.py:110: FutureWarning: Using `TRANSFORMERS_CACHE` is deprecated and will be removed in v5 of Transformers. Use `HF_HOME` instead.
+  warnings.warn(
+/u/yurenh2/miniforge3/envs/eval/lib/python3.11/site-packages/transformers/utils/hub.py:110: FutureWarning: Using `TRANSFORMERS_CACHE` is deprecated and will be removed in v5 of Transformers. Use `HF_HOME` instead.
+  warnings.warn(
+(EngineCore_DP0 pid=2779888) Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00/dev/null || true
+sleep 3
+
+echo "=== Starting 70B User Simulator (GPU 0-1, TP=2) ==="
+# --guided-decoding-backend was dropped: the committed log test_local_user_15652698.err shows
+# api_server.py rejecting it as an unrecognized argument, so the 70B server never started.
+# NOTE(review): the same usage text lists --structured-outputs-config; confirm its value format
+# before using it to select a backend.
+CUDA_VISIBLE_DEVICES=0,1 python -m vllm.entrypoints.openai.api_server \
+    --model "$MODEL_70B" \
+    --port 8004 \
+    --tensor-parallel-size 2 \
+    --gpu-memory-utilization 0.90 \
+    --max-model-len 4096 \
+    --quantization awq \
+    --dtype float16 \
+    --disable-log-requests &
+
+echo "=== Starting 8B Agent (GPU 2) ==="
+CUDA_VISIBLE_DEVICES=2 python -m vllm.entrypoints.openai.api_server \
+    --model "$MODEL_8B" \
+    --port 8003 \
+    --tensor-parallel-size 1 \
+    --gpu-memory-utilization 0.50 \
+    --max-model-len 8192 \
+    --dtype bfloat16 \
+    --disable-log-requests &
+
+# Wait for both servers; abort instead of reporting "ready" when a port never becomes healthy.
+echo "Waiting for vLLM servers..."
+for port in 8004 8003; do
+    SERVER_READY=0
+    for i in $(seq 1 120); do
+        if curl -sf "http://localhost:$port/health" > /dev/null 2>&1; then
+            SERVER_READY=1
+            break
+        fi
+        sleep 2
+    done
+    if [[ "$SERVER_READY" -ne 1 ]]; then
+        echo "ERROR: vLLM server on port $port did not become healthy" >&2
+        pkill -f "vllm.entrypoints" 2>/dev/null || true
+        exit 1
+    fi
+    echo " Port $port ready."
+done
+
+cd collaborativeagents/scripts || exit 1
+
+echo ""
+echo "=== Running Test: 10 profiles × 2 sessions with LOCAL user simulator ==="
+python run_experiments.py \
+    --methods vanilla \
+    --datasets math-hard \
+    --n-profiles 10 \
+    --n-sessions 2 \
+    --max-turns 8 \
+    --use-vllm \
+    --vllm-user-url http://localhost:8004/v1 \
+    --vllm-agent-url http://localhost:8003/v1 \
+    --reward-mode llm \
+    --parallel-profiles 10 \
+    --profile-path ../data/complex_profiles_v2/profiles_200.jsonl \
+    --output-dir ../results/test_local_user_$(date +%Y%m%d_%H%M%S)
+
+echo ""
+echo "=== Test Complete ==="
+pkill -f "vllm.entrypoints" 2>/dev/null || true
diff --git a/collaborativeagents/slurm/fullscale/test_local_user_15652698.err b/collaborativeagents/slurm/fullscale/test_local_user_15652698.err
new file mode 100644
index 0000000..4acc458
--- /dev/null
+++ b/collaborativeagents/slurm/fullscale/test_local_user_15652698.err
@@ -0,0 +1,215 @@
+/u/yurenh2/miniforge3/envs/eval/lib/python3.11/site-packages/transformers/utils/hub.py:110: FutureWarning: Using `TRANSFORMERS_CACHE` is deprecated and will be removed in v5 of Transformers. Use `HF_HOME` instead.
+  warnings.warn(
+/u/yurenh2/miniforge3/envs/eval/lib/python3.11/site-packages/transformers/utils/hub.py:110: FutureWarning: Using `TRANSFORMERS_CACHE` is deprecated and will be removed in v5 of Transformers. Use `HF_HOME` instead.
+ warnings.warn( +usage: api_server.py [-h] [--headless] [--api-server-count API_SERVER_COUNT] + [--config CONFIG] [--host HOST] [--port PORT] [--uds UDS] + [--uvicorn-log-level {critical,debug,error,info,trace,warning}] + [--disable-uvicorn-access-log | --no-disable-uvicorn-access-log] + [--allow-credentials | --no-allow-credentials] + [--allowed-origins ALLOWED_ORIGINS] + [--allowed-methods ALLOWED_METHODS] + [--allowed-headers ALLOWED_HEADERS] + [--api-key API_KEY [API_KEY ...]] + [--lora-modules LORA_MODULES [LORA_MODULES ...]] + [--chat-template CHAT_TEMPLATE] + [--chat-template-content-format {auto,openai,string}] + [--trust-request-chat-template | --no-trust-request-chat-template] + [--response-role RESPONSE_ROLE] + [--ssl-keyfile SSL_KEYFILE] [--ssl-certfile SSL_CERTFILE] + [--ssl-ca-certs SSL_CA_CERTS] + [--enable-ssl-refresh | --no-enable-ssl-refresh] + [--ssl-cert-reqs SSL_CERT_REQS] [--root-path ROOT_PATH] + [--middleware MIDDLEWARE] + [--return-tokens-as-token-ids | --no-return-tokens-as-token-ids] + [--disable-frontend-multiprocessing | --no-disable-frontend-multiprocessing] + [--enable-request-id-headers | --no-enable-request-id-headers] + [--enable-auto-tool-choice | --no-enable-auto-tool-choice] + [--exclude-tools-when-tool-choice-none | --no-exclude-tools-when-tool-choice-none] + [--tool-call-parser {deepseek_v3,deepseek_v31,deepseek_v32,ernie45,gigachat3,glm45,granite,granite-20b-fc,hermes,hunyuan_a13b,internlm,jamba,kimi_k2,llama3_json,llama4_json,llama4_pythonic,longcat,minimax,minimax_m2,mistral,olmo3,openai,phi4_mini_json,pythonic,qwen3_coder,qwen3_xml,seed_oss,step3,xlam} or name registered in --tool-parser-plugin] + [--tool-parser-plugin TOOL_PARSER_PLUGIN] + [--tool-server TOOL_SERVER] + [--log-config-file LOG_CONFIG_FILE] + [--max-log-len MAX_LOG_LEN] + [--disable-fastapi-docs | --no-disable-fastapi-docs] + [--enable-prompt-tokens-details | --no-enable-prompt-tokens-details] + [--enable-server-load-tracking | 
--no-enable-server-load-tracking] + [--enable-force-include-usage | --no-enable-force-include-usage] + [--enable-tokenizer-info-endpoint | --no-enable-tokenizer-info-endpoint] + [--enable-log-outputs | --no-enable-log-outputs] + [--h11-max-incomplete-event-size H11_MAX_INCOMPLETE_EVENT_SIZE] + [--h11-max-header-count H11_MAX_HEADER_COUNT] + [--log-error-stack | --no-log-error-stack] + [--tokens-only | --no-tokens-only] [--model MODEL] + [--runner {auto,draft,generate,pooling}] + [--convert {auto,classify,embed,none,reward}] + [--tokenizer TOKENIZER] + [--tokenizer-mode ['auto', 'deepseek_v32', 'hf', 'mistral', 'slow']] + [--trust-remote-code | --no-trust-remote-code] + [--dtype {auto,bfloat16,float,float16,float32,half}] + [--seed SEED] [--hf-config-path HF_CONFIG_PATH] + [--allowed-local-media-path ALLOWED_LOCAL_MEDIA_PATH] + [--allowed-media-domains ALLOWED_MEDIA_DOMAINS [ALLOWED_MEDIA_DOMAINS ...]] + [--revision REVISION] [--code-revision CODE_REVISION] + [--tokenizer-revision TOKENIZER_REVISION] + [--max-model-len MAX_MODEL_LEN] + [--quantization QUANTIZATION] + [--enforce-eager | --no-enforce-eager] + [--max-logprobs MAX_LOGPROBS] + [--logprobs-mode {processed_logits,processed_logprobs,raw_logits,raw_logprobs}] + [--disable-sliding-window | --no-disable-sliding-window] + [--disable-cascade-attn | --no-disable-cascade-attn] + [--skip-tokenizer-init | --no-skip-tokenizer-init] + [--enable-prompt-embeds | --no-enable-prompt-embeds] + [--served-model-name SERVED_MODEL_NAME [SERVED_MODEL_NAME ...]] + [--config-format ['auto', 'hf', 'mistral']] + [--hf-token [HF_TOKEN]] [--hf-overrides HF_OVERRIDES] + [--pooler-config POOLER_CONFIG] + [--logits-processor-pattern LOGITS_PROCESSOR_PATTERN] + [--generation-config GENERATION_CONFIG] + [--override-generation-config OVERRIDE_GENERATION_CONFIG] + [--enable-sleep-mode | --no-enable-sleep-mode] + [--model-impl ['auto', 'terratorch', 'transformers', 'vllm']] + [--override-attention-dtype OVERRIDE_ATTENTION_DTYPE] + 
[--logits-processors LOGITS_PROCESSORS [LOGITS_PROCESSORS ...]] + [--io-processor-plugin IO_PROCESSOR_PLUGIN] + [--load-format LOAD_FORMAT] [--download-dir DOWNLOAD_DIR] + [--safetensors-load-strategy SAFETENSORS_LOAD_STRATEGY] + [--model-loader-extra-config MODEL_LOADER_EXTRA_CONFIG] + [--ignore-patterns IGNORE_PATTERNS [IGNORE_PATTERNS ...]] + [--use-tqdm-on-load | --no-use-tqdm-on-load] + [--pt-load-map-location PT_LOAD_MAP_LOCATION] + [--attention-backend ATTENTION_BACKEND] + [--reasoning-parser REASONING_PARSER] + [--reasoning-parser-plugin REASONING_PARSER_PLUGIN] + [--distributed-executor-backend ['external_launcher', 'mp', 'ray', 'uni']] + [--pipeline-parallel-size PIPELINE_PARALLEL_SIZE] + [--master-addr MASTER_ADDR] [--master-port MASTER_PORT] + [--nnodes NNODES] [--node-rank NODE_RANK] + [--tensor-parallel-size TENSOR_PARALLEL_SIZE] + [--decode-context-parallel-size DECODE_CONTEXT_PARALLEL_SIZE] + [--dcp-kv-cache-interleave-size DCP_KV_CACHE_INTERLEAVE_SIZE] + [--cp-kv-cache-interleave-size CP_KV_CACHE_INTERLEAVE_SIZE] + [--prefill-context-parallel-size PREFILL_CONTEXT_PARALLEL_SIZE] + [--data-parallel-size DATA_PARALLEL_SIZE] + [--data-parallel-rank DATA_PARALLEL_RANK] + [--data-parallel-start-rank DATA_PARALLEL_START_RANK] + [--data-parallel-size-local DATA_PARALLEL_SIZE_LOCAL] + [--data-parallel-address DATA_PARALLEL_ADDRESS] + [--data-parallel-rpc-port DATA_PARALLEL_RPC_PORT] + [--data-parallel-backend DATA_PARALLEL_BACKEND] + [--data-parallel-hybrid-lb | --no-data-parallel-hybrid-lb | -dph] + [--data-parallel-external-lb | --no-data-parallel-external-lb | -dpe] + [--enable-expert-parallel | --no-enable-expert-parallel] + [--all2all-backend {allgather_reducescatter,deepep_high_throughput,deepep_low_latency,flashinfer_all2allv,naive,pplx,None}] + [--enable-dbo | --no-enable-dbo] + [--dbo-decode-token-threshold DBO_DECODE_TOKEN_THRESHOLD] + [--dbo-prefill-token-threshold DBO_PREFILL_TOKEN_THRESHOLD] + [--disable-nccl-for-dp-synchronization | 
--no-disable-nccl-for-dp-synchronization] + [--enable-eplb | --no-enable-eplb] + [--eplb-config EPLB_CONFIG] + [--expert-placement-strategy {linear,round_robin}] + [--max-parallel-loading-workers MAX_PARALLEL_LOADING_WORKERS] + [--ray-workers-use-nsight | --no-ray-workers-use-nsight] + [--disable-custom-all-reduce | --no-disable-custom-all-reduce] + [--worker-cls WORKER_CLS] + [--worker-extension-cls WORKER_EXTENSION_CLS] + [--block-size {1,8,16,32,64,128,256}] + [--gpu-memory-utilization GPU_MEMORY_UTILIZATION] + [--kv-cache-memory-bytes KV_CACHE_MEMORY_BYTES] + [--swap-space SWAP_SPACE] + [--kv-cache-dtype {auto,bfloat16,fp8,fp8_ds_mla,fp8_e4m3,fp8_e5m2,fp8_inc}] + [--num-gpu-blocks-override NUM_GPU_BLOCKS_OVERRIDE] + [--enable-prefix-caching | --no-enable-prefix-caching] + [--prefix-caching-hash-algo {sha256,sha256_cbor,xxhash,xxhash_cbor}] + [--cpu-offload-gb CPU_OFFLOAD_GB] + [--calculate-kv-scales | --no-calculate-kv-scales] + [--kv-sharing-fast-prefill | --no-kv-sharing-fast-prefill] + [--mamba-cache-dtype {auto,float16,float32}] + [--mamba-ssm-cache-dtype {auto,float16,float32}] + [--mamba-block-size MAMBA_BLOCK_SIZE] + [--kv-offloading-size KV_OFFLOADING_SIZE] + [--kv-offloading-backend {lmcache,native,None}] + [--limit-mm-per-prompt LIMIT_MM_PER_PROMPT] + [--enable-mm-embeds | --no-enable-mm-embeds] + [--media-io-kwargs MEDIA_IO_KWARGS] + [--mm-processor-kwargs MM_PROCESSOR_KWARGS] + [--mm-processor-cache-gb MM_PROCESSOR_CACHE_GB] + [--mm-processor-cache-type {lru,shm}] + [--mm-shm-cache-max-object-size-mb MM_SHM_CACHE_MAX_OBJECT_SIZE_MB] + [--mm-encoder-tp-mode {data,weights}] + [--mm-encoder-attn-backend MM_ENCODER_ATTN_BACKEND] + [--interleave-mm-strings | --no-interleave-mm-strings] + [--skip-mm-profiling | --no-skip-mm-profiling] + [--video-pruning-rate VIDEO_PRUNING_RATE] + [--enable-lora | --no-enable-lora] + [--max-loras MAX_LORAS] + [--max-lora-rank {1,8,16,32,64,128,256,320,512}] + [--lora-dtype {auto,bfloat16,float16}] + [--max-cpu-loras 
MAX_CPU_LORAS] + [--fully-sharded-loras | --no-fully-sharded-loras] + [--default-mm-loras DEFAULT_MM_LORAS] + [--show-hidden-metrics-for-version SHOW_HIDDEN_METRICS_FOR_VERSION] + [--otlp-traces-endpoint OTLP_TRACES_ENDPOINT] + [--collect-detailed-traces {all,model,worker,None} [{all,model,worker,None} ...]] + [--kv-cache-metrics | --no-kv-cache-metrics] + [--kv-cache-metrics-sample KV_CACHE_METRICS_SAMPLE] + [--cudagraph-metrics | --no-cudagraph-metrics] + [--enable-layerwise-nvtx-tracing | --no-enable-layerwise-nvtx-tracing] + [--max-num-batched-tokens MAX_NUM_BATCHED_TOKENS] + [--max-num-seqs MAX_NUM_SEQS] + [--max-num-partial-prefills MAX_NUM_PARTIAL_PREFILLS] + [--max-long-partial-prefills MAX_LONG_PARTIAL_PREFILLS] + [--long-prefill-token-threshold LONG_PREFILL_TOKEN_THRESHOLD] + [--scheduling-policy {fcfs,priority}] + [--enable-chunked-prefill | --no-enable-chunked-prefill] + [--disable-chunked-mm-input | --no-disable-chunked-mm-input] + [--scheduler-cls SCHEDULER_CLS] + [--disable-hybrid-kv-cache-manager | --no-disable-hybrid-kv-cache-manager] + [--async-scheduling | --no-async-scheduling] + [--stream-interval STREAM_INTERVAL] + [--cudagraph-capture-sizes CUDAGRAPH_CAPTURE_SIZES [CUDAGRAPH_CAPTURE_SIZES ...]] + [--max-cudagraph-capture-size MAX_CUDAGRAPH_CAPTURE_SIZE] + [--speculative-config SPECULATIVE_CONFIG] + [--kv-transfer-config KV_TRANSFER_CONFIG] + [--kv-events-config KV_EVENTS_CONFIG] + [--ec-transfer-config EC_TRANSFER_CONFIG] + [--compilation-config COMPILATION_CONFIG] + [--attention-config ATTENTION_CONFIG] + [--additional-config ADDITIONAL_CONFIG] + [--structured-outputs-config STRUCTURED_OUTPUTS_CONFIG] + [--profiler-config PROFILER_CONFIG] + [--optimization-level OPTIMIZATION_LEVEL] + [--disable-log-stats] [--aggregate-engine-logging] + [--enable-log-requests | --no-enable-log-requests] + [--disable-log-requests | --no-disable-log-requests] + [model_tag] +api_server.py: error: unrecognized arguments: --guided-decoding-backend 
+/u/yurenh2/miniforge3/envs/eval/lib/python3.11/site-packages/transformers/utils/hub.py:110: FutureWarning: Using `TRANSFORMERS_CACHE` is deprecated and will be removed in v5 of Transformers. Use `HF_HOME` instead.
+  warnings.warn(
+(EngineCore_DP0 pid=3603174) Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00/dev/null || true
+sleep 2
+
+# GPU 0: vLLM 8B agent, GPU 1: adapter models
+CUDA_VISIBLE_DEVICES=0 python -m vllm.entrypoints.openai.api_server \
+    --model "$MODEL_8B" --port 8003 --tensor-parallel-size 1 \
+    --gpu-memory-utilization 0.5 --max-model-len 8192 \
+    --dtype bfloat16 --disable-log-requests &
+
+# Wait for server (90 tries, 2 s apart); abort if the health endpoint never answers.
+VLLM_READY=0
+for i in $(seq 1 90); do
+    if curl -sf http://localhost:8003/health > /dev/null 2>&1; then
+        VLLM_READY=1
+        break
+    fi
+    sleep 2
+done
+if [[ "$VLLM_READY" -ne 1 ]]; then
+    echo "ERROR: vLLM server on port 8003 did not become healthy" >&2
+    pkill -f "vllm.entrypoints" 2>/dev/null || true
+    exit 1
+fi
+echo "vLLM 8B agent server ready."
+
+cd collaborativeagents/scripts || exit 1
+
+# Test run: vanilla + rag_vector (light + heavy methods)
+python run_experiments.py \
+    --methods vanilla,rag_vector \
+    --datasets math-hard \
+    --n-profiles 2 \
+    --n-sessions 2 \
+    --max-turns 8 \
+    --use-vllm \
+    --use-openai-user \
+    --openai-user-model gpt-5-mini \
+    --reward-mode llm \
+    --vllm-agent-url http://localhost:8003/v1 \
+    --parallel-profiles 2 \
+    --profile-path ../data/complex_profiles_v2/profiles_200.jsonl \
+    --output-dir ../results/fullscale_test_$(date +%Y%m%d_%H%M%S)
+
+echo "Test run complete!"
+
+pkill -f "vllm.entrypoints" 2>/dev/null || true
-- cgit v1.2.3