From dc801c07cf38b0c495686463e6ca6f871a64440e Mon Sep 17 00:00:00 2001 From: YurenHao0426 Date: Tue, 27 Jan 2026 09:57:37 -0600 Subject: Add collaborativeagents module and update gitignore - Add collaborativeagents subproject with adapters, agents, and evaluation modules - Update .gitignore to exclude large binary files (.whl, .tar), wandb logs, and results Co-Authored-By: Claude Opus 4.5 --- collaborativeagents/slurm/all_memory_15640363.err | 544 + .../slurm/fullscale/continue_to_30sess.sh | 76 + .../slurm/fullscale/generate_jobs.sh | 89 + .../slurm/fullscale/run_all_memory_p0.sh | 66 + .../slurm/fullscale/run_all_memory_p100.sh | 66 + .../slurm/fullscale/run_all_memory_p150.sh | 66 + .../slurm/fullscale/run_all_memory_p50.sh | 66 + .../slurm/fullscale/run_contextual_p0.sh | 66 + .../slurm/fullscale/run_contextual_p100.sh | 66 + .../slurm/fullscale/run_contextual_p150.sh | 66 + .../slurm/fullscale/run_contextual_p50.sh | 66 + collaborativeagents/slurm/fullscale/run_rag_p0.sh | 66 + .../slurm/fullscale/run_rag_p100.sh | 66 + .../slurm/fullscale/run_rag_p150.sh | 66 + collaborativeagents/slurm/fullscale/run_rag_p50.sh | 66 + .../slurm/fullscale/run_rag_vector_p0.sh | 66 + .../slurm/fullscale/run_rag_vector_p100.sh | 66 + .../slurm/fullscale/run_rag_vector_p150.sh | 66 + .../slurm/fullscale/run_rag_vector_p50.sh | 66 + .../slurm/fullscale/run_reflection_p0.sh | 66 + .../slurm/fullscale/run_reflection_p100.sh | 66 + .../slurm/fullscale/run_reflection_p150.sh | 66 + .../slurm/fullscale/run_reflection_p50.sh | 66 + .../slurm/fullscale/run_vanilla_p0.sh | 66 + .../slurm/fullscale/run_vanilla_p100.sh | 66 + .../slurm/fullscale/run_vanilla_p150.sh | 66 + .../slurm/fullscale/run_vanilla_p50.sh | 66 + collaborativeagents/slurm/fullscale/submit_all.sh | 29 + .../slurm/fullscale/test_25parallel.sh | 66 + .../slurm/fullscale/test_25parallel_15649074.err | 386 + .../slurm/fullscale/test_50parallel.sh | 66 + .../slurm/fullscale/test_50parallel_15649149.err | 504 + 
.../slurm/fullscale/test_batch_fix.sh | 69 + .../slurm/fullscale/test_batch_fix_15651956.err | 165 + .../slurm/fullscale/test_local_user.sh | 94 + .../slurm/fullscale/test_local_user_15652698.err | 215 + collaborativeagents/slurm/fullscale/test_run.sh | 70 + collaborativeagents/slurm/generate_profiles.sh | 39 + .../slurm/logs/all_memory_14360420.err | 127 + .../slurm/logs/contextual_14360423.err | 139 + .../slurm/logs/full_exp_14357783.err | 69 + .../slurm/logs/full_exp_14358390.err | 97 + collaborativeagents/slurm/logs/rag_14360421.err | 9442 ++++++++++++ .../slurm/logs/rag_vector_14360422.err | 129 + .../slurm/logs/reflection_14360424.err | 101 + .../slurm/logs/reflection_grpo_14360425.err | 119 + .../slurm/logs/run_collab_baselines_14355966.err | 14466 +++++++++++++++++++ .../slurm/logs/run_collab_baselines_14355975.err | 70 + .../slurm/logs/run_expts_a100_14355787.err | 15 + .../slurm/logs/run_expts_a100_14355797.err | 14 + .../slurm/logs/run_expts_a100_14355807.err | 10 + .../slurm/logs/run_expts_a100_14355816.err | 22 + .../slurm/logs/run_expts_a100_14355842.err | 23 + .../slurm/logs/run_expts_a100_14355851.err | 28 + .../slurm/logs/run_expts_a100_14355856.err | 10 + .../slurm/logs/run_expts_a100_14355861.err | 14 + .../slurm/logs/run_expts_a100_14355863.err | 11 + .../slurm/logs/run_expts_a100_14355865.err | 19 + .../slurm/logs/run_expts_a100_14355871.err | 16 + .../slurm/logs/run_expts_a100_14355878.err | 109 + .../slurm/logs/run_expts_a100_14355885.err | 17 + .../slurm/logs/run_expts_a100_14355888.err | 19 + .../slurm/logs/run_expts_a100_14355896.err | 12 + .../slurm/logs/run_expts_a100_14355901.err | 47 + .../slurm/logs/run_expts_a100_14355902.err | 185 + .../slurm/logs/run_expts_a100_14355919.err | 97 + .../slurm/logs/run_multiturn_14357110.err | 9 + .../slurm/logs/run_multiturn_14357122.err | 98 + .../slurm/logs/test_70b_14357753.err | 1 + .../slurm/logs/test_70b_14357762.err | 24 + .../slurm/logs/test_extractor_14363568.err | 3 + 
.../slurm/logs/test_multiturn_14357116.err | 13 + .../slurm/logs/test_multiturn_14357119.err | 46 + .../slurm/logs/vanilla_14360419.err | 138 + .../slurm/logs/vllm_bench_14367333.err | 6 + .../slurm/logs/vllm_bench_70b_8b_14367370.err | 180 + .../slurm/logs/vllm_only_14367345.err | 1 + collaborativeagents/slurm/rag_15640364.err | 498 + collaborativeagents/slurm/rag_vector_15640365.err | 659 + collaborativeagents/slurm/rerun_reflection.sbatch | 83 + collaborativeagents/slurm/run_all_gpt_user_test.sh | 67 + collaborativeagents/slurm/run_all_memory.sh | 57 + collaborativeagents/slurm/run_all_memory_v2.sh | 34 + collaborativeagents/slurm/run_contextual.sh | 32 + collaborativeagents/slurm/run_contextual_p0_9.sh | 36 + collaborativeagents/slurm/run_contextual_p10_19.sh | 36 + collaborativeagents/slurm/run_contextual_p20_29.sh | 36 + collaborativeagents/slurm/run_contextual_resume.sh | 34 + collaborativeagents/slurm/run_contextual_v2.sh | 59 + collaborativeagents/slurm/run_experiments.sh | 66 + collaborativeagents/slurm/run_experiments_a100.sh | 58 + .../slurm/run_experiments_collab_baselines.sh | 59 + .../slurm/run_experiments_multiturn.sh | 60 + collaborativeagents/slurm/run_full_experiment.sh | 58 + .../slurm/run_full_experiment_v2.sh | 61 + collaborativeagents/slurm/run_grpo_test.sh | 101 + collaborativeagents/slurm/run_grpo_training.sh | 69 + collaborativeagents/slurm/run_rag.sh | 57 + collaborativeagents/slurm/run_rag_bge_v2.sh | 61 + collaborativeagents/slurm/run_rag_v2.sh | 59 + collaborativeagents/slurm/run_rag_vector.sh | 57 + collaborativeagents/slurm/run_rag_vector_3x.sh | 74 + .../slurm/run_rag_vector_llm_test.sh | 76 + collaborativeagents/slurm/run_rag_vector_v2.sh | 59 + collaborativeagents/slurm/run_reflection.sh | 32 + collaborativeagents/slurm/run_reflection_grpo.sh | 32 + collaborativeagents/slurm/run_reflection_v2.sh | 126 + collaborativeagents/slurm/run_sft_h200.sh | 64 + collaborativeagents/slurm/run_sft_only.sh | 64 + 
collaborativeagents/slurm/run_sft_training.sh | 43 + collaborativeagents/slurm/run_vanilla.sh | 32 + collaborativeagents/slurm/run_vanilla_v2.sh | 59 + .../slurm/scale_contextual_forget.sbatch | 69 + collaborativeagents/slurm/scale_missing.sbatch | 29 + .../slurm/scale_rag_remaining.sbatch | 71 + collaborativeagents/slurm/start_model_servers.sh | 82 + collaborativeagents/slurm/test_70b_pilot.sh | 48 + collaborativeagents/slurm/test_extractor.sh | 22 + collaborativeagents/slurm/test_multiturn.sh | 38 + collaborativeagents/slurm/test_vllm_70b_8b.sh | 167 + collaborativeagents/slurm/test_vllm_benchmark.sh | 102 + collaborativeagents/slurm/test_vllm_only.sh | 117 + 122 files changed, 33745 insertions(+) create mode 100644 collaborativeagents/slurm/all_memory_15640363.err create mode 100644 collaborativeagents/slurm/fullscale/continue_to_30sess.sh create mode 100644 collaborativeagents/slurm/fullscale/generate_jobs.sh create mode 100755 collaborativeagents/slurm/fullscale/run_all_memory_p0.sh create mode 100755 collaborativeagents/slurm/fullscale/run_all_memory_p100.sh create mode 100755 collaborativeagents/slurm/fullscale/run_all_memory_p150.sh create mode 100755 collaborativeagents/slurm/fullscale/run_all_memory_p50.sh create mode 100755 collaborativeagents/slurm/fullscale/run_contextual_p0.sh create mode 100755 collaborativeagents/slurm/fullscale/run_contextual_p100.sh create mode 100755 collaborativeagents/slurm/fullscale/run_contextual_p150.sh create mode 100755 collaborativeagents/slurm/fullscale/run_contextual_p50.sh create mode 100755 collaborativeagents/slurm/fullscale/run_rag_p0.sh create mode 100755 collaborativeagents/slurm/fullscale/run_rag_p100.sh create mode 100755 collaborativeagents/slurm/fullscale/run_rag_p150.sh create mode 100755 collaborativeagents/slurm/fullscale/run_rag_p50.sh create mode 100755 collaborativeagents/slurm/fullscale/run_rag_vector_p0.sh create mode 100755 collaborativeagents/slurm/fullscale/run_rag_vector_p100.sh create mode 100755 
collaborativeagents/slurm/fullscale/run_rag_vector_p150.sh create mode 100755 collaborativeagents/slurm/fullscale/run_rag_vector_p50.sh create mode 100755 collaborativeagents/slurm/fullscale/run_reflection_p0.sh create mode 100755 collaborativeagents/slurm/fullscale/run_reflection_p100.sh create mode 100755 collaborativeagents/slurm/fullscale/run_reflection_p150.sh create mode 100755 collaborativeagents/slurm/fullscale/run_reflection_p50.sh create mode 100755 collaborativeagents/slurm/fullscale/run_vanilla_p0.sh create mode 100755 collaborativeagents/slurm/fullscale/run_vanilla_p100.sh create mode 100755 collaborativeagents/slurm/fullscale/run_vanilla_p150.sh create mode 100755 collaborativeagents/slurm/fullscale/run_vanilla_p50.sh create mode 100644 collaborativeagents/slurm/fullscale/submit_all.sh create mode 100644 collaborativeagents/slurm/fullscale/test_25parallel.sh create mode 100644 collaborativeagents/slurm/fullscale/test_25parallel_15649074.err create mode 100644 collaborativeagents/slurm/fullscale/test_50parallel.sh create mode 100644 collaborativeagents/slurm/fullscale/test_50parallel_15649149.err create mode 100644 collaborativeagents/slurm/fullscale/test_batch_fix.sh create mode 100644 collaborativeagents/slurm/fullscale/test_batch_fix_15651956.err create mode 100644 collaborativeagents/slurm/fullscale/test_local_user.sh create mode 100644 collaborativeagents/slurm/fullscale/test_local_user_15652698.err create mode 100644 collaborativeagents/slurm/fullscale/test_run.sh create mode 100644 collaborativeagents/slurm/generate_profiles.sh create mode 100644 collaborativeagents/slurm/logs/all_memory_14360420.err create mode 100644 collaborativeagents/slurm/logs/contextual_14360423.err create mode 100644 collaborativeagents/slurm/logs/full_exp_14357783.err create mode 100644 collaborativeagents/slurm/logs/full_exp_14358390.err create mode 100644 collaborativeagents/slurm/logs/rag_14360421.err create mode 100644 
collaborativeagents/slurm/logs/rag_vector_14360422.err create mode 100644 collaborativeagents/slurm/logs/reflection_14360424.err create mode 100644 collaborativeagents/slurm/logs/reflection_grpo_14360425.err create mode 100644 collaborativeagents/slurm/logs/run_collab_baselines_14355966.err create mode 100644 collaborativeagents/slurm/logs/run_collab_baselines_14355975.err create mode 100644 collaborativeagents/slurm/logs/run_expts_a100_14355787.err create mode 100644 collaborativeagents/slurm/logs/run_expts_a100_14355797.err create mode 100644 collaborativeagents/slurm/logs/run_expts_a100_14355807.err create mode 100644 collaborativeagents/slurm/logs/run_expts_a100_14355816.err create mode 100644 collaborativeagents/slurm/logs/run_expts_a100_14355842.err create mode 100644 collaborativeagents/slurm/logs/run_expts_a100_14355851.err create mode 100644 collaborativeagents/slurm/logs/run_expts_a100_14355856.err create mode 100644 collaborativeagents/slurm/logs/run_expts_a100_14355861.err create mode 100644 collaborativeagents/slurm/logs/run_expts_a100_14355863.err create mode 100644 collaborativeagents/slurm/logs/run_expts_a100_14355865.err create mode 100644 collaborativeagents/slurm/logs/run_expts_a100_14355871.err create mode 100644 collaborativeagents/slurm/logs/run_expts_a100_14355878.err create mode 100644 collaborativeagents/slurm/logs/run_expts_a100_14355885.err create mode 100644 collaborativeagents/slurm/logs/run_expts_a100_14355888.err create mode 100644 collaborativeagents/slurm/logs/run_expts_a100_14355896.err create mode 100644 collaborativeagents/slurm/logs/run_expts_a100_14355901.err create mode 100644 collaborativeagents/slurm/logs/run_expts_a100_14355902.err create mode 100644 collaborativeagents/slurm/logs/run_expts_a100_14355919.err create mode 100644 collaborativeagents/slurm/logs/run_multiturn_14357110.err create mode 100644 collaborativeagents/slurm/logs/run_multiturn_14357122.err create mode 100644 
collaborativeagents/slurm/logs/test_70b_14357753.err create mode 100644 collaborativeagents/slurm/logs/test_70b_14357762.err create mode 100644 collaborativeagents/slurm/logs/test_extractor_14363568.err create mode 100644 collaborativeagents/slurm/logs/test_multiturn_14357116.err create mode 100644 collaborativeagents/slurm/logs/test_multiturn_14357119.err create mode 100644 collaborativeagents/slurm/logs/vanilla_14360419.err create mode 100644 collaborativeagents/slurm/logs/vllm_bench_14367333.err create mode 100644 collaborativeagents/slurm/logs/vllm_bench_70b_8b_14367370.err create mode 100644 collaborativeagents/slurm/logs/vllm_only_14367345.err create mode 100644 collaborativeagents/slurm/rag_15640364.err create mode 100644 collaborativeagents/slurm/rag_vector_15640365.err create mode 100644 collaborativeagents/slurm/rerun_reflection.sbatch create mode 100644 collaborativeagents/slurm/run_all_gpt_user_test.sh create mode 100755 collaborativeagents/slurm/run_all_memory.sh create mode 100755 collaborativeagents/slurm/run_all_memory_v2.sh create mode 100755 collaborativeagents/slurm/run_contextual.sh create mode 100755 collaborativeagents/slurm/run_contextual_p0_9.sh create mode 100755 collaborativeagents/slurm/run_contextual_p10_19.sh create mode 100755 collaborativeagents/slurm/run_contextual_p20_29.sh create mode 100755 collaborativeagents/slurm/run_contextual_resume.sh create mode 100755 collaborativeagents/slurm/run_contextual_v2.sh create mode 100644 collaborativeagents/slurm/run_experiments.sh create mode 100644 collaborativeagents/slurm/run_experiments_a100.sh create mode 100755 collaborativeagents/slurm/run_experiments_collab_baselines.sh create mode 100755 collaborativeagents/slurm/run_experiments_multiturn.sh create mode 100755 collaborativeagents/slurm/run_full_experiment.sh create mode 100755 collaborativeagents/slurm/run_full_experiment_v2.sh create mode 100644 collaborativeagents/slurm/run_grpo_test.sh create mode 100755 
collaborativeagents/slurm/run_grpo_training.sh create mode 100755 collaborativeagents/slurm/run_rag.sh create mode 100755 collaborativeagents/slurm/run_rag_bge_v2.sh create mode 100755 collaborativeagents/slurm/run_rag_v2.sh create mode 100755 collaborativeagents/slurm/run_rag_vector.sh create mode 100644 collaborativeagents/slurm/run_rag_vector_3x.sh create mode 100644 collaborativeagents/slurm/run_rag_vector_llm_test.sh create mode 100755 collaborativeagents/slurm/run_rag_vector_v2.sh create mode 100755 collaborativeagents/slurm/run_reflection.sh create mode 100755 collaborativeagents/slurm/run_reflection_grpo.sh create mode 100755 collaborativeagents/slurm/run_reflection_v2.sh create mode 100644 collaborativeagents/slurm/run_sft_h200.sh create mode 100644 collaborativeagents/slurm/run_sft_only.sh create mode 100755 collaborativeagents/slurm/run_sft_training.sh create mode 100755 collaborativeagents/slurm/run_vanilla.sh create mode 100755 collaborativeagents/slurm/run_vanilla_v2.sh create mode 100644 collaborativeagents/slurm/scale_contextual_forget.sbatch create mode 100644 collaborativeagents/slurm/scale_missing.sbatch create mode 100644 collaborativeagents/slurm/scale_rag_remaining.sbatch create mode 100644 collaborativeagents/slurm/start_model_servers.sh create mode 100755 collaborativeagents/slurm/test_70b_pilot.sh create mode 100755 collaborativeagents/slurm/test_extractor.sh create mode 100755 collaborativeagents/slurm/test_multiturn.sh create mode 100644 collaborativeagents/slurm/test_vllm_70b_8b.sh create mode 100644 collaborativeagents/slurm/test_vllm_benchmark.sh create mode 100644 collaborativeagents/slurm/test_vllm_only.sh (limited to 'collaborativeagents/slurm') diff --git a/collaborativeagents/slurm/all_memory_15640363.err b/collaborativeagents/slurm/all_memory_15640363.err new file mode 100644 index 0000000..f84e5ce --- /dev/null +++ b/collaborativeagents/slurm/all_memory_15640363.err @@ -0,0 +1,544 @@ 
+/u/yurenh2/miniforge3/envs/eval/lib/python3.11/site-packages/transformers/utils/hub.py:110: FutureWarning: Using `TRANSFORMERS_CACHE` is deprecated and will be removed in v5 of Transformers. Use `HF_HOME` instead. + warnings.warn( +/u/yurenh2/miniforge3/envs/eval/lib/python3.11/site-packages/transformers/utils/hub.py:110: FutureWarning: Using `TRANSFORMERS_CACHE` is deprecated and will be removed in v5 of Transformers. Use `HF_HOME` instead. + warnings.warn( +(EngineCore_DP0 pid=2387432) Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00/dev/null || true +sleep 2 + +CUDA_VISIBLE_DEVICES=0 python -m vllm.entrypoints.openai.api_server \ + --model $MODEL_8B --port 8003 --tensor-parallel-size 1 \ + --gpu-memory-utilization 0.5 --max-model-len 8192 \ + --dtype bfloat16 --disable-log-requests & + +for i in $(seq 1 90); do + curl -s http://localhost:8003/health > /dev/null 2>&1 && break + sleep 2 +done +echo "vLLM ready." + +cd collaborativeagents/scripts + +# Find the existing results directory +RESULTS_DIR=$(ls -td ../results/fullscale_15sess/*/ 2>/dev/null | head -1) + +if [ -z "$RESULTS_DIR" ]; then + echo "ERROR: No existing results directory found in fullscale_15sess/" + exit 1 +fi + +echo "Continuing from: $RESULTS_DIR" + +# Continue with 30 sessions (the checkpoint system will skip already-completed sessions) +python run_experiments.py \ + --methods vanilla,contextual,reflection,all_memory,rag,rag_vector \ + --datasets math-hard \ + --n-profiles 200 \ + --n-sessions 30 \ + --max-turns 8 \ + --use-vllm \ + --use-openai-user \ + --openai-user-model gpt-5-mini \ + --reward-mode llm \ + --vllm-agent-url http://localhost:8003/v1 \ + --parallel-profiles 25 \ + --profile-path ../data/complex_profiles_v2/profiles_200.jsonl \ + --continue-from "$RESULTS_DIR" + +echo "Continue to 30 sessions complete!" 
+pkill -f "vllm.entrypoints" 2>/dev/null || true diff --git a/collaborativeagents/slurm/fullscale/generate_jobs.sh b/collaborativeagents/slurm/fullscale/generate_jobs.sh new file mode 100644 index 0000000..0bc5c0b --- /dev/null +++ b/collaborativeagents/slurm/fullscale/generate_jobs.sh @@ -0,0 +1,89 @@ +#!/bin/bash +# Generate all job scripts (6 methods × 4 profile ranges = 24 jobs) +# Each job: 50 profiles × 15 sessions = 750 sessions ≈ 7-8 hours + +METHODS="vanilla contextual reflection all_memory rag rag_vector" +RANGES="0:50 50:100 100:150 150:200" + +for method in $METHODS; do + for range in $RANGES; do + start=${range%:*} + end=${range#*:} + + cat > run_${method}_p${start}.sh << EOF +#!/bin/bash +#SBATCH --job-name=exp_${method}_p${start} +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA100x4 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=16 +#SBATCH --gres=gpu:nvidia_a100:2 +#SBATCH --mem=128G +#SBATCH --time=12:00:00 +#SBATCH --output=exp_${method}_p${start}_%j.out +#SBATCH --error=exp_${method}_p${start}_%j.err + +# Full run: ${method} method, profiles ${start}-${end} (50 profiles × 15 sessions) + +cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model +source /u/yurenh2/miniforge3/etc/profile.d/conda.sh +conda activate eval + +export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface +export PYTHONPATH="\${PWD}/src:\${PWD}/collaborativeagents:\${PYTHONPATH}" +export NCCL_P2P_DISABLE=1 + +set -a +source .env +set +a + +pip install --quiet openai python-dotenv json-repair + +MODEL_8B="/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/models/llama-3.1-8b-instruct" + +pkill -f "vllm.entrypoints" 2>/dev/null || true +sleep 2 + +CUDA_VISIBLE_DEVICES=0 python -m vllm.entrypoints.openai.api_server \\ + --model \$MODEL_8B --port 8003 --tensor-parallel-size 1 \\ + --gpu-memory-utilization 0.5 --max-model-len 8192 \\ + --dtype bfloat16 --disable-log-requests & + +for i in \$(seq 1 90); do + curl -s 
http://localhost:8003/health > /dev/null 2>&1 && break + sleep 2 +done +echo "vLLM ready." + +cd collaborativeagents/scripts + +python run_experiments.py \\ + --methods ${method} \\ + --datasets math-hard \\ + --n-profiles 200 \\ + --start-profile ${start} \\ + --end-profile ${end} \\ + --n-sessions 15 \\ + --max-turns 8 \\ + --use-vllm \\ + --use-openai-user \\ + --openai-user-model gpt-5-mini \\ + --reward-mode llm \\ + --vllm-agent-url http://localhost:8003/v1 \\ + --parallel-profiles 25 \\ + --profile-path ../data/complex_profiles_v2/profiles_200.jsonl \\ + --output-dir ../results/fullscale_15sess + +echo "${method} p${start}-${end} complete!" +pkill -f "vllm.entrypoints" 2>/dev/null || true +EOF + chmod +x run_${method}_p${start}.sh + echo "Created run_${method}_p${start}.sh" + done +done + +echo "" +echo "Generated 24 job scripts (6 methods × 4 profile ranges)" +echo "Each job: 50 profiles × 15 sessions = 750 sessions" +echo "Estimated time per job: ~7-8 hours" diff --git a/collaborativeagents/slurm/fullscale/run_all_memory_p0.sh b/collaborativeagents/slurm/fullscale/run_all_memory_p0.sh new file mode 100755 index 0000000..bb7968b --- /dev/null +++ b/collaborativeagents/slurm/fullscale/run_all_memory_p0.sh @@ -0,0 +1,66 @@ +#!/bin/bash +#SBATCH --job-name=exp_all_memory_p0 +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA100x4 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=16 +#SBATCH --gres=gpu:nvidia_a100:2 +#SBATCH --mem=128G +#SBATCH --time=12:00:00 +#SBATCH --output=exp_all_memory_p0_%j.out +#SBATCH --error=exp_all_memory_p0_%j.err + +# Full run: all_memory method, profiles 0-50 (50 profiles × 15 sessions) + +cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model +source /u/yurenh2/miniforge3/etc/profile.d/conda.sh +conda activate eval + +export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface +export PYTHONPATH="${PWD}/src:${PWD}/collaborativeagents:${PYTHONPATH}" +export NCCL_P2P_DISABLE=1 + +set -a 
+source .env +set +a + +pip install --quiet openai python-dotenv json-repair + +MODEL_8B="/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/models/llama-3.1-8b-instruct" + +pkill -f "vllm.entrypoints" 2>/dev/null || true +sleep 2 + +CUDA_VISIBLE_DEVICES=0 python -m vllm.entrypoints.openai.api_server \ + --model $MODEL_8B --port 8003 --tensor-parallel-size 1 \ + --gpu-memory-utilization 0.5 --max-model-len 8192 \ + --dtype bfloat16 --disable-log-requests & + +for i in $(seq 1 90); do + curl -s http://localhost:8003/health > /dev/null 2>&1 && break + sleep 2 +done +echo "vLLM ready." + +cd collaborativeagents/scripts + +python run_experiments.py \ + --methods all_memory \ + --datasets math-hard \ + --n-profiles 200 \ + --start-profile 0 \ + --end-profile 50 \ + --n-sessions 15 \ + --max-turns 8 \ + --use-vllm \ + --use-openai-user \ + --openai-user-model gpt-5-mini \ + --reward-mode llm \ + --vllm-agent-url http://localhost:8003/v1 \ + --parallel-profiles 25 \ + --profile-path ../data/complex_profiles_v2/profiles_200.jsonl \ + --output-dir ../results/fullscale_15sess + +echo "all_memory p0-50 complete!" 
+pkill -f "vllm.entrypoints" 2>/dev/null || true diff --git a/collaborativeagents/slurm/fullscale/run_all_memory_p100.sh b/collaborativeagents/slurm/fullscale/run_all_memory_p100.sh new file mode 100755 index 0000000..21db6de --- /dev/null +++ b/collaborativeagents/slurm/fullscale/run_all_memory_p100.sh @@ -0,0 +1,66 @@ +#!/bin/bash +#SBATCH --job-name=exp_all_memory_p100 +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA100x4 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=16 +#SBATCH --gres=gpu:nvidia_a100:2 +#SBATCH --mem=128G +#SBATCH --time=12:00:00 +#SBATCH --output=exp_all_memory_p100_%j.out +#SBATCH --error=exp_all_memory_p100_%j.err + +# Full run: all_memory method, profiles 100-150 (50 profiles × 15 sessions) + +cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model +source /u/yurenh2/miniforge3/etc/profile.d/conda.sh +conda activate eval + +export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface +export PYTHONPATH="${PWD}/src:${PWD}/collaborativeagents:${PYTHONPATH}" +export NCCL_P2P_DISABLE=1 + +set -a +source .env +set +a + +pip install --quiet openai python-dotenv json-repair + +MODEL_8B="/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/models/llama-3.1-8b-instruct" + +pkill -f "vllm.entrypoints" 2>/dev/null || true +sleep 2 + +CUDA_VISIBLE_DEVICES=0 python -m vllm.entrypoints.openai.api_server \ + --model $MODEL_8B --port 8003 --tensor-parallel-size 1 \ + --gpu-memory-utilization 0.5 --max-model-len 8192 \ + --dtype bfloat16 --disable-log-requests & + +for i in $(seq 1 90); do + curl -s http://localhost:8003/health > /dev/null 2>&1 && break + sleep 2 +done +echo "vLLM ready." 
+ +cd collaborativeagents/scripts + +python run_experiments.py \ + --methods all_memory \ + --datasets math-hard \ + --n-profiles 200 \ + --start-profile 100 \ + --end-profile 150 \ + --n-sessions 15 \ + --max-turns 8 \ + --use-vllm \ + --use-openai-user \ + --openai-user-model gpt-5-mini \ + --reward-mode llm \ + --vllm-agent-url http://localhost:8003/v1 \ + --parallel-profiles 25 \ + --profile-path ../data/complex_profiles_v2/profiles_200.jsonl \ + --output-dir ../results/fullscale_15sess + +echo "all_memory p100-150 complete!" +pkill -f "vllm.entrypoints" 2>/dev/null || true diff --git a/collaborativeagents/slurm/fullscale/run_all_memory_p150.sh b/collaborativeagents/slurm/fullscale/run_all_memory_p150.sh new file mode 100755 index 0000000..da7a729 --- /dev/null +++ b/collaborativeagents/slurm/fullscale/run_all_memory_p150.sh @@ -0,0 +1,66 @@ +#!/bin/bash +#SBATCH --job-name=exp_all_memory_p150 +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA100x4 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=16 +#SBATCH --gres=gpu:nvidia_a100:2 +#SBATCH --mem=128G +#SBATCH --time=12:00:00 +#SBATCH --output=exp_all_memory_p150_%j.out +#SBATCH --error=exp_all_memory_p150_%j.err + +# Full run: all_memory method, profiles 150-200 (50 profiles × 15 sessions) + +cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model +source /u/yurenh2/miniforge3/etc/profile.d/conda.sh +conda activate eval + +export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface +export PYTHONPATH="${PWD}/src:${PWD}/collaborativeagents:${PYTHONPATH}" +export NCCL_P2P_DISABLE=1 + +set -a +source .env +set +a + +pip install --quiet openai python-dotenv json-repair + +MODEL_8B="/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/models/llama-3.1-8b-instruct" + +pkill -f "vllm.entrypoints" 2>/dev/null || true +sleep 2 + +CUDA_VISIBLE_DEVICES=0 python -m vllm.entrypoints.openai.api_server \ + --model $MODEL_8B --port 8003 --tensor-parallel-size 1 \ + 
--gpu-memory-utilization 0.5 --max-model-len 8192 \ + --dtype bfloat16 --disable-log-requests & + +for i in $(seq 1 90); do + curl -s http://localhost:8003/health > /dev/null 2>&1 && break + sleep 2 +done +echo "vLLM ready." + +cd collaborativeagents/scripts + +python run_experiments.py \ + --methods all_memory \ + --datasets math-hard \ + --n-profiles 200 \ + --start-profile 150 \ + --end-profile 200 \ + --n-sessions 15 \ + --max-turns 8 \ + --use-vllm \ + --use-openai-user \ + --openai-user-model gpt-5-mini \ + --reward-mode llm \ + --vllm-agent-url http://localhost:8003/v1 \ + --parallel-profiles 25 \ + --profile-path ../data/complex_profiles_v2/profiles_200.jsonl \ + --output-dir ../results/fullscale_15sess + +echo "all_memory p150-200 complete!" +pkill -f "vllm.entrypoints" 2>/dev/null || true diff --git a/collaborativeagents/slurm/fullscale/run_all_memory_p50.sh b/collaborativeagents/slurm/fullscale/run_all_memory_p50.sh new file mode 100755 index 0000000..60bc9ee --- /dev/null +++ b/collaborativeagents/slurm/fullscale/run_all_memory_p50.sh @@ -0,0 +1,66 @@ +#!/bin/bash +#SBATCH --job-name=exp_all_memory_p50 +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA100x4 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=16 +#SBATCH --gres=gpu:nvidia_a100:2 +#SBATCH --mem=128G +#SBATCH --time=12:00:00 +#SBATCH --output=exp_all_memory_p50_%j.out +#SBATCH --error=exp_all_memory_p50_%j.err + +# Full run: all_memory method, profiles 50-100 (50 profiles × 15 sessions) + +cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model +source /u/yurenh2/miniforge3/etc/profile.d/conda.sh +conda activate eval + +export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface +export PYTHONPATH="${PWD}/src:${PWD}/collaborativeagents:${PYTHONPATH}" +export NCCL_P2P_DISABLE=1 + +set -a +source .env +set +a + +pip install --quiet openai python-dotenv json-repair + 
+MODEL_8B="/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/models/llama-3.1-8b-instruct" + +pkill -f "vllm.entrypoints" 2>/dev/null || true +sleep 2 + +CUDA_VISIBLE_DEVICES=0 python -m vllm.entrypoints.openai.api_server \ + --model $MODEL_8B --port 8003 --tensor-parallel-size 1 \ + --gpu-memory-utilization 0.5 --max-model-len 8192 \ + --dtype bfloat16 --disable-log-requests & + +for i in $(seq 1 90); do + curl -s http://localhost:8003/health > /dev/null 2>&1 && break + sleep 2 +done +echo "vLLM ready." + +cd collaborativeagents/scripts + +python run_experiments.py \ + --methods all_memory \ + --datasets math-hard \ + --n-profiles 200 \ + --start-profile 50 \ + --end-profile 100 \ + --n-sessions 15 \ + --max-turns 8 \ + --use-vllm \ + --use-openai-user \ + --openai-user-model gpt-5-mini \ + --reward-mode llm \ + --vllm-agent-url http://localhost:8003/v1 \ + --parallel-profiles 25 \ + --profile-path ../data/complex_profiles_v2/profiles_200.jsonl \ + --output-dir ../results/fullscale_15sess + +echo "all_memory p50-100 complete!" 
+pkill -f "vllm.entrypoints" 2>/dev/null || true diff --git a/collaborativeagents/slurm/fullscale/run_contextual_p0.sh b/collaborativeagents/slurm/fullscale/run_contextual_p0.sh new file mode 100755 index 0000000..6fa0211 --- /dev/null +++ b/collaborativeagents/slurm/fullscale/run_contextual_p0.sh @@ -0,0 +1,66 @@ +#!/bin/bash +#SBATCH --job-name=exp_contextual_p0 +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA100x4 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=16 +#SBATCH --gres=gpu:nvidia_a100:2 +#SBATCH --mem=128G +#SBATCH --time=12:00:00 +#SBATCH --output=exp_contextual_p0_%j.out +#SBATCH --error=exp_contextual_p0_%j.err + +# Full run: contextual method, profiles 0-50 (50 profiles × 15 sessions) + +cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model +source /u/yurenh2/miniforge3/etc/profile.d/conda.sh +conda activate eval + +export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface +export PYTHONPATH="${PWD}/src:${PWD}/collaborativeagents:${PYTHONPATH}" +export NCCL_P2P_DISABLE=1 + +set -a +source .env +set +a + +pip install --quiet openai python-dotenv json-repair + +MODEL_8B="/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/models/llama-3.1-8b-instruct" + +pkill -f "vllm.entrypoints" 2>/dev/null || true +sleep 2 + +CUDA_VISIBLE_DEVICES=0 python -m vllm.entrypoints.openai.api_server \ + --model $MODEL_8B --port 8003 --tensor-parallel-size 1 \ + --gpu-memory-utilization 0.5 --max-model-len 8192 \ + --dtype bfloat16 --disable-log-requests & + +for i in $(seq 1 90); do + curl -s http://localhost:8003/health > /dev/null 2>&1 && break + sleep 2 +done +echo "vLLM ready." 
+ +cd collaborativeagents/scripts + +python run_experiments.py \ + --methods contextual \ + --datasets math-hard \ + --n-profiles 200 \ + --start-profile 0 \ + --end-profile 50 \ + --n-sessions 15 \ + --max-turns 8 \ + --use-vllm \ + --use-openai-user \ + --openai-user-model gpt-5-mini \ + --reward-mode llm \ + --vllm-agent-url http://localhost:8003/v1 \ + --parallel-profiles 25 \ + --profile-path ../data/complex_profiles_v2/profiles_200.jsonl \ + --output-dir ../results/fullscale_15sess + +echo "contextual p0-50 complete!" +pkill -f "vllm.entrypoints" 2>/dev/null || true diff --git a/collaborativeagents/slurm/fullscale/run_contextual_p100.sh b/collaborativeagents/slurm/fullscale/run_contextual_p100.sh new file mode 100755 index 0000000..8250c19 --- /dev/null +++ b/collaborativeagents/slurm/fullscale/run_contextual_p100.sh @@ -0,0 +1,66 @@ +#!/bin/bash +#SBATCH --job-name=exp_contextual_p100 +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA100x4 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=16 +#SBATCH --gres=gpu:nvidia_a100:2 +#SBATCH --mem=128G +#SBATCH --time=12:00:00 +#SBATCH --output=exp_contextual_p100_%j.out +#SBATCH --error=exp_contextual_p100_%j.err + +# Full run: contextual method, profiles 100-150 (50 profiles × 15 sessions) + +cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model +source /u/yurenh2/miniforge3/etc/profile.d/conda.sh +conda activate eval + +export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface +export PYTHONPATH="${PWD}/src:${PWD}/collaborativeagents:${PYTHONPATH}" +export NCCL_P2P_DISABLE=1 + +set -a +source .env +set +a + +pip install --quiet openai python-dotenv json-repair + +MODEL_8B="/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/models/llama-3.1-8b-instruct" + +pkill -f "vllm.entrypoints" 2>/dev/null || true +sleep 2 + +CUDA_VISIBLE_DEVICES=0 python -m vllm.entrypoints.openai.api_server \ + --model $MODEL_8B --port 8003 --tensor-parallel-size 1 \ + 
--gpu-memory-utilization 0.5 --max-model-len 8192 \ + --dtype bfloat16 --disable-log-requests & + +for i in $(seq 1 90); do + curl -s http://localhost:8003/health > /dev/null 2>&1 && break + sleep 2 +done +echo "vLLM ready." + +cd collaborativeagents/scripts + +python run_experiments.py \ + --methods contextual \ + --datasets math-hard \ + --n-profiles 200 \ + --start-profile 100 \ + --end-profile 150 \ + --n-sessions 15 \ + --max-turns 8 \ + --use-vllm \ + --use-openai-user \ + --openai-user-model gpt-5-mini \ + --reward-mode llm \ + --vllm-agent-url http://localhost:8003/v1 \ + --parallel-profiles 25 \ + --profile-path ../data/complex_profiles_v2/profiles_200.jsonl \ + --output-dir ../results/fullscale_15sess + +echo "contextual p100-150 complete!" +pkill -f "vllm.entrypoints" 2>/dev/null || true diff --git a/collaborativeagents/slurm/fullscale/run_contextual_p150.sh b/collaborativeagents/slurm/fullscale/run_contextual_p150.sh new file mode 100755 index 0000000..fb14058 --- /dev/null +++ b/collaborativeagents/slurm/fullscale/run_contextual_p150.sh @@ -0,0 +1,66 @@ +#!/bin/bash +#SBATCH --job-name=exp_contextual_p150 +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA100x4 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=16 +#SBATCH --gres=gpu:nvidia_a100:2 +#SBATCH --mem=128G +#SBATCH --time=12:00:00 +#SBATCH --output=exp_contextual_p150_%j.out +#SBATCH --error=exp_contextual_p150_%j.err + +# Full run: contextual method, profiles 150-200 (50 profiles × 15 sessions) + +cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model +source /u/yurenh2/miniforge3/etc/profile.d/conda.sh +conda activate eval + +export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface +export PYTHONPATH="${PWD}/src:${PWD}/collaborativeagents:${PYTHONPATH}" +export NCCL_P2P_DISABLE=1 + +set -a +source .env +set +a + +pip install --quiet openai python-dotenv json-repair + 
+MODEL_8B="/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/models/llama-3.1-8b-instruct" + +pkill -f "vllm.entrypoints" 2>/dev/null || true +sleep 2 + +CUDA_VISIBLE_DEVICES=0 python -m vllm.entrypoints.openai.api_server \ + --model $MODEL_8B --port 8003 --tensor-parallel-size 1 \ + --gpu-memory-utilization 0.5 --max-model-len 8192 \ + --dtype bfloat16 --disable-log-requests & + +for i in $(seq 1 90); do + curl -s http://localhost:8003/health > /dev/null 2>&1 && break + sleep 2 +done +echo "vLLM ready." + +cd collaborativeagents/scripts + +python run_experiments.py \ + --methods contextual \ + --datasets math-hard \ + --n-profiles 200 \ + --start-profile 150 \ + --end-profile 200 \ + --n-sessions 15 \ + --max-turns 8 \ + --use-vllm \ + --use-openai-user \ + --openai-user-model gpt-5-mini \ + --reward-mode llm \ + --vllm-agent-url http://localhost:8003/v1 \ + --parallel-profiles 25 \ + --profile-path ../data/complex_profiles_v2/profiles_200.jsonl \ + --output-dir ../results/fullscale_15sess + +echo "contextual p150-200 complete!" 
+pkill -f "vllm.entrypoints" 2>/dev/null || true diff --git a/collaborativeagents/slurm/fullscale/run_contextual_p50.sh b/collaborativeagents/slurm/fullscale/run_contextual_p50.sh new file mode 100755 index 0000000..8b1788e --- /dev/null +++ b/collaborativeagents/slurm/fullscale/run_contextual_p50.sh @@ -0,0 +1,66 @@ +#!/bin/bash +#SBATCH --job-name=exp_contextual_p50 +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA100x4 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=16 +#SBATCH --gres=gpu:nvidia_a100:2 +#SBATCH --mem=128G +#SBATCH --time=12:00:00 +#SBATCH --output=exp_contextual_p50_%j.out +#SBATCH --error=exp_contextual_p50_%j.err + +# Full run: contextual method, profiles 50-100 (50 profiles × 15 sessions) + +cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model +source /u/yurenh2/miniforge3/etc/profile.d/conda.sh +conda activate eval + +export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface +export PYTHONPATH="${PWD}/src:${PWD}/collaborativeagents:${PYTHONPATH}" +export NCCL_P2P_DISABLE=1 + +set -a +source .env +set +a + +pip install --quiet openai python-dotenv json-repair + +MODEL_8B="/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/models/llama-3.1-8b-instruct" + +pkill -f "vllm.entrypoints" 2>/dev/null || true +sleep 2 + +CUDA_VISIBLE_DEVICES=0 python -m vllm.entrypoints.openai.api_server \ + --model $MODEL_8B --port 8003 --tensor-parallel-size 1 \ + --gpu-memory-utilization 0.5 --max-model-len 8192 \ + --dtype bfloat16 --disable-log-requests & + +for i in $(seq 1 90); do + curl -s http://localhost:8003/health > /dev/null 2>&1 && break + sleep 2 +done +echo "vLLM ready." 
+ +cd collaborativeagents/scripts + +python run_experiments.py \ + --methods contextual \ + --datasets math-hard \ + --n-profiles 200 \ + --start-profile 50 \ + --end-profile 100 \ + --n-sessions 15 \ + --max-turns 8 \ + --use-vllm \ + --use-openai-user \ + --openai-user-model gpt-5-mini \ + --reward-mode llm \ + --vllm-agent-url http://localhost:8003/v1 \ + --parallel-profiles 25 \ + --profile-path ../data/complex_profiles_v2/profiles_200.jsonl \ + --output-dir ../results/fullscale_15sess + +echo "contextual p50-100 complete!" +pkill -f "vllm.entrypoints" 2>/dev/null || true diff --git a/collaborativeagents/slurm/fullscale/run_rag_p0.sh b/collaborativeagents/slurm/fullscale/run_rag_p0.sh new file mode 100755 index 0000000..de4f038 --- /dev/null +++ b/collaborativeagents/slurm/fullscale/run_rag_p0.sh @@ -0,0 +1,66 @@ +#!/bin/bash +#SBATCH --job-name=exp_rag_p0 +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA100x4 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=16 +#SBATCH --gres=gpu:nvidia_a100:2 +#SBATCH --mem=128G +#SBATCH --time=12:00:00 +#SBATCH --output=exp_rag_p0_%j.out +#SBATCH --error=exp_rag_p0_%j.err + +# Full run: rag method, profiles 0-50 (50 profiles × 15 sessions) + +cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model +source /u/yurenh2/miniforge3/etc/profile.d/conda.sh +conda activate eval + +export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface +export PYTHONPATH="${PWD}/src:${PWD}/collaborativeagents:${PYTHONPATH}" +export NCCL_P2P_DISABLE=1 + +set -a +source .env +set +a + +pip install --quiet openai python-dotenv json-repair + +MODEL_8B="/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/models/llama-3.1-8b-instruct" + +pkill -f "vllm.entrypoints" 2>/dev/null || true +sleep 2 + +CUDA_VISIBLE_DEVICES=0 python -m vllm.entrypoints.openai.api_server \ + --model $MODEL_8B --port 8003 --tensor-parallel-size 1 \ + --gpu-memory-utilization 0.5 --max-model-len 8192 \ + --dtype 
bfloat16 --disable-log-requests & + +for i in $(seq 1 90); do + curl -s http://localhost:8003/health > /dev/null 2>&1 && break + sleep 2 +done +echo "vLLM ready." + +cd collaborativeagents/scripts + +python run_experiments.py \ + --methods rag \ + --datasets math-hard \ + --n-profiles 200 \ + --start-profile 0 \ + --end-profile 50 \ + --n-sessions 15 \ + --max-turns 8 \ + --use-vllm \ + --use-openai-user \ + --openai-user-model gpt-5-mini \ + --reward-mode llm \ + --vllm-agent-url http://localhost:8003/v1 \ + --parallel-profiles 25 \ + --profile-path ../data/complex_profiles_v2/profiles_200.jsonl \ + --output-dir ../results/fullscale_15sess + +echo "rag p0-50 complete!" +pkill -f "vllm.entrypoints" 2>/dev/null || true diff --git a/collaborativeagents/slurm/fullscale/run_rag_p100.sh b/collaborativeagents/slurm/fullscale/run_rag_p100.sh new file mode 100755 index 0000000..c9b9d7e --- /dev/null +++ b/collaborativeagents/slurm/fullscale/run_rag_p100.sh @@ -0,0 +1,66 @@ +#!/bin/bash +#SBATCH --job-name=exp_rag_p100 +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA100x4 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=16 +#SBATCH --gres=gpu:nvidia_a100:2 +#SBATCH --mem=128G +#SBATCH --time=12:00:00 +#SBATCH --output=exp_rag_p100_%j.out +#SBATCH --error=exp_rag_p100_%j.err + +# Full run: rag method, profiles 100-150 (50 profiles × 15 sessions) + +cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model +source /u/yurenh2/miniforge3/etc/profile.d/conda.sh +conda activate eval + +export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface +export PYTHONPATH="${PWD}/src:${PWD}/collaborativeagents:${PYTHONPATH}" +export NCCL_P2P_DISABLE=1 + +set -a +source .env +set +a + +pip install --quiet openai python-dotenv json-repair + +MODEL_8B="/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/models/llama-3.1-8b-instruct" + +pkill -f "vllm.entrypoints" 2>/dev/null || true +sleep 2 + +CUDA_VISIBLE_DEVICES=0 python -m 
vllm.entrypoints.openai.api_server \ + --model $MODEL_8B --port 8003 --tensor-parallel-size 1 \ + --gpu-memory-utilization 0.5 --max-model-len 8192 \ + --dtype bfloat16 --disable-log-requests & + +for i in $(seq 1 90); do + curl -s http://localhost:8003/health > /dev/null 2>&1 && break + sleep 2 +done +echo "vLLM ready." + +cd collaborativeagents/scripts + +python run_experiments.py \ + --methods rag \ + --datasets math-hard \ + --n-profiles 200 \ + --start-profile 100 \ + --end-profile 150 \ + --n-sessions 15 \ + --max-turns 8 \ + --use-vllm \ + --use-openai-user \ + --openai-user-model gpt-5-mini \ + --reward-mode llm \ + --vllm-agent-url http://localhost:8003/v1 \ + --parallel-profiles 25 \ + --profile-path ../data/complex_profiles_v2/profiles_200.jsonl \ + --output-dir ../results/fullscale_15sess + +echo "rag p100-150 complete!" +pkill -f "vllm.entrypoints" 2>/dev/null || true diff --git a/collaborativeagents/slurm/fullscale/run_rag_p150.sh b/collaborativeagents/slurm/fullscale/run_rag_p150.sh new file mode 100755 index 0000000..0ec5e4f --- /dev/null +++ b/collaborativeagents/slurm/fullscale/run_rag_p150.sh @@ -0,0 +1,66 @@ +#!/bin/bash +#SBATCH --job-name=exp_rag_p150 +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA100x4 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=16 +#SBATCH --gres=gpu:nvidia_a100:2 +#SBATCH --mem=128G +#SBATCH --time=12:00:00 +#SBATCH --output=exp_rag_p150_%j.out +#SBATCH --error=exp_rag_p150_%j.err + +# Full run: rag method, profiles 150-200 (50 profiles × 15 sessions) + +cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model +source /u/yurenh2/miniforge3/etc/profile.d/conda.sh +conda activate eval + +export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface +export PYTHONPATH="${PWD}/src:${PWD}/collaborativeagents:${PYTHONPATH}" +export NCCL_P2P_DISABLE=1 + +set -a +source .env +set +a + +pip install --quiet openai python-dotenv json-repair + 
+MODEL_8B="/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/models/llama-3.1-8b-instruct" + +pkill -f "vllm.entrypoints" 2>/dev/null || true +sleep 2 + +CUDA_VISIBLE_DEVICES=0 python -m vllm.entrypoints.openai.api_server \ + --model $MODEL_8B --port 8003 --tensor-parallel-size 1 \ + --gpu-memory-utilization 0.5 --max-model-len 8192 \ + --dtype bfloat16 --disable-log-requests & + +for i in $(seq 1 90); do + curl -s http://localhost:8003/health > /dev/null 2>&1 && break + sleep 2 +done +echo "vLLM ready." + +cd collaborativeagents/scripts + +python run_experiments.py \ + --methods rag \ + --datasets math-hard \ + --n-profiles 200 \ + --start-profile 150 \ + --end-profile 200 \ + --n-sessions 15 \ + --max-turns 8 \ + --use-vllm \ + --use-openai-user \ + --openai-user-model gpt-5-mini \ + --reward-mode llm \ + --vllm-agent-url http://localhost:8003/v1 \ + --parallel-profiles 25 \ + --profile-path ../data/complex_profiles_v2/profiles_200.jsonl \ + --output-dir ../results/fullscale_15sess + +echo "rag p150-200 complete!" 
+pkill -f "vllm.entrypoints" 2>/dev/null || true diff --git a/collaborativeagents/slurm/fullscale/run_rag_p50.sh b/collaborativeagents/slurm/fullscale/run_rag_p50.sh new file mode 100755 index 0000000..b625300 --- /dev/null +++ b/collaborativeagents/slurm/fullscale/run_rag_p50.sh @@ -0,0 +1,66 @@ +#!/bin/bash +#SBATCH --job-name=exp_rag_p50 +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA100x4 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=16 +#SBATCH --gres=gpu:nvidia_a100:2 +#SBATCH --mem=128G +#SBATCH --time=12:00:00 +#SBATCH --output=exp_rag_p50_%j.out +#SBATCH --error=exp_rag_p50_%j.err + +# Full run: rag method, profiles 50-100 (50 profiles × 15 sessions) + +cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model +source /u/yurenh2/miniforge3/etc/profile.d/conda.sh +conda activate eval + +export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface +export PYTHONPATH="${PWD}/src:${PWD}/collaborativeagents:${PYTHONPATH}" +export NCCL_P2P_DISABLE=1 + +set -a +source .env +set +a + +pip install --quiet openai python-dotenv json-repair + +MODEL_8B="/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/models/llama-3.1-8b-instruct" + +pkill -f "vllm.entrypoints" 2>/dev/null || true +sleep 2 + +CUDA_VISIBLE_DEVICES=0 python -m vllm.entrypoints.openai.api_server \ + --model $MODEL_8B --port 8003 --tensor-parallel-size 1 \ + --gpu-memory-utilization 0.5 --max-model-len 8192 \ + --dtype bfloat16 --disable-log-requests & + +for i in $(seq 1 90); do + curl -s http://localhost:8003/health > /dev/null 2>&1 && break + sleep 2 +done +echo "vLLM ready." 
+ +cd collaborativeagents/scripts + +python run_experiments.py \ + --methods rag \ + --datasets math-hard \ + --n-profiles 200 \ + --start-profile 50 \ + --end-profile 100 \ + --n-sessions 15 \ + --max-turns 8 \ + --use-vllm \ + --use-openai-user \ + --openai-user-model gpt-5-mini \ + --reward-mode llm \ + --vllm-agent-url http://localhost:8003/v1 \ + --parallel-profiles 25 \ + --profile-path ../data/complex_profiles_v2/profiles_200.jsonl \ + --output-dir ../results/fullscale_15sess + +echo "rag p50-100 complete!" +pkill -f "vllm.entrypoints" 2>/dev/null || true diff --git a/collaborativeagents/slurm/fullscale/run_rag_vector_p0.sh b/collaborativeagents/slurm/fullscale/run_rag_vector_p0.sh new file mode 100755 index 0000000..1f28f8f --- /dev/null +++ b/collaborativeagents/slurm/fullscale/run_rag_vector_p0.sh @@ -0,0 +1,66 @@ +#!/bin/bash +#SBATCH --job-name=exp_rag_vector_p0 +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA100x4 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=16 +#SBATCH --gres=gpu:nvidia_a100:2 +#SBATCH --mem=128G +#SBATCH --time=12:00:00 +#SBATCH --output=exp_rag_vector_p0_%j.out +#SBATCH --error=exp_rag_vector_p0_%j.err + +# Full run: rag_vector method, profiles 0-50 (50 profiles × 15 sessions) + +cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model +source /u/yurenh2/miniforge3/etc/profile.d/conda.sh +conda activate eval + +export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface +export PYTHONPATH="${PWD}/src:${PWD}/collaborativeagents:${PYTHONPATH}" +export NCCL_P2P_DISABLE=1 + +set -a +source .env +set +a + +pip install --quiet openai python-dotenv json-repair + +MODEL_8B="/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/models/llama-3.1-8b-instruct" + +pkill -f "vllm.entrypoints" 2>/dev/null || true +sleep 2 + +CUDA_VISIBLE_DEVICES=0 python -m vllm.entrypoints.openai.api_server \ + --model $MODEL_8B --port 8003 --tensor-parallel-size 1 \ + --gpu-memory-utilization 0.5 
--max-model-len 8192 \ + --dtype bfloat16 --disable-log-requests & + +for i in $(seq 1 90); do + curl -s http://localhost:8003/health > /dev/null 2>&1 && break + sleep 2 +done +echo "vLLM ready." + +cd collaborativeagents/scripts + +python run_experiments.py \ + --methods rag_vector \ + --datasets math-hard \ + --n-profiles 200 \ + --start-profile 0 \ + --end-profile 50 \ + --n-sessions 15 \ + --max-turns 8 \ + --use-vllm \ + --use-openai-user \ + --openai-user-model gpt-5-mini \ + --reward-mode llm \ + --vllm-agent-url http://localhost:8003/v1 \ + --parallel-profiles 25 \ + --profile-path ../data/complex_profiles_v2/profiles_200.jsonl \ + --output-dir ../results/fullscale_15sess + +echo "rag_vector p0-50 complete!" +pkill -f "vllm.entrypoints" 2>/dev/null || true diff --git a/collaborativeagents/slurm/fullscale/run_rag_vector_p100.sh b/collaborativeagents/slurm/fullscale/run_rag_vector_p100.sh new file mode 100755 index 0000000..b658bab --- /dev/null +++ b/collaborativeagents/slurm/fullscale/run_rag_vector_p100.sh @@ -0,0 +1,66 @@ +#!/bin/bash +#SBATCH --job-name=exp_rag_vector_p100 +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA100x4 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=16 +#SBATCH --gres=gpu:nvidia_a100:2 +#SBATCH --mem=128G +#SBATCH --time=12:00:00 +#SBATCH --output=exp_rag_vector_p100_%j.out +#SBATCH --error=exp_rag_vector_p100_%j.err + +# Full run: rag_vector method, profiles 100-150 (50 profiles × 15 sessions) + +cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model +source /u/yurenh2/miniforge3/etc/profile.d/conda.sh +conda activate eval + +export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface +export PYTHONPATH="${PWD}/src:${PWD}/collaborativeagents:${PYTHONPATH}" +export NCCL_P2P_DISABLE=1 + +set -a +source .env +set +a + +pip install --quiet openai python-dotenv json-repair + +MODEL_8B="/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/models/llama-3.1-8b-instruct" + 
+pkill -f "vllm.entrypoints" 2>/dev/null || true +sleep 2 + +CUDA_VISIBLE_DEVICES=0 python -m vllm.entrypoints.openai.api_server \ + --model $MODEL_8B --port 8003 --tensor-parallel-size 1 \ + --gpu-memory-utilization 0.5 --max-model-len 8192 \ + --dtype bfloat16 --disable-log-requests & + +for i in $(seq 1 90); do + curl -s http://localhost:8003/health > /dev/null 2>&1 && break + sleep 2 +done +echo "vLLM ready." + +cd collaborativeagents/scripts + +python run_experiments.py \ + --methods rag_vector \ + --datasets math-hard \ + --n-profiles 200 \ + --start-profile 100 \ + --end-profile 150 \ + --n-sessions 15 \ + --max-turns 8 \ + --use-vllm \ + --use-openai-user \ + --openai-user-model gpt-5-mini \ + --reward-mode llm \ + --vllm-agent-url http://localhost:8003/v1 \ + --parallel-profiles 25 \ + --profile-path ../data/complex_profiles_v2/profiles_200.jsonl \ + --output-dir ../results/fullscale_15sess + +echo "rag_vector p100-150 complete!" +pkill -f "vllm.entrypoints" 2>/dev/null || true diff --git a/collaborativeagents/slurm/fullscale/run_rag_vector_p150.sh b/collaborativeagents/slurm/fullscale/run_rag_vector_p150.sh new file mode 100755 index 0000000..8c2458f --- /dev/null +++ b/collaborativeagents/slurm/fullscale/run_rag_vector_p150.sh @@ -0,0 +1,66 @@ +#!/bin/bash +#SBATCH --job-name=exp_rag_vector_p150 +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA100x4 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=16 +#SBATCH --gres=gpu:nvidia_a100:2 +#SBATCH --mem=128G +#SBATCH --time=12:00:00 +#SBATCH --output=exp_rag_vector_p150_%j.out +#SBATCH --error=exp_rag_vector_p150_%j.err + +# Full run: rag_vector method, profiles 150-200 (50 profiles × 15 sessions) + +cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model +source /u/yurenh2/miniforge3/etc/profile.d/conda.sh +conda activate eval + +export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface +export PYTHONPATH="${PWD}/src:${PWD}/collaborativeagents:${PYTHONPATH}" 
+export NCCL_P2P_DISABLE=1 + +set -a +source .env +set +a + +pip install --quiet openai python-dotenv json-repair + +MODEL_8B="/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/models/llama-3.1-8b-instruct" + +pkill -f "vllm.entrypoints" 2>/dev/null || true +sleep 2 + +CUDA_VISIBLE_DEVICES=0 python -m vllm.entrypoints.openai.api_server \ + --model $MODEL_8B --port 8003 --tensor-parallel-size 1 \ + --gpu-memory-utilization 0.5 --max-model-len 8192 \ + --dtype bfloat16 --disable-log-requests & + +for i in $(seq 1 90); do + curl -s http://localhost:8003/health > /dev/null 2>&1 && break + sleep 2 +done +echo "vLLM ready." + +cd collaborativeagents/scripts + +python run_experiments.py \ + --methods rag_vector \ + --datasets math-hard \ + --n-profiles 200 \ + --start-profile 150 \ + --end-profile 200 \ + --n-sessions 15 \ + --max-turns 8 \ + --use-vllm \ + --use-openai-user \ + --openai-user-model gpt-5-mini \ + --reward-mode llm \ + --vllm-agent-url http://localhost:8003/v1 \ + --parallel-profiles 25 \ + --profile-path ../data/complex_profiles_v2/profiles_200.jsonl \ + --output-dir ../results/fullscale_15sess + +echo "rag_vector p150-200 complete!" 
+pkill -f "vllm.entrypoints" 2>/dev/null || true diff --git a/collaborativeagents/slurm/fullscale/run_rag_vector_p50.sh b/collaborativeagents/slurm/fullscale/run_rag_vector_p50.sh new file mode 100755 index 0000000..afb0164 --- /dev/null +++ b/collaborativeagents/slurm/fullscale/run_rag_vector_p50.sh @@ -0,0 +1,66 @@ +#!/bin/bash +#SBATCH --job-name=exp_rag_vector_p50 +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA100x4 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=16 +#SBATCH --gres=gpu:nvidia_a100:2 +#SBATCH --mem=128G +#SBATCH --time=12:00:00 +#SBATCH --output=exp_rag_vector_p50_%j.out +#SBATCH --error=exp_rag_vector_p50_%j.err + +# Full run: rag_vector method, profiles 50-100 (50 profiles × 15 sessions) + +cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model +source /u/yurenh2/miniforge3/etc/profile.d/conda.sh +conda activate eval + +export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface +export PYTHONPATH="${PWD}/src:${PWD}/collaborativeagents:${PYTHONPATH}" +export NCCL_P2P_DISABLE=1 + +set -a +source .env +set +a + +pip install --quiet openai python-dotenv json-repair + +MODEL_8B="/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/models/llama-3.1-8b-instruct" + +pkill -f "vllm.entrypoints" 2>/dev/null || true +sleep 2 + +CUDA_VISIBLE_DEVICES=0 python -m vllm.entrypoints.openai.api_server \ + --model $MODEL_8B --port 8003 --tensor-parallel-size 1 \ + --gpu-memory-utilization 0.5 --max-model-len 8192 \ + --dtype bfloat16 --disable-log-requests & + +for i in $(seq 1 90); do + curl -s http://localhost:8003/health > /dev/null 2>&1 && break + sleep 2 +done +echo "vLLM ready." 
+ +cd collaborativeagents/scripts + +python run_experiments.py \ + --methods rag_vector \ + --datasets math-hard \ + --n-profiles 200 \ + --start-profile 50 \ + --end-profile 100 \ + --n-sessions 15 \ + --max-turns 8 \ + --use-vllm \ + --use-openai-user \ + --openai-user-model gpt-5-mini \ + --reward-mode llm \ + --vllm-agent-url http://localhost:8003/v1 \ + --parallel-profiles 25 \ + --profile-path ../data/complex_profiles_v2/profiles_200.jsonl \ + --output-dir ../results/fullscale_15sess + +echo "rag_vector p50-100 complete!" +pkill -f "vllm.entrypoints" 2>/dev/null || true diff --git a/collaborativeagents/slurm/fullscale/run_reflection_p0.sh b/collaborativeagents/slurm/fullscale/run_reflection_p0.sh new file mode 100755 index 0000000..f5d5649 --- /dev/null +++ b/collaborativeagents/slurm/fullscale/run_reflection_p0.sh @@ -0,0 +1,66 @@ +#!/bin/bash +#SBATCH --job-name=exp_reflection_p0 +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA100x4 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=16 +#SBATCH --gres=gpu:nvidia_a100:2 +#SBATCH --mem=128G +#SBATCH --time=12:00:00 +#SBATCH --output=exp_reflection_p0_%j.out +#SBATCH --error=exp_reflection_p0_%j.err + +# Full run: reflection method, profiles 0-50 (50 profiles × 15 sessions) + +cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model +source /u/yurenh2/miniforge3/etc/profile.d/conda.sh +conda activate eval + +export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface +export PYTHONPATH="${PWD}/src:${PWD}/collaborativeagents:${PYTHONPATH}" +export NCCL_P2P_DISABLE=1 + +set -a +source .env +set +a + +pip install --quiet openai python-dotenv json-repair + +MODEL_8B="/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/models/llama-3.1-8b-instruct" + +pkill -f "vllm.entrypoints" 2>/dev/null || true +sleep 2 + +CUDA_VISIBLE_DEVICES=0 python -m vllm.entrypoints.openai.api_server \ + --model $MODEL_8B --port 8003 --tensor-parallel-size 1 \ + 
--gpu-memory-utilization 0.5 --max-model-len 8192 \ + --dtype bfloat16 --disable-log-requests & + +for i in $(seq 1 90); do + curl -s http://localhost:8003/health > /dev/null 2>&1 && break + sleep 2 +done +echo "vLLM ready." + +cd collaborativeagents/scripts + +python run_experiments.py \ + --methods reflection \ + --datasets math-hard \ + --n-profiles 200 \ + --start-profile 0 \ + --end-profile 50 \ + --n-sessions 15 \ + --max-turns 8 \ + --use-vllm \ + --use-openai-user \ + --openai-user-model gpt-5-mini \ + --reward-mode llm \ + --vllm-agent-url http://localhost:8003/v1 \ + --parallel-profiles 25 \ + --profile-path ../data/complex_profiles_v2/profiles_200.jsonl \ + --output-dir ../results/fullscale_15sess + +echo "reflection p0-50 complete!" +pkill -f "vllm.entrypoints" 2>/dev/null || true diff --git a/collaborativeagents/slurm/fullscale/run_reflection_p100.sh b/collaborativeagents/slurm/fullscale/run_reflection_p100.sh new file mode 100755 index 0000000..68f7047 --- /dev/null +++ b/collaborativeagents/slurm/fullscale/run_reflection_p100.sh @@ -0,0 +1,66 @@ +#!/bin/bash +#SBATCH --job-name=exp_reflection_p100 +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA100x4 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=16 +#SBATCH --gres=gpu:nvidia_a100:2 +#SBATCH --mem=128G +#SBATCH --time=12:00:00 +#SBATCH --output=exp_reflection_p100_%j.out +#SBATCH --error=exp_reflection_p100_%j.err + +# Full run: reflection method, profiles 100-150 (50 profiles × 15 sessions) + +cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model +source /u/yurenh2/miniforge3/etc/profile.d/conda.sh +conda activate eval + +export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface +export PYTHONPATH="${PWD}/src:${PWD}/collaborativeagents:${PYTHONPATH}" +export NCCL_P2P_DISABLE=1 + +set -a +source .env +set +a + +pip install --quiet openai python-dotenv json-repair + 
+MODEL_8B="/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/models/llama-3.1-8b-instruct" + +pkill -f "vllm.entrypoints" 2>/dev/null || true +sleep 2 + +CUDA_VISIBLE_DEVICES=0 python -m vllm.entrypoints.openai.api_server \ + --model $MODEL_8B --port 8003 --tensor-parallel-size 1 \ + --gpu-memory-utilization 0.5 --max-model-len 8192 \ + --dtype bfloat16 --disable-log-requests & + +for i in $(seq 1 90); do + curl -s http://localhost:8003/health > /dev/null 2>&1 && break + sleep 2 +done +echo "vLLM ready." + +cd collaborativeagents/scripts + +python run_experiments.py \ + --methods reflection \ + --datasets math-hard \ + --n-profiles 200 \ + --start-profile 100 \ + --end-profile 150 \ + --n-sessions 15 \ + --max-turns 8 \ + --use-vllm \ + --use-openai-user \ + --openai-user-model gpt-5-mini \ + --reward-mode llm \ + --vllm-agent-url http://localhost:8003/v1 \ + --parallel-profiles 25 \ + --profile-path ../data/complex_profiles_v2/profiles_200.jsonl \ + --output-dir ../results/fullscale_15sess + +echo "reflection p100-150 complete!" 
+pkill -f "vllm.entrypoints" 2>/dev/null || true diff --git a/collaborativeagents/slurm/fullscale/run_reflection_p150.sh b/collaborativeagents/slurm/fullscale/run_reflection_p150.sh new file mode 100755 index 0000000..a451e49 --- /dev/null +++ b/collaborativeagents/slurm/fullscale/run_reflection_p150.sh @@ -0,0 +1,66 @@ +#!/bin/bash +#SBATCH --job-name=exp_reflection_p150 +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA100x4 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=16 +#SBATCH --gres=gpu:nvidia_a100:2 +#SBATCH --mem=128G +#SBATCH --time=12:00:00 +#SBATCH --output=exp_reflection_p150_%j.out +#SBATCH --error=exp_reflection_p150_%j.err + +# Full run: reflection method, profiles 150-200 (50 profiles × 15 sessions) + +cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model +source /u/yurenh2/miniforge3/etc/profile.d/conda.sh +conda activate eval + +export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface +export PYTHONPATH="${PWD}/src:${PWD}/collaborativeagents:${PYTHONPATH}" +export NCCL_P2P_DISABLE=1 + +set -a +source .env +set +a + +pip install --quiet openai python-dotenv json-repair + +MODEL_8B="/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/models/llama-3.1-8b-instruct" + +pkill -f "vllm.entrypoints" 2>/dev/null || true +sleep 2 + +CUDA_VISIBLE_DEVICES=0 python -m vllm.entrypoints.openai.api_server \ + --model $MODEL_8B --port 8003 --tensor-parallel-size 1 \ + --gpu-memory-utilization 0.5 --max-model-len 8192 \ + --dtype bfloat16 --disable-log-requests & + +for i in $(seq 1 90); do + curl -s http://localhost:8003/health > /dev/null 2>&1 && break + sleep 2 +done +echo "vLLM ready." 
+ +cd collaborativeagents/scripts + +python run_experiments.py \ + --methods reflection \ + --datasets math-hard \ + --n-profiles 200 \ + --start-profile 150 \ + --end-profile 200 \ + --n-sessions 15 \ + --max-turns 8 \ + --use-vllm \ + --use-openai-user \ + --openai-user-model gpt-5-mini \ + --reward-mode llm \ + --vllm-agent-url http://localhost:8003/v1 \ + --parallel-profiles 25 \ + --profile-path ../data/complex_profiles_v2/profiles_200.jsonl \ + --output-dir ../results/fullscale_15sess + +echo "reflection p150-200 complete!" +pkill -f "vllm.entrypoints" 2>/dev/null || true diff --git a/collaborativeagents/slurm/fullscale/run_reflection_p50.sh b/collaborativeagents/slurm/fullscale/run_reflection_p50.sh new file mode 100755 index 0000000..dc977d7 --- /dev/null +++ b/collaborativeagents/slurm/fullscale/run_reflection_p50.sh @@ -0,0 +1,66 @@ +#!/bin/bash +#SBATCH --job-name=exp_reflection_p50 +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA100x4 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=16 +#SBATCH --gres=gpu:nvidia_a100:2 +#SBATCH --mem=128G +#SBATCH --time=12:00:00 +#SBATCH --output=exp_reflection_p50_%j.out +#SBATCH --error=exp_reflection_p50_%j.err + +# Full run: reflection method, profiles 50-100 (50 profiles × 15 sessions) + +cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model +source /u/yurenh2/miniforge3/etc/profile.d/conda.sh +conda activate eval + +export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface +export PYTHONPATH="${PWD}/src:${PWD}/collaborativeagents:${PYTHONPATH}" +export NCCL_P2P_DISABLE=1 + +set -a +source .env +set +a + +pip install --quiet openai python-dotenv json-repair + +MODEL_8B="/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/models/llama-3.1-8b-instruct" + +pkill -f "vllm.entrypoints" 2>/dev/null || true +sleep 2 + +CUDA_VISIBLE_DEVICES=0 python -m vllm.entrypoints.openai.api_server \ + --model $MODEL_8B --port 8003 --tensor-parallel-size 1 \ + 
--gpu-memory-utilization 0.5 --max-model-len 8192 \ + --dtype bfloat16 --disable-log-requests & + +for i in $(seq 1 90); do + curl -s http://localhost:8003/health > /dev/null 2>&1 && break + sleep 2 +done +echo "vLLM ready." + +cd collaborativeagents/scripts + +python run_experiments.py \ + --methods reflection \ + --datasets math-hard \ + --n-profiles 200 \ + --start-profile 50 \ + --end-profile 100 \ + --n-sessions 15 \ + --max-turns 8 \ + --use-vllm \ + --use-openai-user \ + --openai-user-model gpt-5-mini \ + --reward-mode llm \ + --vllm-agent-url http://localhost:8003/v1 \ + --parallel-profiles 25 \ + --profile-path ../data/complex_profiles_v2/profiles_200.jsonl \ + --output-dir ../results/fullscale_15sess + +echo "reflection p50-100 complete!" +pkill -f "vllm.entrypoints" 2>/dev/null || true diff --git a/collaborativeagents/slurm/fullscale/run_vanilla_p0.sh b/collaborativeagents/slurm/fullscale/run_vanilla_p0.sh new file mode 100755 index 0000000..f5706c8 --- /dev/null +++ b/collaborativeagents/slurm/fullscale/run_vanilla_p0.sh @@ -0,0 +1,66 @@ +#!/bin/bash +#SBATCH --job-name=exp_vanilla_p0 +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA100x4 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=16 +#SBATCH --gres=gpu:nvidia_a100:2 +#SBATCH --mem=128G +#SBATCH --time=12:00:00 +#SBATCH --output=exp_vanilla_p0_%j.out +#SBATCH --error=exp_vanilla_p0_%j.err + +# Full run: vanilla method, profiles 0-50 (50 profiles × 15 sessions) + +cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model +source /u/yurenh2/miniforge3/etc/profile.d/conda.sh +conda activate eval + +export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface +export PYTHONPATH="${PWD}/src:${PWD}/collaborativeagents:${PYTHONPATH}" +export NCCL_P2P_DISABLE=1 + +set -a +source .env +set +a + +pip install --quiet openai python-dotenv json-repair + +MODEL_8B="/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/models/llama-3.1-8b-instruct" + +pkill 
-f "vllm.entrypoints" 2>/dev/null || true +sleep 2 + +CUDA_VISIBLE_DEVICES=0 python -m vllm.entrypoints.openai.api_server \ + --model $MODEL_8B --port 8003 --tensor-parallel-size 1 \ + --gpu-memory-utilization 0.5 --max-model-len 8192 \ + --dtype bfloat16 --disable-log-requests & + +for i in $(seq 1 90); do + curl -s http://localhost:8003/health > /dev/null 2>&1 && break + sleep 2 +done +echo "vLLM ready." + +cd collaborativeagents/scripts + +python run_experiments.py \ + --methods vanilla \ + --datasets math-hard \ + --n-profiles 200 \ + --start-profile 0 \ + --end-profile 50 \ + --n-sessions 15 \ + --max-turns 8 \ + --use-vllm \ + --use-openai-user \ + --openai-user-model gpt-5-mini \ + --reward-mode llm \ + --vllm-agent-url http://localhost:8003/v1 \ + --parallel-profiles 25 \ + --profile-path ../data/complex_profiles_v2/profiles_200.jsonl \ + --output-dir ../results/fullscale_15sess + +echo "vanilla p0-50 complete!" +pkill -f "vllm.entrypoints" 2>/dev/null || true diff --git a/collaborativeagents/slurm/fullscale/run_vanilla_p100.sh b/collaborativeagents/slurm/fullscale/run_vanilla_p100.sh new file mode 100755 index 0000000..8ca9ce1 --- /dev/null +++ b/collaborativeagents/slurm/fullscale/run_vanilla_p100.sh @@ -0,0 +1,66 @@ +#!/bin/bash +#SBATCH --job-name=exp_vanilla_p100 +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA100x4 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=16 +#SBATCH --gres=gpu:nvidia_a100:2 +#SBATCH --mem=128G +#SBATCH --time=12:00:00 +#SBATCH --output=exp_vanilla_p100_%j.out +#SBATCH --error=exp_vanilla_p100_%j.err + +# Full run: vanilla method, profiles 100-150 (50 profiles × 15 sessions) + +cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model +source /u/yurenh2/miniforge3/etc/profile.d/conda.sh +conda activate eval + +export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface +export PYTHONPATH="${PWD}/src:${PWD}/collaborativeagents:${PYTHONPATH}" +export NCCL_P2P_DISABLE=1 + +set -a 
+source .env +set +a + +pip install --quiet openai python-dotenv json-repair + +MODEL_8B="/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/models/llama-3.1-8b-instruct" + +pkill -f "vllm.entrypoints" 2>/dev/null || true +sleep 2 + +CUDA_VISIBLE_DEVICES=0 python -m vllm.entrypoints.openai.api_server \ + --model $MODEL_8B --port 8003 --tensor-parallel-size 1 \ + --gpu-memory-utilization 0.5 --max-model-len 8192 \ + --dtype bfloat16 --disable-log-requests & + +for i in $(seq 1 90); do + curl -s http://localhost:8003/health > /dev/null 2>&1 && break + sleep 2 +done +echo "vLLM ready." + +cd collaborativeagents/scripts + +python run_experiments.py \ + --methods vanilla \ + --datasets math-hard \ + --n-profiles 200 \ + --start-profile 100 \ + --end-profile 150 \ + --n-sessions 15 \ + --max-turns 8 \ + --use-vllm \ + --use-openai-user \ + --openai-user-model gpt-5-mini \ + --reward-mode llm \ + --vllm-agent-url http://localhost:8003/v1 \ + --parallel-profiles 25 \ + --profile-path ../data/complex_profiles_v2/profiles_200.jsonl \ + --output-dir ../results/fullscale_15sess + +echo "vanilla p100-150 complete!" 
+pkill -f "vllm.entrypoints" 2>/dev/null || true diff --git a/collaborativeagents/slurm/fullscale/run_vanilla_p150.sh b/collaborativeagents/slurm/fullscale/run_vanilla_p150.sh new file mode 100755 index 0000000..07ff6d3 --- /dev/null +++ b/collaborativeagents/slurm/fullscale/run_vanilla_p150.sh @@ -0,0 +1,66 @@ +#!/bin/bash +#SBATCH --job-name=exp_vanilla_p150 +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA100x4 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=16 +#SBATCH --gres=gpu:nvidia_a100:2 +#SBATCH --mem=128G +#SBATCH --time=12:00:00 +#SBATCH --output=exp_vanilla_p150_%j.out +#SBATCH --error=exp_vanilla_p150_%j.err + +# Full run: vanilla method, profiles 150-200 (50 profiles × 15 sessions) + +cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model +source /u/yurenh2/miniforge3/etc/profile.d/conda.sh +conda activate eval + +export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface +export PYTHONPATH="${PWD}/src:${PWD}/collaborativeagents:${PYTHONPATH}" +export NCCL_P2P_DISABLE=1 + +set -a +source .env +set +a + +pip install --quiet openai python-dotenv json-repair + +MODEL_8B="/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/models/llama-3.1-8b-instruct" + +pkill -f "vllm.entrypoints" 2>/dev/null || true +sleep 2 + +CUDA_VISIBLE_DEVICES=0 python -m vllm.entrypoints.openai.api_server \ + --model $MODEL_8B --port 8003 --tensor-parallel-size 1 \ + --gpu-memory-utilization 0.5 --max-model-len 8192 \ + --dtype bfloat16 --disable-log-requests & + +for i in $(seq 1 90); do + curl -s http://localhost:8003/health > /dev/null 2>&1 && break + sleep 2 +done +echo "vLLM ready." 
+ +cd collaborativeagents/scripts + +python run_experiments.py \ + --methods vanilla \ + --datasets math-hard \ + --n-profiles 200 \ + --start-profile 150 \ + --end-profile 200 \ + --n-sessions 15 \ + --max-turns 8 \ + --use-vllm \ + --use-openai-user \ + --openai-user-model gpt-5-mini \ + --reward-mode llm \ + --vllm-agent-url http://localhost:8003/v1 \ + --parallel-profiles 25 \ + --profile-path ../data/complex_profiles_v2/profiles_200.jsonl \ + --output-dir ../results/fullscale_15sess + +echo "vanilla p150-200 complete!" +pkill -f "vllm.entrypoints" 2>/dev/null || true diff --git a/collaborativeagents/slurm/fullscale/run_vanilla_p50.sh b/collaborativeagents/slurm/fullscale/run_vanilla_p50.sh new file mode 100755 index 0000000..d77b881 --- /dev/null +++ b/collaborativeagents/slurm/fullscale/run_vanilla_p50.sh @@ -0,0 +1,66 @@ +#!/bin/bash +#SBATCH --job-name=exp_vanilla_p50 +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA100x4 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=16 +#SBATCH --gres=gpu:nvidia_a100:2 +#SBATCH --mem=128G +#SBATCH --time=12:00:00 +#SBATCH --output=exp_vanilla_p50_%j.out +#SBATCH --error=exp_vanilla_p50_%j.err + +# Full run: vanilla method, profiles 50-100 (50 profiles × 15 sessions) + +cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model +source /u/yurenh2/miniforge3/etc/profile.d/conda.sh +conda activate eval + +export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface +export PYTHONPATH="${PWD}/src:${PWD}/collaborativeagents:${PYTHONPATH}" +export NCCL_P2P_DISABLE=1 + +set -a +source .env +set +a + +pip install --quiet openai python-dotenv json-repair + +MODEL_8B="/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/models/llama-3.1-8b-instruct" + +pkill -f "vllm.entrypoints" 2>/dev/null || true +sleep 2 + +CUDA_VISIBLE_DEVICES=0 python -m vllm.entrypoints.openai.api_server \ + --model $MODEL_8B --port 8003 --tensor-parallel-size 1 \ + --gpu-memory-utilization 0.5 
--max-model-len 8192 \ + --dtype bfloat16 --disable-log-requests & + +for i in $(seq 1 90); do + curl -s http://localhost:8003/health > /dev/null 2>&1 && break + sleep 2 +done +echo "vLLM ready." + +cd collaborativeagents/scripts + +python run_experiments.py \ + --methods vanilla \ + --datasets math-hard \ + --n-profiles 200 \ + --start-profile 50 \ + --end-profile 100 \ + --n-sessions 15 \ + --max-turns 8 \ + --use-vllm \ + --use-openai-user \ + --openai-user-model gpt-5-mini \ + --reward-mode llm \ + --vllm-agent-url http://localhost:8003/v1 \ + --parallel-profiles 25 \ + --profile-path ../data/complex_profiles_v2/profiles_200.jsonl \ + --output-dir ../results/fullscale_15sess + +echo "vanilla p50-100 complete!" +pkill -f "vllm.entrypoints" 2>/dev/null || true diff --git a/collaborativeagents/slurm/fullscale/submit_all.sh b/collaborativeagents/slurm/fullscale/submit_all.sh new file mode 100644 index 0000000..5b76169 --- /dev/null +++ b/collaborativeagents/slurm/fullscale/submit_all.sh @@ -0,0 +1,29 @@ +#!/bin/bash +# Submit all 24 jobs for full-scale experiment +# Total: 200 profiles × 6 methods × 15 sessions = 18,000 sessions +# Split: 6 methods × 4 profile ranges (50 each) = 24 jobs +# Per job: 50 profiles × 15 sessions = 750 sessions ≈ 7-8 hours + +echo "Submitting all 24 jobs for full-scale experiment..." +echo "Total: 200 profiles × 6 methods × 15 sessions = 18,000 sessions" +echo "Split: 24 jobs (6 methods × 4 profile ranges of 50)" +echo "" + +cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/collaborativeagents/slurm/fullscale + +JOBS="" + +for script in run_*.sh; do + if [[ "$script" != "generate_jobs.sh" && "$script" != "submit_all.sh" && "$script" != "test_run.sh" ]]; then + JOB_ID=$(sbatch "$script" | awk '{print $4}') + JOBS="$JOBS $JOB_ID" + echo "Submitted $script -> Job ID: $JOB_ID" + fi +done + +echo "" +echo "All jobs submitted!" 
+echo "Job IDs:$JOBS" +echo "" +echo "Monitor with: squeue -u \$USER" +echo "Check results in: collaborativeagents/results/fullscale_15sess/" diff --git a/collaborativeagents/slurm/fullscale/test_25parallel.sh b/collaborativeagents/slurm/fullscale/test_25parallel.sh new file mode 100644 index 0000000..09d5ddb --- /dev/null +++ b/collaborativeagents/slurm/fullscale/test_25parallel.sh @@ -0,0 +1,66 @@ +#!/bin/bash +#SBATCH --job-name=test_25parallel +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA100x4 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=16 +#SBATCH --gres=gpu:nvidia_a100:2 +#SBATCH --mem=128G +#SBATCH --time=1:00:00 +#SBATCH --output=test_25parallel_%j.out +#SBATCH --error=test_25parallel_%j.err + +# Quick test: 25 profiles × 2 sessions × 1 method (vanilla) = 50 sessions +# With 25 parallel profiles to measure realistic throughput + +cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model +source /u/yurenh2/miniforge3/etc/profile.d/conda.sh +conda activate eval + +export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface +export PYTHONPATH="${PWD}/src:${PWD}/collaborativeagents:${PYTHONPATH}" +export NCCL_P2P_DISABLE=1 + +set -a +source .env +set +a + +pip install --quiet openai python-dotenv json-repair + +MODEL_8B="/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/models/llama-3.1-8b-instruct" + +pkill -f "vllm.entrypoints" 2>/dev/null || true +sleep 2 + +CUDA_VISIBLE_DEVICES=0 python -m vllm.entrypoints.openai.api_server \ + --model $MODEL_8B --port 8003 --tensor-parallel-size 1 \ + --gpu-memory-utilization 0.5 --max-model-len 8192 \ + --dtype bfloat16 --disable-log-requests & + +for i in $(seq 1 90); do + curl -s http://localhost:8003/health > /dev/null 2>&1 && break + sleep 2 +done +echo "vLLM ready." 
+ +cd collaborativeagents/scripts + +# Test with 25 parallel profiles +python run_experiments.py \ + --methods vanilla \ + --datasets math-hard \ + --n-profiles 25 \ + --n-sessions 2 \ + --max-turns 8 \ + --use-vllm \ + --use-openai-user \ + --openai-user-model gpt-5-mini \ + --reward-mode llm \ + --vllm-agent-url http://localhost:8003/v1 \ + --parallel-profiles 25 \ + --profile-path ../data/complex_profiles_v2/profiles_200.jsonl \ + --output-dir ../results/test_25parallel_$(date +%Y%m%d_%H%M%S) + +echo "Test complete!" +pkill -f "vllm.entrypoints" 2>/dev/null || true diff --git a/collaborativeagents/slurm/fullscale/test_25parallel_15649074.err b/collaborativeagents/slurm/fullscale/test_25parallel_15649074.err new file mode 100644 index 0000000..96ed829 --- /dev/null +++ b/collaborativeagents/slurm/fullscale/test_25parallel_15649074.err @@ -0,0 +1,386 @@ +/u/yurenh2/miniforge3/envs/eval/lib/python3.11/site-packages/transformers/utils/hub.py:110: FutureWarning: Using `TRANSFORMERS_CACHE` is deprecated and will be removed in v5 of Transformers. Use `HF_HOME` instead. + warnings.warn( +/u/yurenh2/miniforge3/envs/eval/lib/python3.11/site-packages/transformers/utils/hub.py:110: FutureWarning: Using `TRANSFORMERS_CACHE` is deprecated and will be removed in v5 of Transformers. Use `HF_HOME` instead. + warnings.warn( +(EngineCore_DP0 pid=2749050) Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00/dev/null || true +sleep 2 + +# Increase GPU utilization to 60% for higher throughput +CUDA_VISIBLE_DEVICES=0 python -m vllm.entrypoints.openai.api_server \ + --model $MODEL_8B --port 8003 --tensor-parallel-size 1 \ + --gpu-memory-utilization 0.6 --max-model-len 8192 \ + --dtype bfloat16 --disable-log-requests & + +for i in $(seq 1 90); do + curl -s http://localhost:8003/health > /dev/null 2>&1 && break + sleep 2 +done +echo "vLLM ready." 
+ +cd collaborativeagents/scripts + +python run_experiments.py \ + --methods vanilla \ + --datasets math-hard \ + --n-profiles 50 \ + --n-sessions 2 \ + --max-turns 8 \ + --use-vllm \ + --use-openai-user \ + --openai-user-model gpt-5-mini \ + --reward-mode llm \ + --vllm-agent-url http://localhost:8003/v1 \ + --parallel-profiles 50 \ + --profile-path ../data/complex_profiles_v2/profiles_200.jsonl \ + --output-dir ../results/test_50parallel_$(date +%Y%m%d_%H%M%S) + +echo "Test complete!" +pkill -f "vllm.entrypoints" 2>/dev/null || true diff --git a/collaborativeagents/slurm/fullscale/test_50parallel_15649149.err b/collaborativeagents/slurm/fullscale/test_50parallel_15649149.err new file mode 100644 index 0000000..358fd24 --- /dev/null +++ b/collaborativeagents/slurm/fullscale/test_50parallel_15649149.err @@ -0,0 +1,504 @@ +/u/yurenh2/miniforge3/envs/eval/lib/python3.11/site-packages/transformers/utils/hub.py:110: FutureWarning: Using `TRANSFORMERS_CACHE` is deprecated and will be removed in v5 of Transformers. Use `HF_HOME` instead. + warnings.warn( +/u/yurenh2/miniforge3/envs/eval/lib/python3.11/site-packages/transformers/utils/hub.py:110: FutureWarning: Using `TRANSFORMERS_CACHE` is deprecated and will be removed in v5 of Transformers. Use `HF_HOME` instead. + warnings.warn( +(EngineCore_DP0 pid=2003864) Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00/dev/null || true +sleep 2 + +# Start vLLM server +CUDA_VISIBLE_DEVICES=0 python -m vllm.entrypoints.openai.api_server \ + --model $MODEL_8B --port 8003 --tensor-parallel-size 1 \ + --gpu-memory-utilization 0.5 --max-model-len 8192 \ + --dtype bfloat16 --disable-log-requests & + +for i in $(seq 1 90); do + curl -s http://localhost:8003/health > /dev/null 2>&1 && break + sleep 2 +done +echo "vLLM ready." 
+ +cd collaborativeagents/scripts + +# Test with vanilla (simplest method) +echo "=== Testing batched agent calls ===" +python run_experiments.py \ + --methods vanilla \ + --datasets math-hard \ + --n-profiles 10 \ + --n-sessions 2 \ + --max-turns 8 \ + --use-vllm \ + --use-openai-user \ + --openai-user-model gpt-5-mini \ + --reward-mode llm \ + --vllm-agent-url http://localhost:8003/v1 \ + --parallel-profiles 10 \ + --profile-path ../data/complex_profiles_v2/profiles_200.jsonl \ + --output-dir ../results/test_batch_fix_$(date +%Y%m%d_%H%M%S) + +echo "Test complete!" +pkill -f "vllm.entrypoints" 2>/dev/null || true diff --git a/collaborativeagents/slurm/fullscale/test_batch_fix_15651956.err b/collaborativeagents/slurm/fullscale/test_batch_fix_15651956.err new file mode 100644 index 0000000..a7574bf --- /dev/null +++ b/collaborativeagents/slurm/fullscale/test_batch_fix_15651956.err @@ -0,0 +1,165 @@ +/u/yurenh2/miniforge3/envs/eval/lib/python3.11/site-packages/transformers/utils/hub.py:110: FutureWarning: Using `TRANSFORMERS_CACHE` is deprecated and will be removed in v5 of Transformers. Use `HF_HOME` instead. + warnings.warn( +/u/yurenh2/miniforge3/envs/eval/lib/python3.11/site-packages/transformers/utils/hub.py:110: FutureWarning: Using `TRANSFORMERS_CACHE` is deprecated and will be removed in v5 of Transformers. Use `HF_HOME` instead. 
+ warnings.warn( +(EngineCore_DP0 pid=2779888) Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00/dev/null || true +sleep 3 + +echo "=== Starting 70B User Simulator (GPU 0-1, TP=2) ===" +CUDA_VISIBLE_DEVICES=0,1 python -m vllm.entrypoints.openai.api_server \ + --model $MODEL_70B \ + --port 8004 \ + --tensor-parallel-size 2 \ + --gpu-memory-utilization 0.90 \ + --max-model-len 4096 \ + --quantization awq \ + --dtype float16 \ + --disable-log-requests \ + --guided-decoding-backend outlines & + +echo "=== Starting 8B Agent (GPU 2) ===" +CUDA_VISIBLE_DEVICES=2 python -m vllm.entrypoints.openai.api_server \ + --model $MODEL_8B \ + --port 8003 \ + --tensor-parallel-size 1 \ + --gpu-memory-utilization 0.50 \ + --max-model-len 8192 \ + --dtype bfloat16 \ + --disable-log-requests & + +# Wait for both servers +echo "Waiting for vLLM servers..." +for port in 8004 8003; do + for i in $(seq 1 120); do + curl -s http://localhost:$port/health > /dev/null 2>&1 && break + sleep 2 + done + echo " Port $port ready." 
+done + +cd collaborativeagents/scripts + +echo "" +echo "=== Running Test: 10 profiles × 2 sessions with LOCAL user simulator ===" +python run_experiments.py \ + --methods vanilla \ + --datasets math-hard \ + --n-profiles 10 \ + --n-sessions 2 \ + --max-turns 8 \ + --use-vllm \ + --vllm-user-url http://localhost:8004/v1 \ + --vllm-agent-url http://localhost:8003/v1 \ + --reward-mode llm \ + --parallel-profiles 10 \ + --profile-path ../data/complex_profiles_v2/profiles_200.jsonl \ + --output-dir ../results/test_local_user_$(date +%Y%m%d_%H%M%S) + +echo "" +echo "=== Test Complete ===" +pkill -f "vllm.entrypoints" 2>/dev/null || true diff --git a/collaborativeagents/slurm/fullscale/test_local_user_15652698.err b/collaborativeagents/slurm/fullscale/test_local_user_15652698.err new file mode 100644 index 0000000..4acc458 --- /dev/null +++ b/collaborativeagents/slurm/fullscale/test_local_user_15652698.err @@ -0,0 +1,215 @@ +/u/yurenh2/miniforge3/envs/eval/lib/python3.11/site-packages/transformers/utils/hub.py:110: FutureWarning: Using `TRANSFORMERS_CACHE` is deprecated and will be removed in v5 of Transformers. Use `HF_HOME` instead. + warnings.warn( +/u/yurenh2/miniforge3/envs/eval/lib/python3.11/site-packages/transformers/utils/hub.py:110: FutureWarning: Using `TRANSFORMERS_CACHE` is deprecated and will be removed in v5 of Transformers. Use `HF_HOME` instead. 
+ warnings.warn( +usage: api_server.py [-h] [--headless] [--api-server-count API_SERVER_COUNT] + [--config CONFIG] [--host HOST] [--port PORT] [--uds UDS] + [--uvicorn-log-level {critical,debug,error,info,trace,warning}] + [--disable-uvicorn-access-log | --no-disable-uvicorn-access-log] + [--allow-credentials | --no-allow-credentials] + [--allowed-origins ALLOWED_ORIGINS] + [--allowed-methods ALLOWED_METHODS] + [--allowed-headers ALLOWED_HEADERS] + [--api-key API_KEY [API_KEY ...]] + [--lora-modules LORA_MODULES [LORA_MODULES ...]] + [--chat-template CHAT_TEMPLATE] + [--chat-template-content-format {auto,openai,string}] + [--trust-request-chat-template | --no-trust-request-chat-template] + [--response-role RESPONSE_ROLE] + [--ssl-keyfile SSL_KEYFILE] [--ssl-certfile SSL_CERTFILE] + [--ssl-ca-certs SSL_CA_CERTS] + [--enable-ssl-refresh | --no-enable-ssl-refresh] + [--ssl-cert-reqs SSL_CERT_REQS] [--root-path ROOT_PATH] + [--middleware MIDDLEWARE] + [--return-tokens-as-token-ids | --no-return-tokens-as-token-ids] + [--disable-frontend-multiprocessing | --no-disable-frontend-multiprocessing] + [--enable-request-id-headers | --no-enable-request-id-headers] + [--enable-auto-tool-choice | --no-enable-auto-tool-choice] + [--exclude-tools-when-tool-choice-none | --no-exclude-tools-when-tool-choice-none] + [--tool-call-parser {deepseek_v3,deepseek_v31,deepseek_v32,ernie45,gigachat3,glm45,granite,granite-20b-fc,hermes,hunyuan_a13b,internlm,jamba,kimi_k2,llama3_json,llama4_json,llama4_pythonic,longcat,minimax,minimax_m2,mistral,olmo3,openai,phi4_mini_json,pythonic,qwen3_coder,qwen3_xml,seed_oss,step3,xlam} or name registered in --tool-parser-plugin] + [--tool-parser-plugin TOOL_PARSER_PLUGIN] + [--tool-server TOOL_SERVER] + [--log-config-file LOG_CONFIG_FILE] + [--max-log-len MAX_LOG_LEN] + [--disable-fastapi-docs | --no-disable-fastapi-docs] + [--enable-prompt-tokens-details | --no-enable-prompt-tokens-details] + [--enable-server-load-tracking | 
--no-enable-server-load-tracking] + [--enable-force-include-usage | --no-enable-force-include-usage] + [--enable-tokenizer-info-endpoint | --no-enable-tokenizer-info-endpoint] + [--enable-log-outputs | --no-enable-log-outputs] + [--h11-max-incomplete-event-size H11_MAX_INCOMPLETE_EVENT_SIZE] + [--h11-max-header-count H11_MAX_HEADER_COUNT] + [--log-error-stack | --no-log-error-stack] + [--tokens-only | --no-tokens-only] [--model MODEL] + [--runner {auto,draft,generate,pooling}] + [--convert {auto,classify,embed,none,reward}] + [--tokenizer TOKENIZER] + [--tokenizer-mode ['auto', 'deepseek_v32', 'hf', 'mistral', 'slow']] + [--trust-remote-code | --no-trust-remote-code] + [--dtype {auto,bfloat16,float,float16,float32,half}] + [--seed SEED] [--hf-config-path HF_CONFIG_PATH] + [--allowed-local-media-path ALLOWED_LOCAL_MEDIA_PATH] + [--allowed-media-domains ALLOWED_MEDIA_DOMAINS [ALLOWED_MEDIA_DOMAINS ...]] + [--revision REVISION] [--code-revision CODE_REVISION] + [--tokenizer-revision TOKENIZER_REVISION] + [--max-model-len MAX_MODEL_LEN] + [--quantization QUANTIZATION] + [--enforce-eager | --no-enforce-eager] + [--max-logprobs MAX_LOGPROBS] + [--logprobs-mode {processed_logits,processed_logprobs,raw_logits,raw_logprobs}] + [--disable-sliding-window | --no-disable-sliding-window] + [--disable-cascade-attn | --no-disable-cascade-attn] + [--skip-tokenizer-init | --no-skip-tokenizer-init] + [--enable-prompt-embeds | --no-enable-prompt-embeds] + [--served-model-name SERVED_MODEL_NAME [SERVED_MODEL_NAME ...]] + [--config-format ['auto', 'hf', 'mistral']] + [--hf-token [HF_TOKEN]] [--hf-overrides HF_OVERRIDES] + [--pooler-config POOLER_CONFIG] + [--logits-processor-pattern LOGITS_PROCESSOR_PATTERN] + [--generation-config GENERATION_CONFIG] + [--override-generation-config OVERRIDE_GENERATION_CONFIG] + [--enable-sleep-mode | --no-enable-sleep-mode] + [--model-impl ['auto', 'terratorch', 'transformers', 'vllm']] + [--override-attention-dtype OVERRIDE_ATTENTION_DTYPE] + 
[--logits-processors LOGITS_PROCESSORS [LOGITS_PROCESSORS ...]] + [--io-processor-plugin IO_PROCESSOR_PLUGIN] + [--load-format LOAD_FORMAT] [--download-dir DOWNLOAD_DIR] + [--safetensors-load-strategy SAFETENSORS_LOAD_STRATEGY] + [--model-loader-extra-config MODEL_LOADER_EXTRA_CONFIG] + [--ignore-patterns IGNORE_PATTERNS [IGNORE_PATTERNS ...]] + [--use-tqdm-on-load | --no-use-tqdm-on-load] + [--pt-load-map-location PT_LOAD_MAP_LOCATION] + [--attention-backend ATTENTION_BACKEND] + [--reasoning-parser REASONING_PARSER] + [--reasoning-parser-plugin REASONING_PARSER_PLUGIN] + [--distributed-executor-backend ['external_launcher', 'mp', 'ray', 'uni']] + [--pipeline-parallel-size PIPELINE_PARALLEL_SIZE] + [--master-addr MASTER_ADDR] [--master-port MASTER_PORT] + [--nnodes NNODES] [--node-rank NODE_RANK] + [--tensor-parallel-size TENSOR_PARALLEL_SIZE] + [--decode-context-parallel-size DECODE_CONTEXT_PARALLEL_SIZE] + [--dcp-kv-cache-interleave-size DCP_KV_CACHE_INTERLEAVE_SIZE] + [--cp-kv-cache-interleave-size CP_KV_CACHE_INTERLEAVE_SIZE] + [--prefill-context-parallel-size PREFILL_CONTEXT_PARALLEL_SIZE] + [--data-parallel-size DATA_PARALLEL_SIZE] + [--data-parallel-rank DATA_PARALLEL_RANK] + [--data-parallel-start-rank DATA_PARALLEL_START_RANK] + [--data-parallel-size-local DATA_PARALLEL_SIZE_LOCAL] + [--data-parallel-address DATA_PARALLEL_ADDRESS] + [--data-parallel-rpc-port DATA_PARALLEL_RPC_PORT] + [--data-parallel-backend DATA_PARALLEL_BACKEND] + [--data-parallel-hybrid-lb | --no-data-parallel-hybrid-lb | -dph] + [--data-parallel-external-lb | --no-data-parallel-external-lb | -dpe] + [--enable-expert-parallel | --no-enable-expert-parallel] + [--all2all-backend {allgather_reducescatter,deepep_high_throughput,deepep_low_latency,flashinfer_all2allv,naive,pplx,None}] + [--enable-dbo | --no-enable-dbo] + [--dbo-decode-token-threshold DBO_DECODE_TOKEN_THRESHOLD] + [--dbo-prefill-token-threshold DBO_PREFILL_TOKEN_THRESHOLD] + [--disable-nccl-for-dp-synchronization | 
--no-disable-nccl-for-dp-synchronization] + [--enable-eplb | --no-enable-eplb] + [--eplb-config EPLB_CONFIG] + [--expert-placement-strategy {linear,round_robin}] + [--max-parallel-loading-workers MAX_PARALLEL_LOADING_WORKERS] + [--ray-workers-use-nsight | --no-ray-workers-use-nsight] + [--disable-custom-all-reduce | --no-disable-custom-all-reduce] + [--worker-cls WORKER_CLS] + [--worker-extension-cls WORKER_EXTENSION_CLS] + [--block-size {1,8,16,32,64,128,256}] + [--gpu-memory-utilization GPU_MEMORY_UTILIZATION] + [--kv-cache-memory-bytes KV_CACHE_MEMORY_BYTES] + [--swap-space SWAP_SPACE] + [--kv-cache-dtype {auto,bfloat16,fp8,fp8_ds_mla,fp8_e4m3,fp8_e5m2,fp8_inc}] + [--num-gpu-blocks-override NUM_GPU_BLOCKS_OVERRIDE] + [--enable-prefix-caching | --no-enable-prefix-caching] + [--prefix-caching-hash-algo {sha256,sha256_cbor,xxhash,xxhash_cbor}] + [--cpu-offload-gb CPU_OFFLOAD_GB] + [--calculate-kv-scales | --no-calculate-kv-scales] + [--kv-sharing-fast-prefill | --no-kv-sharing-fast-prefill] + [--mamba-cache-dtype {auto,float16,float32}] + [--mamba-ssm-cache-dtype {auto,float16,float32}] + [--mamba-block-size MAMBA_BLOCK_SIZE] + [--kv-offloading-size KV_OFFLOADING_SIZE] + [--kv-offloading-backend {lmcache,native,None}] + [--limit-mm-per-prompt LIMIT_MM_PER_PROMPT] + [--enable-mm-embeds | --no-enable-mm-embeds] + [--media-io-kwargs MEDIA_IO_KWARGS] + [--mm-processor-kwargs MM_PROCESSOR_KWARGS] + [--mm-processor-cache-gb MM_PROCESSOR_CACHE_GB] + [--mm-processor-cache-type {lru,shm}] + [--mm-shm-cache-max-object-size-mb MM_SHM_CACHE_MAX_OBJECT_SIZE_MB] + [--mm-encoder-tp-mode {data,weights}] + [--mm-encoder-attn-backend MM_ENCODER_ATTN_BACKEND] + [--interleave-mm-strings | --no-interleave-mm-strings] + [--skip-mm-profiling | --no-skip-mm-profiling] + [--video-pruning-rate VIDEO_PRUNING_RATE] + [--enable-lora | --no-enable-lora] + [--max-loras MAX_LORAS] + [--max-lora-rank {1,8,16,32,64,128,256,320,512}] + [--lora-dtype {auto,bfloat16,float16}] + [--max-cpu-loras 
MAX_CPU_LORAS] + [--fully-sharded-loras | --no-fully-sharded-loras] + [--default-mm-loras DEFAULT_MM_LORAS] + [--show-hidden-metrics-for-version SHOW_HIDDEN_METRICS_FOR_VERSION] + [--otlp-traces-endpoint OTLP_TRACES_ENDPOINT] + [--collect-detailed-traces {all,model,worker,None} [{all,model,worker,None} ...]] + [--kv-cache-metrics | --no-kv-cache-metrics] + [--kv-cache-metrics-sample KV_CACHE_METRICS_SAMPLE] + [--cudagraph-metrics | --no-cudagraph-metrics] + [--enable-layerwise-nvtx-tracing | --no-enable-layerwise-nvtx-tracing] + [--max-num-batched-tokens MAX_NUM_BATCHED_TOKENS] + [--max-num-seqs MAX_NUM_SEQS] + [--max-num-partial-prefills MAX_NUM_PARTIAL_PREFILLS] + [--max-long-partial-prefills MAX_LONG_PARTIAL_PREFILLS] + [--long-prefill-token-threshold LONG_PREFILL_TOKEN_THRESHOLD] + [--scheduling-policy {fcfs,priority}] + [--enable-chunked-prefill | --no-enable-chunked-prefill] + [--disable-chunked-mm-input | --no-disable-chunked-mm-input] + [--scheduler-cls SCHEDULER_CLS] + [--disable-hybrid-kv-cache-manager | --no-disable-hybrid-kv-cache-manager] + [--async-scheduling | --no-async-scheduling] + [--stream-interval STREAM_INTERVAL] + [--cudagraph-capture-sizes CUDAGRAPH_CAPTURE_SIZES [CUDAGRAPH_CAPTURE_SIZES ...]] + [--max-cudagraph-capture-size MAX_CUDAGRAPH_CAPTURE_SIZE] + [--speculative-config SPECULATIVE_CONFIG] + [--kv-transfer-config KV_TRANSFER_CONFIG] + [--kv-events-config KV_EVENTS_CONFIG] + [--ec-transfer-config EC_TRANSFER_CONFIG] + [--compilation-config COMPILATION_CONFIG] + [--attention-config ATTENTION_CONFIG] + [--additional-config ADDITIONAL_CONFIG] + [--structured-outputs-config STRUCTURED_OUTPUTS_CONFIG] + [--profiler-config PROFILER_CONFIG] + [--optimization-level OPTIMIZATION_LEVEL] + [--disable-log-stats] [--aggregate-engine-logging] + [--enable-log-requests | --no-enable-log-requests] + [--disable-log-requests | --no-disable-log-requests] + [model_tag] +api_server.py: error: unrecognized arguments: --guided-decoding-backend 
+/u/yurenh2/miniforge3/envs/eval/lib/python3.11/site-packages/transformers/utils/hub.py:110: FutureWarning: Using `TRANSFORMERS_CACHE` is deprecated and will be removed in v5 of Transformers. Use `HF_HOME` instead. + warnings.warn( +(EngineCore_DP0 pid=3603174) Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00/dev/null || true +sleep 2 + +# GPU 0: vLLM 8B agent, GPU 1: adapter models +CUDA_VISIBLE_DEVICES=0 python -m vllm.entrypoints.openai.api_server \ + --model $MODEL_8B --port 8003 --tensor-parallel-size 1 \ + --gpu-memory-utilization 0.5 --max-model-len 8192 \ + --dtype bfloat16 --disable-log-requests & + +# Wait for server +for i in $(seq 1 90); do + curl -s http://localhost:8003/health > /dev/null 2>&1 && break + sleep 2 +done +echo "vLLM 8B agent server ready." + +cd collaborativeagents/scripts + +# Test run: vanilla + rag_vector (light + heavy methods) +python run_experiments.py \ + --methods vanilla,rag_vector \ + --datasets math-hard \ + --n-profiles 2 \ + --n-sessions 2 \ + --max-turns 8 \ + --use-vllm \ + --use-openai-user \ + --openai-user-model gpt-5-mini \ + --reward-mode llm \ + --vllm-agent-url http://localhost:8003/v1 \ + --parallel-profiles 2 \ + --profile-path ../data/complex_profiles_v2/profiles_200.jsonl \ + --output-dir ../results/fullscale_test_$(date +%Y%m%d_%H%M%S) + +echo "Test run complete!" 
+ +pkill -f "vllm.entrypoints" 2>/dev/null || true diff --git a/collaborativeagents/slurm/generate_profiles.sh b/collaborativeagents/slurm/generate_profiles.sh new file mode 100644 index 0000000..936a1e6 --- /dev/null +++ b/collaborativeagents/slurm/generate_profiles.sh @@ -0,0 +1,39 @@ +#!/bin/bash +#SBATCH --job-name=gen_profiles +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=cpu +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=4 +#SBATCH --mem=16G +#SBATCH --time=00:30:00 +#SBATCH --output=logs/gen_profiles_%j.out +#SBATCH --error=logs/gen_profiles_%j.err + +# Generate 100 user profiles from schema (no LLM required) +# This is fast and doesn't need GPU + +set -e + +cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model + +# Create logs directory +mkdir -p collaborativeagents/slurm/logs +mkdir -p collaborativeagents/data/complex_profiles_v2 + +echo "Starting profile generation at $(date)" +echo "Job ID: $SLURM_JOB_ID" + +# Use the eval environment (has required packages) +source /u/yurenh2/miniforge3/etc/profile.d/conda.sh +conda activate eval + +# Generate profiles from schema (no LLM needed) +python collaborativeagents/scripts/generate_profiles_v2.py \ + --num_profiles 100 \ + --from_schema collaborativeagents/data/preference_schema_v2_sample.json \ + --output collaborativeagents/data/complex_profiles_v2/profiles_100.jsonl \ + --seed 42 + +echo "Profile generation completed at $(date)" +echo "Output: collaborativeagents/data/complex_profiles_v2/profiles_100.jsonl" diff --git a/collaborativeagents/slurm/logs/all_memory_14360420.err b/collaborativeagents/slurm/logs/all_memory_14360420.err new file mode 100644 index 0000000..93582b3 --- /dev/null +++ b/collaborativeagents/slurm/logs/all_memory_14360420.err @@ -0,0 +1,127 @@ +/u/yurenh2/miniforge3/envs/eval/lib/python3.11/site-packages/transformers/utils/hub.py:110: FutureWarning: Using `TRANSFORMERS_CACHE` is deprecated and will be removed in v5 of Transformers. 
Use `HF_HOME` instead. + warnings.warn( +2025-12-27 01:56:00,092 - INFO - Loaded dataset: mmlu +2025-12-27 01:56:00,092 - INFO - Loaded dataset: aime +2025-12-27 01:56:00,092 - INFO - Loaded dataset: math-hard +2025-12-27 01:56:00,092 - INFO - Loaded dataset: humaneval +2025-12-27 01:56:00,108 - INFO - Loaded 100 profiles from ../data/complex_profiles_v2/profiles_100.jsonl +2025-12-27 01:56:00,108 - INFO - Running method: all_memory +`torch_dtype` is deprecated! Use `dtype` instead! + Loading checkpoint shards: 0%| | 0/4 [00:00 + main() + File "/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/collaborativeagents/scripts/run_experiments.py", line 608, in main + analysis = runner.run_all() + ^^^^^^^^^^^^^^^^ + File "/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/collaborativeagents/scripts/run_experiments.py", line 414, in run_all + results = self.run_method(method) + ^^^^^^^^^^^^^^^^^^^^^^^ + File "/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/collaborativeagents/scripts/run_experiments.py", line 367, in run_method + samples = dataset.get_testset() + ^^^^^^^^^^^^^^^^^^^^^ + File "/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/collaborativeagents/datasets_extended.py", line 71, in get_testset + self._test_data = self._load_data("test")[:self.eval_size] + ^^^^^^^^^^^^^^^^^^^^^^^ + File "/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/collaborativeagents/datasets_extended.py", line 153, in _load_data + solution=item["answer"], + ~~~~^^^^^^^^^^ +KeyError: 'answer' diff --git a/collaborativeagents/slurm/logs/full_exp_14358390.err b/collaborativeagents/slurm/logs/full_exp_14358390.err new file mode 100644 index 0000000..65669a6 --- /dev/null +++ b/collaborativeagents/slurm/logs/full_exp_14358390.err @@ -0,0 +1,97 @@ +/u/yurenh2/miniforge3/envs/eval/lib/python3.11/site-packages/transformers/utils/hub.py:110: FutureWarning: Using `TRANSFORMERS_CACHE` is deprecated and will 
be removed in v5 of Transformers. Use `HF_HOME` instead. + warnings.warn( +2025-12-26 11:03:58,116 - INFO - Loaded dataset: mmlu +2025-12-26 11:03:58,116 - INFO - Loaded dataset: aime +2025-12-26 11:03:58,116 - INFO - Loaded dataset: math-hard +2025-12-26 11:03:58,116 - INFO - Loaded dataset: humaneval +2025-12-26 11:03:58,213 - INFO - Loaded 100 profiles from ../data/complex_profiles_v2/profiles_100.jsonl +2025-12-26 11:03:58,214 - INFO - Running method: vanilla +`torch_dtype` is deprecated! Use `dtype` instead! + Loading checkpoint shards: 0%| | 0/4 [00:00 + from adapters.personalized_llm_adapter import PersonalizedLLMAdapter, create_baseline_adapter + File "/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/collaborativeagents/adapters/__init__.py", line 3, in + from .personalized_llm_adapter import ( + File "/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/collaborativeagents/adapters/personalized_llm_adapter.py", line 21, in + from personalization.serving.personalized_llm import ( + File "/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/src/personalization/serving/__init__.py", line 5, in + from personalization.serving.personalized_llm import ( + File "/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/src/personalization/serving/personalized_llm.py", line 35, in + from personalization.models.reranker.qwen3_reranker import Qwen3Reranker + File "/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/src/personalization/models/reranker/qwen3_reranker.py", line 26 + self.model = AutoModelForCausalLM.from_pretrained( + ^ +IndentationError: expected an indented block after 'else' statement on line 24 diff --git a/collaborativeagents/slurm/logs/run_expts_a100_14355797.err b/collaborativeagents/slurm/logs/run_expts_a100_14355797.err new file mode 100644 index 0000000..28ce959 --- /dev/null +++ b/collaborativeagents/slurm/logs/run_expts_a100_14355797.err @@ -0,0 +1,14 @@ +Traceback 
(most recent call last): + File "/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/collaborativeagents/scripts/run_experiments.py", line 31, in + from adapters.personalized_llm_adapter import PersonalizedLLMAdapter, create_baseline_adapter + File "/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/collaborativeagents/adapters/__init__.py", line 3, in + from .personalized_llm_adapter import ( + File "/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/collaborativeagents/adapters/personalized_llm_adapter.py", line 21, in + from personalization.serving.personalized_llm import ( + File "/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/src/personalization/serving/__init__.py", line 5, in + from personalization.serving.personalized_llm import ( + File "/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/src/personalization/serving/personalized_llm.py", line 38, in + from personalization.user_model.features import ItemProjection + File "/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/src/personalization/user_model/features.py", line 3, in + from sklearn.decomposition import PCA +ModuleNotFoundError: No module named 'sklearn' diff --git a/collaborativeagents/slurm/logs/run_expts_a100_14355807.err b/collaborativeagents/slurm/logs/run_expts_a100_14355807.err new file mode 100644 index 0000000..33fa583 --- /dev/null +++ b/collaborativeagents/slurm/logs/run_expts_a100_14355807.err @@ -0,0 +1,10 @@ +Traceback (most recent call last): + File "/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/collaborativeagents/scripts/run_experiments.py", line 484, in + main() + File "/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/collaborativeagents/scripts/run_experiments.py", line 471, in main + runner = ExperimentRunner(config) + ^^^^^^^^^^^^^^^^^^^^^^^^ + File 
"/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/collaborativeagents/scripts/run_experiments.py", line 99, in __init__ + self.conflict_generator = ConflictScenarioGenerator() + ^^^^^^^^^^^^^^^^^^^^^^^^^^^ +TypeError: ConflictScenarioGenerator.__init__() missing 1 required positional argument: 'profile' diff --git a/collaborativeagents/slurm/logs/run_expts_a100_14355816.err b/collaborativeagents/slurm/logs/run_expts_a100_14355816.err new file mode 100644 index 0000000..f147ab6 --- /dev/null +++ b/collaborativeagents/slurm/logs/run_expts_a100_14355816.err @@ -0,0 +1,22 @@ +2025-12-25 07:53:36,898 - INFO - Loaded dataset: math-500 +Traceback (most recent call last): + File "/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/collaborativeagents/scripts/run_experiments.py", line 484, in + main() + File "/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/collaborativeagents/scripts/run_experiments.py", line 471, in main + runner = ExperimentRunner(config) + ^^^^^^^^^^^^^^^^^^^^^^^^ + File "/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/collaborativeagents/scripts/run_experiments.py", line 111, in __init__ + self.profiles = self._load_profiles() + ^^^^^^^^^^^^^^^^^^^^^ + File "/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/collaborativeagents/scripts/run_experiments.py", line 117, in _load_profiles + profiles = json.load(f) + ^^^^^^^^^^^^ + File "/u/yurenh2/miniforge3/envs/eval/lib/python3.11/json/__init__.py", line 293, in load + return loads(fp.read(), + ^^^^^^^^^^^^^^^^ + File "/u/yurenh2/miniforge3/envs/eval/lib/python3.11/json/__init__.py", line 346, in loads + return _default_decoder.decode(s) + ^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/u/yurenh2/miniforge3/envs/eval/lib/python3.11/json/decoder.py", line 340, in decode + raise JSONDecodeError("Extra data", s, end) +json.decoder.JSONDecodeError: Extra data: line 2 column 1 (char 10782) diff --git 
a/collaborativeagents/slurm/logs/run_expts_a100_14355842.err b/collaborativeagents/slurm/logs/run_expts_a100_14355842.err new file mode 100644 index 0000000..0144ee9 --- /dev/null +++ b/collaborativeagents/slurm/logs/run_expts_a100_14355842.err @@ -0,0 +1,23 @@ +2025-12-25 07:55:08,518 - INFO - Loaded dataset: math-500 +2025-12-25 07:55:08,527 - INFO - Loaded 100 profiles from ../data/complex_profiles_v2/profiles_100.jsonl +2025-12-25 07:55:08,528 - INFO - Running method: rag_vector +2025-12-25 07:55:08,528 - INFO - Profile 1/2 + Generating test split: 0%| | 0/500 [00:00 + main() + File "/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/collaborativeagents/scripts/run_experiments.py", line 476, in main + analysis = runner.run_all() + ^^^^^^^^^^^^^^^^ + File "/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/collaborativeagents/scripts/run_experiments.py", line 292, in run_all + analysis = self._analyze_results(all_results) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/collaborativeagents/scripts/run_experiments.py", line 365, in _analyze_results + best = max(values, key=values.get) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^ +ValueError: max() arg is an empty sequence diff --git a/collaborativeagents/slurm/logs/run_expts_a100_14355851.err b/collaborativeagents/slurm/logs/run_expts_a100_14355851.err new file mode 100644 index 0000000..59bbe1a --- /dev/null +++ b/collaborativeagents/slurm/logs/run_expts_a100_14355851.err @@ -0,0 +1,28 @@ +2025-12-25 07:58:42,438 - INFO - Loaded dataset: math-500 +2025-12-25 07:58:42,447 - INFO - Loaded 100 profiles from ../data/complex_profiles_v2/profiles_100.jsonl +2025-12-25 07:58:42,448 - INFO - Running method: rag_vector +2025-12-25 07:58:42,448 - INFO - Profile 1/2 +2025-12-25 07:58:47,959 - ERROR - Error in session: ConversationGenerator.__init__() got an unexpected keyword argument 'user_model' +2025-12-25 07:58:47,959 - ERROR - Error in 
session: ConversationGenerator.__init__() got an unexpected keyword argument 'user_model' +2025-12-25 07:58:47,960 - INFO - Profile 2/2 +2025-12-25 07:58:47,960 - ERROR - Error in session: ConversationGenerator.__init__() got an unexpected keyword argument 'user_model' +2025-12-25 07:58:47,960 - ERROR - Error in session: ConversationGenerator.__init__() got an unexpected keyword argument 'user_model' +2025-12-25 07:58:47,962 - WARNING - No values for metric task_success_rate, skipping comparison +2025-12-25 07:58:47,962 - WARNING - No values for metric avg_user_tokens, skipping comparison +2025-12-25 07:58:47,962 - WARNING - No values for metric avg_total_tokens, skipping comparison +2025-12-25 07:58:47,962 - WARNING - No values for metric avg_enforcement_count, skipping comparison +2025-12-25 07:58:47,962 - WARNING - No values for metric avg_preference_compliance, skipping comparison +2025-12-25 07:58:47,962 - WARNING - No values for metric conflict_resolution_accuracy, skipping comparison +2025-12-25 07:58:47,962 - WARNING - No values for metric over_personalization_rate, skipping comparison +Traceback (most recent call last): + File "/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/collaborativeagents/scripts/run_experiments.py", line 491, in + main() + File "/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/collaborativeagents/scripts/run_experiments.py", line 479, in main + analysis = runner.run_all() + ^^^^^^^^^^^^^^^^ + File "/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/collaborativeagents/scripts/run_experiments.py", line 299, in run_all + self._generate_report(analysis) + File "/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/collaborativeagents/scripts/run_experiments.py", line 414, in _generate_report + best = analysis["comparison"][metric_key]["best_method"] + ~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^ +KeyError: 'task_success_rate' diff --git 
a/collaborativeagents/slurm/logs/run_expts_a100_14355856.err b/collaborativeagents/slurm/logs/run_expts_a100_14355856.err new file mode 100644 index 0000000..608e91f --- /dev/null +++ b/collaborativeagents/slurm/logs/run_expts_a100_14355856.err @@ -0,0 +1,10 @@ +2025-12-25 08:08:52,658 - INFO - Loaded dataset: math-500 +2025-12-25 08:08:52,698 - INFO - Loaded 100 profiles from ../data/complex_profiles_v2/profiles_100.jsonl +2025-12-25 08:08:52,699 - INFO - Running method: rag_vector +2025-12-25 08:08:52,699 - INFO - Profile 1/2 +2025-12-25 08:08:55,383 - ERROR - Error in session: [Errno 2] No such file or directory: '/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/collaborativeagents/scripts/configs/local_models.yaml' +2025-12-25 08:08:55,383 - ERROR - Error in session: [Errno 2] No such file or directory: '/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/collaborativeagents/scripts/configs/local_models.yaml' +2025-12-25 08:08:55,383 - INFO - Profile 2/2 +2025-12-25 08:08:55,384 - ERROR - Error in session: [Errno 2] No such file or directory: '/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/collaborativeagents/scripts/configs/local_models.yaml' +2025-12-25 08:08:55,384 - ERROR - Error in session: [Errno 2] No such file or directory: '/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/collaborativeagents/scripts/configs/local_models.yaml' +2025-12-25 08:08:55,390 - INFO - Report saved to ../results/test_a100_20251225_080844/20251225_080852/report.md diff --git a/collaborativeagents/slurm/logs/run_expts_a100_14355861.err b/collaborativeagents/slurm/logs/run_expts_a100_14355861.err new file mode 100644 index 0000000..fac726b --- /dev/null +++ b/collaborativeagents/slurm/logs/run_expts_a100_14355861.err @@ -0,0 +1,14 @@ +2025-12-25 08:12:25,638 - INFO - Loaded dataset: math-500 +2025-12-25 08:12:25,647 - INFO - Loaded 100 profiles from ../data/complex_profiles_v2/profiles_100.jsonl +2025-12-25 
08:12:25,648 - INFO - Running method: rag_vector +2025-12-25 08:12:25,648 - INFO - Profile 1/2 +2025-12-25 08:12:28,020 - ERROR - Error in session: models/qwen3-embedding-8b is not a local folder and is not a valid model identifier listed on 'https://huggingface.co/models' +If this is a private repository, make sure to pass a token having permission to this repo either by logging in with `hf auth login` or by passing `token=` +2025-12-25 08:12:28,062 - ERROR - Error in session: models/qwen3-embedding-8b is not a local folder and is not a valid model identifier listed on 'https://huggingface.co/models' +If this is a private repository, make sure to pass a token having permission to this repo either by logging in with `hf auth login` or by passing `token=` +2025-12-25 08:12:28,062 - INFO - Profile 2/2 +2025-12-25 08:12:28,107 - ERROR - Error in session: models/qwen3-embedding-8b is not a local folder and is not a valid model identifier listed on 'https://huggingface.co/models' +If this is a private repository, make sure to pass a token having permission to this repo either by logging in with `hf auth login` or by passing `token=` +2025-12-25 08:12:28,155 - ERROR - Error in session: models/qwen3-embedding-8b is not a local folder and is not a valid model identifier listed on 'https://huggingface.co/models' +If this is a private repository, make sure to pass a token having permission to this repo either by logging in with `hf auth login` or by passing `token=` +2025-12-25 08:12:28,189 - INFO - Report saved to ../results/test_a100_20251225_081218/20251225_081225/report.md diff --git a/collaborativeagents/slurm/logs/run_expts_a100_14355863.err b/collaborativeagents/slurm/logs/run_expts_a100_14355863.err new file mode 100644 index 0000000..1a41e92 --- /dev/null +++ b/collaborativeagents/slurm/logs/run_expts_a100_14355863.err @@ -0,0 +1,11 @@ +2025-12-25 08:15:58,390 - INFO - Loaded dataset: math-500 +2025-12-25 08:15:58,399 - INFO - Loaded 100 profiles from 
../data/complex_profiles_v2/profiles_100.jsonl +2025-12-25 08:15:58,399 - INFO - Running method: rag_vector +2025-12-25 08:15:58,399 - INFO - Profile 1/2 +`torch_dtype` is deprecated! Use `dtype` instead! +2025-12-25 08:16:02,559 - ERROR - Error in session: Using a `device_map`, `tp_plan`, `torch.device` context manager or setting `torch.set_default_device(device)` requires `accelerate`. You can install it with `pip install accelerate` +2025-12-25 08:16:02,854 - ERROR - Error in session: Using a `device_map`, `tp_plan`, `torch.device` context manager or setting `torch.set_default_device(device)` requires `accelerate`. You can install it with `pip install accelerate` +2025-12-25 08:16:02,874 - INFO - Profile 2/2 +2025-12-25 08:16:03,126 - ERROR - Error in session: Using a `device_map`, `tp_plan`, `torch.device` context manager or setting `torch.set_default_device(device)` requires `accelerate`. You can install it with `pip install accelerate` +2025-12-25 08:16:03,394 - ERROR - Error in session: Using a `device_map`, `tp_plan`, `torch.device` context manager or setting `torch.set_default_device(device)` requires `accelerate`. You can install it with `pip install accelerate` +2025-12-25 08:16:03,459 - INFO - Report saved to ../results/test_a100_20251225_081551/20251225_081558/report.md diff --git a/collaborativeagents/slurm/logs/run_expts_a100_14355865.err b/collaborativeagents/slurm/logs/run_expts_a100_14355865.err new file mode 100644 index 0000000..769bc20 --- /dev/null +++ b/collaborativeagents/slurm/logs/run_expts_a100_14355865.err @@ -0,0 +1,19 @@ +2025-12-25 08:20:02,816 - INFO - Loaded dataset: math-500 +2025-12-25 08:20:02,951 - INFO - Loaded 100 profiles from ../data/complex_profiles_v2/profiles_100.jsonl +2025-12-25 08:20:02,952 - INFO - Running method: rag_vector +2025-12-25 08:20:02,952 - INFO - Profile 1/2 +`torch_dtype` is deprecated! Use `dtype` instead! 
+ Loading checkpoint shards: 0%| | 0/4 [00:00 + results["full_session"] = test_full_session() + ^^^^^^^^^^^^^^^^^^^ + File "/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/collaborativeagents/scripts/test_multiturn.py", line 171, in test_full_session + adapter = create_baseline_adapter("vanilla") + ^^^^^^^^^^^^^^^^^^^^^^^ +NameError: name 'create_baseline_adapter' is not defined diff --git a/collaborativeagents/slurm/logs/test_multiturn_14357119.err b/collaborativeagents/slurm/logs/test_multiturn_14357119.err new file mode 100644 index 0000000..1a66fce --- /dev/null +++ b/collaborativeagents/slurm/logs/test_multiturn_14357119.err @@ -0,0 +1,46 @@ +`torch_dtype` is deprecated! Use `dtype` instead! + Loading checkpoint shards: 0%| | 0/4 [00:00 + results["full_session"] = test_full_session() + ^^^^^^^^^^^^^^^^^^^ + File "/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/collaborativeagents/scripts/test_multiturn.py", line 173, in test_full_session + adapter.initialize() + File "/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/collaborativeagents/adapters/personalized_llm_adapter.py", line 87, in initialize + self._llm = PersonalizedLLM( + ^^^^^^^^^^^^^^^^ + File "/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/src/personalization/serving/personalized_llm.py", line 227, in __init__ + self._load_models() + File "/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/src/personalization/serving/personalized_llm.py", line 295, in _load_models + self._reranker = Qwen3Reranker( + ^^^^^^^^^^^^^^ + File "/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/src/personalization/models/reranker/qwen3_reranker.py", line 26, in __init__ + self.model = AutoModelForCausalLM.from_pretrained( + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/u/yurenh2/miniforge3/envs/eval/lib/python3.11/site-packages/transformers/models/auto/auto_factory.py", line 604, in from_pretrained + return 
model_class.from_pretrained( + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/u/yurenh2/miniforge3/envs/eval/lib/python3.11/site-packages/transformers/modeling_utils.py", line 277, in _wrapper + return func(*args, **kwargs) + ^^^^^^^^^^^^^^^^^^^^^ + File "/u/yurenh2/miniforge3/envs/eval/lib/python3.11/site-packages/transformers/modeling_utils.py", line 5048, in from_pretrained + ) = cls._load_pretrained_model( + ^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/u/yurenh2/miniforge3/envs/eval/lib/python3.11/site-packages/transformers/modeling_utils.py", line 5468, in _load_pretrained_model + _error_msgs, disk_offload_index = load_shard_file(args) + ^^^^^^^^^^^^^^^^^^^^^ + File "/u/yurenh2/miniforge3/envs/eval/lib/python3.11/site-packages/transformers/modeling_utils.py", line 843, in load_shard_file + disk_offload_index = _load_state_dict_into_meta_model( + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/u/yurenh2/miniforge3/envs/eval/lib/python3.11/site-packages/torch/utils/_contextlib.py", line 120, in decorate_context + return func(*args, **kwargs) + ^^^^^^^^^^^^^^^^^^^^^ + File "/u/yurenh2/miniforge3/envs/eval/lib/python3.11/site-packages/transformers/modeling_utils.py", line 770, in _load_state_dict_into_meta_model + _load_parameter_into_model(model, param_name, param.to(param_device)) + ^^^^^^^^^^^^^^^^^^^^^^ +torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 32.00 MiB. GPU 1 has a total capacity of 39.49 GiB of which 30.31 MiB is free. Including non-PyTorch memory, this process has 39.46 GiB memory in use. Of the allocated memory 38.87 GiB is allocated by PyTorch, and 87.01 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. 
See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables) diff --git a/collaborativeagents/slurm/logs/vanilla_14360419.err b/collaborativeagents/slurm/logs/vanilla_14360419.err new file mode 100644 index 0000000..467f4f7 --- /dev/null +++ b/collaborativeagents/slurm/logs/vanilla_14360419.err @@ -0,0 +1,138 @@ +/u/yurenh2/miniforge3/envs/eval/lib/python3.11/site-packages/transformers/utils/hub.py:110: FutureWarning: Using `TRANSFORMERS_CACHE` is deprecated and will be removed in v5 of Transformers. Use `HF_HOME` instead. + warnings.warn( +2025-12-27 01:32:07,469 - INFO - Loaded dataset: mmlu +2025-12-27 01:32:07,469 - INFO - Loaded dataset: aime +2025-12-27 01:32:07,469 - INFO - Loaded dataset: math-hard +2025-12-27 01:32:07,469 - INFO - Loaded dataset: humaneval +2025-12-27 01:32:07,504 - INFO - Loaded 100 profiles from ../data/complex_profiles_v2/profiles_100.jsonl +2025-12-27 01:32:07,505 - INFO - Running method: vanilla +`torch_dtype` is deprecated! Use `dtype` instead! 
+ Loading checkpoint shards: 0%| | 0/4 [00:00", line 198, in _run_module_as_main +(APIServer pid=3643829) File "", line 88, in _run_code +(APIServer pid=3643829) File "/u/yurenh2/miniforge3/envs/eval/lib/python3.11/site-packages/vllm/entrypoints/openai/api_server.py", line 1469, in +(APIServer pid=3643829) uvloop.run(run_server(args)) +(APIServer pid=3643829) File "/u/yurenh2/miniforge3/envs/eval/lib/python3.11/site-packages/uvloop/__init__.py", line 92, in run +(APIServer pid=3643829) return runner.run(wrapper()) +(APIServer pid=3643829) ^^^^^^^^^^^^^^^^^^^^^ +(APIServer pid=3643829) File "/u/yurenh2/miniforge3/envs/eval/lib/python3.11/asyncio/runners.py", line 118, in run +(APIServer pid=3643829) return self._loop.run_until_complete(task) +(APIServer pid=3643829) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +(APIServer pid=3643829) File "uvloop/loop.pyx", line 1518, in uvloop.loop.Loop.run_until_complete +(APIServer pid=3643829) File "/u/yurenh2/miniforge3/envs/eval/lib/python3.11/site-packages/uvloop/__init__.py", line 48, in wrapper +(APIServer pid=3643829) return await main +(APIServer pid=3643829) ^^^^^^^^^^ +(APIServer pid=3643829) File "/u/yurenh2/miniforge3/envs/eval/lib/python3.11/site-packages/vllm/entrypoints/openai/api_server.py", line 1398, in run_server +(APIServer pid=3643829) await run_server_worker(listen_address, sock, args, **uvicorn_kwargs) +(APIServer pid=3643829) File "/u/yurenh2/miniforge3/envs/eval/lib/python3.11/site-packages/vllm/entrypoints/openai/api_server.py", line 1417, in run_server_worker +(APIServer pid=3643829) async with build_async_engine_client( +(APIServer pid=3643829) File "/u/yurenh2/miniforge3/envs/eval/lib/python3.11/contextlib.py", line 210, in __aenter__ +(APIServer pid=3643829) return await anext(self.gen) +(APIServer pid=3643829) ^^^^^^^^^^^^^^^^^^^^^ +(APIServer pid=3643829) File "/u/yurenh2/miniforge3/envs/eval/lib/python3.11/site-packages/vllm/entrypoints/openai/api_server.py", line 172, in build_async_engine_client 
+(APIServer pid=3643829) async with build_async_engine_client_from_engine_args( +(APIServer pid=3643829) File "/u/yurenh2/miniforge3/envs/eval/lib/python3.11/contextlib.py", line 210, in __aenter__ +(APIServer pid=3643829) return await anext(self.gen) +(APIServer pid=3643829) ^^^^^^^^^^^^^^^^^^^^^ +(APIServer pid=3643829) File "/u/yurenh2/miniforge3/envs/eval/lib/python3.11/site-packages/vllm/entrypoints/openai/api_server.py", line 213, in build_async_engine_client_from_engine_args +(APIServer pid=3643829) async_llm = AsyncLLM.from_vllm_config( +(APIServer pid=3643829) ^^^^^^^^^^^^^^^^^^^^^^^^^^ +(APIServer pid=3643829) File "/u/yurenh2/miniforge3/envs/eval/lib/python3.11/site-packages/vllm/v1/engine/async_llm.py", line 215, in from_vllm_config +(APIServer pid=3643829) return cls( +(APIServer pid=3643829) ^^^^ +(APIServer pid=3643829) File "/u/yurenh2/miniforge3/envs/eval/lib/python3.11/site-packages/vllm/v1/engine/async_llm.py", line 134, in __init__ +(APIServer pid=3643829) self.engine_core = EngineCoreClient.make_async_mp_client( +(APIServer pid=3643829) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +(APIServer pid=3643829) File "/u/yurenh2/miniforge3/envs/eval/lib/python3.11/site-packages/vllm/v1/engine/core_client.py", line 121, in make_async_mp_client +(APIServer pid=3643829) return AsyncMPClient(*client_args) +(APIServer pid=3643829) ^^^^^^^^^^^^^^^^^^^^^^^^^^^ +(APIServer pid=3643829) File "/u/yurenh2/miniforge3/envs/eval/lib/python3.11/site-packages/vllm/v1/engine/core_client.py", line 820, in __init__ +(APIServer pid=3643829) super().__init__( +(APIServer pid=3643829) File "/u/yurenh2/miniforge3/envs/eval/lib/python3.11/site-packages/vllm/v1/engine/core_client.py", line 477, in __init__ +(APIServer pid=3643829) with launch_core_engines(vllm_config, executor_class, log_stats) as ( +(APIServer pid=3643829) File "/u/yurenh2/miniforge3/envs/eval/lib/python3.11/contextlib.py", line 144, in __exit__ +(APIServer pid=3643829) next(self.gen) +(APIServer pid=3643829) File 
"/u/yurenh2/miniforge3/envs/eval/lib/python3.11/site-packages/vllm/v1/engine/utils.py", line 903, in launch_core_engines +(APIServer pid=3643829) wait_for_engine_startup( +(APIServer pid=3643829) File "/u/yurenh2/miniforge3/envs/eval/lib/python3.11/site-packages/vllm/v1/engine/utils.py", line 960, in wait_for_engine_startup +(APIServer pid=3643829) raise RuntimeError( +(APIServer pid=3643829) RuntimeError: Engine core initialization failed. See root cause above. Failed core proc(s): {} +[2025-12-29T07:04:21.056] error: *** JOB 14367370 ON gpua051 CANCELLED AT 2025-12-29T07:04:21 DUE to SIGNAL Terminated *** diff --git a/collaborativeagents/slurm/logs/vllm_only_14367345.err b/collaborativeagents/slurm/logs/vllm_only_14367345.err new file mode 100644 index 0000000..8713769 --- /dev/null +++ b/collaborativeagents/slurm/logs/vllm_only_14367345.err @@ -0,0 +1 @@ +/usr/bin/python: Error while finding module specification for 'vllm.entrypoints.openai.api_server' (ModuleNotFoundError: No module named 'vllm') diff --git a/collaborativeagents/slurm/rag_15640364.err b/collaborativeagents/slurm/rag_15640364.err new file mode 100644 index 0000000..9bd4347 --- /dev/null +++ b/collaborativeagents/slurm/rag_15640364.err @@ -0,0 +1,498 @@ +/u/yurenh2/miniforge3/envs/eval/lib/python3.11/site-packages/transformers/utils/hub.py:110: FutureWarning: Using `TRANSFORMERS_CACHE` is deprecated and will be removed in v5 of Transformers. Use `HF_HOME` instead. + warnings.warn( +/u/yurenh2/miniforge3/envs/eval/lib/python3.11/site-packages/transformers/utils/hub.py:110: FutureWarning: Using `TRANSFORMERS_CACHE` is deprecated and will be removed in v5 of Transformers. Use `HF_HOME` instead. 
+ warnings.warn( +(EngineCore_DP0 pid=2060309) Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00 + main() + File "/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/collaborativeagents/scripts/run_experiments.py", line 1169, in main + analysis = runner.run_all() + ^^^^^^^^^^^^^^^^ + File "/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/collaborativeagents/scripts/run_experiments.py", line 918, in run_all + results = self.run_method(method) + ^^^^^^^^^^^^^^^^^^^^^^^ + File "/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/collaborativeagents/scripts/run_experiments.py", line 536, in run_method + return self._run_method_batch( + ^^^^^^^^^^^^^^^^^^^^^^^ + File "/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/collaborativeagents/scripts/run_experiments.py", line 893, in _run_method_batch + with open(results_file, "w") as f: +OSError: [Errno 5] Input/output error diff --git a/collaborativeagents/slurm/rerun_reflection.sbatch b/collaborativeagents/slurm/rerun_reflection.sbatch new file mode 100644 index 0000000..aac2ed3 --- /dev/null +++ b/collaborativeagents/slurm/rerun_reflection.sbatch @@ -0,0 +1,83 @@ +#!/bin/bash +#SBATCH --job-name=refl_fix +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuH200x8-interactive +#SBATCH --gres=gpu:4 +#SBATCH --time=01:00:00 +#SBATCH --mem=200G +#SBATCH --cpus-per-task=32 +#SBATCH --output=%x-%j.out +#SBATCH --error=%x-%j.err + +cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model + +source ~/miniforge3/etc/profile.d/conda.sh +conda activate eval + +export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface +export PYTHONPATH=/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model:$PYTHONPATH + +PROFILE_PATH="collaborativeagents/data/complex_profiles_v2/profiles_200.jsonl" +AGENT_MODEL="/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/models/llama-3.1-8b-instruct" 
+USER_MODEL="meta-llama/Llama-3.1-70B-Instruct" + +echo "=== Starting vLLM servers ===" +date + +# User simulator on GPUs 0,1 (70B) +CUDA_VISIBLE_DEVICES=0,1 python -m vllm.entrypoints.openai.api_server \ + --model $USER_MODEL \ + --port 8004 --tensor-parallel-size 2 --gpu-memory-utilization 0.90 \ + --max-model-len 16384 --dtype bfloat16 --download-dir $HF_HOME & + +# Agent on GPUs 2,3 (8B) +CUDA_VISIBLE_DEVICES=2,3 python -m vllm.entrypoints.openai.api_server \ + --model $AGENT_MODEL \ + --port 8003 --tensor-parallel-size 2 --gpu-memory-utilization 0.90 \ + --max-model-len 16384 --dtype bfloat16 & + +# Wait for servers +echo "Waiting for vLLM servers..." +for i in {1..200}; do + if curl -s http://localhost:8004/health > /dev/null 2>&1; then + echo "User simulator (8004) ready after $((i*5)) seconds" + break + fi + sleep 5 +done +for i in {1..60}; do + if curl -s http://localhost:8003/health > /dev/null 2>&1; then + echo "Agent (8003) ready after $((i*5)) seconds" + break + fi + sleep 5 +done +echo "Both vLLM servers ready" +sleep 10 + +# Run profile 1 (user_14b429db - had empty response bug) +echo "=== Running profile 1 (user_14b429db) ===" +python collaborativeagents/scripts/run_experiments.py \ + --methods reflection \ + --n-profiles 5 \ + --n-sessions 15 \ + --start-profile 1 --end-profile 2 \ + --output-dir results/reflection_rerun \ + --profile-path $PROFILE_PATH \ + --datasets math-hard \ + --use-vllm --parallel-profiles 1 --no-batch-processing + +# Run profile 4 (user_a0a3ed44 - was missing) +echo "=== Running profile 4 (user_a0a3ed44) ===" +python collaborativeagents/scripts/run_experiments.py \ + --methods reflection \ + --n-profiles 5 \ + --n-sessions 15 \ + --start-profile 4 --end-profile 5 \ + --output-dir results/reflection_rerun \ + --profile-path $PROFILE_PATH \ + --datasets math-hard \ + --use-vllm --parallel-profiles 1 --no-batch-processing + +pkill -f "vllm.entrypoints" 2>/dev/null || true +echo "Done!" 
diff --git a/collaborativeagents/slurm/run_all_gpt_user_test.sh b/collaborativeagents/slurm/run_all_gpt_user_test.sh new file mode 100644 index 0000000..26a1677 --- /dev/null +++ b/collaborativeagents/slurm/run_all_gpt_user_test.sh @@ -0,0 +1,67 @@ +#!/bin/bash +#SBATCH --job-name=gpt_user_test +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA100x4 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=16 +#SBATCH --gres=gpu:nvidia_a100:2 +#SBATCH --mem=128G +#SBATCH --time=4:00:00 +#SBATCH --output=gpt_user_test_%j.out +#SBATCH --error=gpt_user_test_%j.err + +cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model +source /u/yurenh2/miniforge3/etc/profile.d/conda.sh +conda activate eval + +export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface +export PYTHONPATH="${PWD}/src:${PWD}/collaborativeagents:${PYTHONPATH}" +export NCCL_P2P_DISABLE=1 + +# Load OpenAI API key +set -a +source .env +set +a + +pip install --quiet openai python-dotenv json-repair + +MODEL_8B="/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/models/llama-3.1-8b-instruct" + +pkill -f "vllm.entrypoints" 2>/dev/null || true +sleep 2 + +# GPU 0: vLLM 8B agent, GPU 1: adapter models (embedding/extractor/reranker) +CUDA_VISIBLE_DEVICES=0 python -m vllm.entrypoints.openai.api_server \ + --model $MODEL_8B --port 8003 --tensor-parallel-size 1 \ + --gpu-memory-utilization 0.5 --max-model-len 8192 \ + --dtype bfloat16 --disable-log-requests & + +# Wait for server +for i in $(seq 1 90); do + curl -s http://localhost:8003/health > /dev/null 2>&1 && break + sleep 2 +done +echo "vLLM 8B agent server ready." 
+ +cd collaborativeagents/scripts + +# Large scale test: 50 profiles in parallel, 3 sessions each, all 6 methods +python run_experiments.py \ + --methods vanilla,contextual,reflection,all_memory,rag,rag_vector \ + --datasets math-hard \ + --n-profiles 20 \ + --n-sessions 5 \ + --max-turns 8 \ + --use-vllm \ + --use-openai-user \ + --openai-user-model gpt-5 \ + --reward-mode llm \ + --vllm-agent-url http://localhost:8003/v1 \ + --parallel-profiles 20 \ + --profile-path ../data/complex_profiles_v2/profiles_200.jsonl \ + --output-dir ../results/gpt_user_scale_test_$(date +%Y%m%d_%H%M%S) + +echo "All methods test complete!" + +pkill -f "vllm.entrypoints" 2>/dev/null || true diff --git a/collaborativeagents/slurm/run_all_memory.sh b/collaborativeagents/slurm/run_all_memory.sh new file mode 100755 index 0000000..b32cde6 --- /dev/null +++ b/collaborativeagents/slurm/run_all_memory.sh @@ -0,0 +1,57 @@ +#!/bin/bash +#SBATCH --job-name=all_memory +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA100x4 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=16 +#SBATCH --gres=gpu:nvidia_a100:2 +#SBATCH --mem=128G +#SBATCH --time=2:00:00 +#SBATCH --output=all_memory_%j.out +#SBATCH --error=all_memory_%j.err + +cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model +source /u/yurenh2/miniforge3/etc/profile.d/conda.sh +conda activate eval + +export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface +export PYTHONPATH="${PWD}/src:${PWD}/collaborativeagents:${PYTHONPATH}" +export NCCL_P2P_DISABLE=1 + +set -a; source .env; set +a +pip install --quiet openai python-dotenv json-repair + +MODEL_8B="/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/models/llama-3.1-8b-instruct" + +pkill -f "vllm.entrypoints" 2>/dev/null || true +sleep 2 + +CUDA_VISIBLE_DEVICES=0 python -m vllm.entrypoints.openai.api_server \ + --model $MODEL_8B --port 8003 --tensor-parallel-size 1 \ + --gpu-memory-utilization 0.5 --max-model-len 8192 \ + --dtype 
bfloat16 --disable-log-requests & + +for i in $(seq 1 90); do + curl -s http://localhost:8003/health > /dev/null 2>&1 && break + sleep 2 +done +echo "vLLM ready." + +cd collaborativeagents/scripts +python run_experiments.py \ + --methods all_memory \ + --datasets math-hard \ + --n-profiles 20 \ + --n-sessions 5 \ + --max-turns 8 \ + --use-vllm \ + --use-openai-user \ + --openai-user-model gpt-5 \ + --reward-mode llm \ + --vllm-agent-url http://localhost:8003/v1 \ + --parallel-profiles 20 \ + --profile-path ../data/complex_profiles_v2/profiles_200.jsonl \ + --output-dir ../results/gpt_user_scale_all_memory_$(date +%Y%m%d_%H%M%S) + +pkill -f "vllm.entrypoints" 2>/dev/null || true diff --git a/collaborativeagents/slurm/run_all_memory_v2.sh b/collaborativeagents/slurm/run_all_memory_v2.sh new file mode 100755 index 0000000..4caa29a --- /dev/null +++ b/collaborativeagents/slurm/run_all_memory_v2.sh @@ -0,0 +1,34 @@ +#!/bin/bash +#SBATCH --job-name=all_mem_v2 +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA100x4 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=16 +#SBATCH --gres=gpu:nvidia_a100:4 +#SBATCH --mem=200G +#SBATCH --time=48:00:00 +#SBATCH --output=logs/all_memory_v2_%j.out +#SBATCH --error=logs/all_memory_v2_%j.err + +set -e +cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model +mkdir -p collaborativeagents/slurm/logs collaborativeagents/results + +source /u/yurenh2/miniforge3/etc/profile.d/conda.sh +conda activate eval + +export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface +export PYTHONPATH="${PWD}/src:${PWD}/collaborativeagents:${PYTHONPATH}" +export NCCL_P2P_DISABLE=1 + +cd collaborativeagents/scripts + +# all_memory with FIXED memory paths +python run_experiments.py \ + --methods all_memory \ + --datasets mmlu,aime,math-hard,humaneval \ + --n-profiles 30 \ + --n-sessions 20 \ + --profile-path ../data/complex_profiles_v2/profiles_100.jsonl \ + --output-dir ../results/all_memory_v2_$(date 
+%Y%m%d_%H%M%S) diff --git a/collaborativeagents/slurm/run_contextual.sh b/collaborativeagents/slurm/run_contextual.sh new file mode 100755 index 0000000..c5ac1b0 --- /dev/null +++ b/collaborativeagents/slurm/run_contextual.sh @@ -0,0 +1,32 @@ +#!/bin/bash +#SBATCH --job-name=contextual +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA100x4 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=16 +#SBATCH --gres=gpu:nvidia_a100:4 +#SBATCH --mem=200G +#SBATCH --time=48:00:00 +#SBATCH --output=logs/contextual_%j.out +#SBATCH --error=logs/contextual_%j.err + +set -e +cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model +mkdir -p collaborativeagents/slurm/logs collaborativeagents/results + +source /u/yurenh2/miniforge3/etc/profile.d/conda.sh +conda activate eval + +export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface +export PYTHONPATH="${PWD}/src:${PWD}/collaborativeagents:${PYTHONPATH}" +export NCCL_P2P_DISABLE=1 + +cd collaborativeagents/scripts +python run_experiments.py \ + --methods contextual \ + --datasets mmlu,aime,math-hard,humaneval \ + --n-profiles 30 \ + --n-sessions 20 \ + --profile-path ../data/complex_profiles_v2/profiles_100.jsonl \ + --output-dir ../results/contextual_$(date +%Y%m%d_%H%M%S) diff --git a/collaborativeagents/slurm/run_contextual_p0_9.sh b/collaborativeagents/slurm/run_contextual_p0_9.sh new file mode 100755 index 0000000..b78afd0 --- /dev/null +++ b/collaborativeagents/slurm/run_contextual_p0_9.sh @@ -0,0 +1,36 @@ +#!/bin/bash +#SBATCH --job-name=ctx_p0_9 +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA100x4 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=16 +#SBATCH --gres=gpu:nvidia_a100:4 +#SBATCH --mem=200G +#SBATCH --time=48:00:00 +#SBATCH --output=logs/contextual_p0_9_%j.out +#SBATCH --error=logs/contextual_p0_9_%j.err + +set -e +cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model +mkdir -p collaborativeagents/slurm/logs 
collaborativeagents/results + +source /u/yurenh2/miniforge3/etc/profile.d/conda.sh +conda activate eval + +export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface +export PYTHONPATH="${PWD}/src:${PWD}/collaborativeagents:${PYTHONPATH}" +export NCCL_P2P_DISABLE=1 + +cd collaborativeagents/scripts + +# Run profiles 0-9 (10 profiles, ~40 hours) +python run_experiments.py \ + --methods contextual \ + --datasets mmlu,aime,math-hard,humaneval \ + --n-profiles 30 \ + --n-sessions 20 \ + --profile-path ../data/complex_profiles_v2/profiles_100.jsonl \ + --output-dir ../results/contextual_20251227_020146 \ + --start-profile 0 \ + --end-profile 10 diff --git a/collaborativeagents/slurm/run_contextual_p10_19.sh b/collaborativeagents/slurm/run_contextual_p10_19.sh new file mode 100755 index 0000000..64bd5e2 --- /dev/null +++ b/collaborativeagents/slurm/run_contextual_p10_19.sh @@ -0,0 +1,36 @@ +#!/bin/bash +#SBATCH --job-name=ctx_p10_19 +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA100x4 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=16 +#SBATCH --gres=gpu:nvidia_a100:4 +#SBATCH --mem=200G +#SBATCH --time=48:00:00 +#SBATCH --output=logs/contextual_p10_19_%j.out +#SBATCH --error=logs/contextual_p10_19_%j.err + +set -e +cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model +mkdir -p collaborativeagents/slurm/logs collaborativeagents/results + +source /u/yurenh2/miniforge3/etc/profile.d/conda.sh +conda activate eval + +export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface +export PYTHONPATH="${PWD}/src:${PWD}/collaborativeagents:${PYTHONPATH}" +export NCCL_P2P_DISABLE=1 + +cd collaborativeagents/scripts + +# Run profiles 10-19 (10 profiles, ~40 hours) +python run_experiments.py \ + --methods contextual \ + --datasets mmlu,aime,math-hard,humaneval \ + --n-profiles 30 \ + --n-sessions 20 \ + --profile-path ../data/complex_profiles_v2/profiles_100.jsonl \ + --output-dir ../results/contextual_20251227_020146 \ + 
--start-profile 10 \ + --end-profile 20 diff --git a/collaborativeagents/slurm/run_contextual_p20_29.sh b/collaborativeagents/slurm/run_contextual_p20_29.sh new file mode 100755 index 0000000..fb94734 --- /dev/null +++ b/collaborativeagents/slurm/run_contextual_p20_29.sh @@ -0,0 +1,36 @@ +#!/bin/bash +#SBATCH --job-name=ctx_p20_29 +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA100x4 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=16 +#SBATCH --gres=gpu:nvidia_a100:4 +#SBATCH --mem=200G +#SBATCH --time=48:00:00 +#SBATCH --output=logs/contextual_p20_29_%j.out +#SBATCH --error=logs/contextual_p20_29_%j.err + +set -e +cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model +mkdir -p collaborativeagents/slurm/logs collaborativeagents/results + +source /u/yurenh2/miniforge3/etc/profile.d/conda.sh +conda activate eval + +export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface +export PYTHONPATH="${PWD}/src:${PWD}/collaborativeagents:${PYTHONPATH}" +export NCCL_P2P_DISABLE=1 + +cd collaborativeagents/scripts + +# Run profiles 20-29 (10 profiles, ~40 hours) +python run_experiments.py \ + --methods contextual \ + --datasets mmlu,aime,math-hard,humaneval \ + --n-profiles 30 \ + --n-sessions 20 \ + --profile-path ../data/complex_profiles_v2/profiles_100.jsonl \ + --output-dir ../results/contextual_20251227_020146 \ + --start-profile 20 \ + --end-profile 30 diff --git a/collaborativeagents/slurm/run_contextual_resume.sh b/collaborativeagents/slurm/run_contextual_resume.sh new file mode 100755 index 0000000..2dec969 --- /dev/null +++ b/collaborativeagents/slurm/run_contextual_resume.sh @@ -0,0 +1,34 @@ +#!/bin/bash +#SBATCH --job-name=ctx_resume +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA100x4 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=16 +#SBATCH --gres=gpu:nvidia_a100:4 +#SBATCH --mem=200G +#SBATCH --time=48:00:00 +#SBATCH --output=logs/contextual_resume_%j.out +#SBATCH 
--error=logs/contextual_resume_%j.err + +set -e +cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model +mkdir -p collaborativeagents/slurm/logs collaborativeagents/results + +source /u/yurenh2/miniforge3/etc/profile.d/conda.sh +conda activate eval + +export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface +export PYTHONPATH="${PWD}/src:${PWD}/collaborativeagents:${PYTHONPATH}" +export NCCL_P2P_DISABLE=1 + +cd collaborativeagents/scripts + +# Resume from the EXISTING output directory (uses checkpoint) +python run_experiments.py \ + --methods contextual \ + --datasets mmlu,aime,math-hard,humaneval \ + --n-profiles 30 \ + --n-sessions 20 \ + --profile-path ../data/complex_profiles_v2/profiles_100.jsonl \ + --output-dir ../results/contextual_20251227_020146 diff --git a/collaborativeagents/slurm/run_contextual_v2.sh b/collaborativeagents/slurm/run_contextual_v2.sh new file mode 100755 index 0000000..0cf69e0 --- /dev/null +++ b/collaborativeagents/slurm/run_contextual_v2.sh @@ -0,0 +1,59 @@ +#!/bin/bash +#SBATCH --job-name=ctx_v2 +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuH200x8 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=32 +#SBATCH --gres=gpu:h200:4 +#SBATCH --mem=200G +#SBATCH --time=2:00:00 +#SBATCH --output=ctx_v2_%j.out +#SBATCH --error=ctx_v2_%j.err + +set -e +cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model +source /u/yurenh2/miniforge3/etc/profile.d/conda.sh +conda activate eval + +export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface +export PYTHONPATH="${PWD}/src:${PWD}/collaborativeagents:${PYTHONPATH}" +export NCCL_P2P_DISABLE=1 + +MODEL_70B="meta-llama/Llama-3.1-70B-Instruct" +MODEL_8B="/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/models/llama-3.1-8b-instruct" + +pkill -f "vllm.entrypoints" 2>/dev/null || true +sleep 2 + +CUDA_VISIBLE_DEVICES=0,1 python -m vllm.entrypoints.openai.api_server \ + --model $MODEL_70B --port 8004 
--tensor-parallel-size 2 \ + --gpu-memory-utilization 0.95 --max-model-len 16384 \ + --download-dir $HF_HOME --dtype bfloat16 --disable-log-requests & + +CUDA_VISIBLE_DEVICES=2,3 python -m vllm.entrypoints.openai.api_server \ + --model $MODEL_8B --port 8003 --tensor-parallel-size 2 \ + --gpu-memory-utilization 0.90 --max-model-len 16384 \ + --dtype bfloat16 --disable-log-requests & + +for i in $(seq 1 120); do + R1=$(curl -s http://localhost:8004/health > /dev/null 2>&1 && echo 1 || echo 0) + R2=$(curl -s http://localhost:8003/health > /dev/null 2>&1 && echo 1 || echo 0) + [ "$R1" = "1" ] && [ "$R2" = "1" ] && break + sleep 3 +done + +cd collaborativeagents/scripts +python run_experiments.py \ + --methods contextual \ + --datasets math-hard \ + --n-profiles 5 \ + --n-sessions 20 \ + --use-vllm \ + --vllm-user-url http://localhost:8004/v1 \ + --vllm-agent-url http://localhost:8003/v1 \ + --parallel-profiles 5 \ + --profile-path ../data/complex_profiles_v2/profiles_200.jsonl \ + --output-dir ../results/contextual_v2_$(date +%Y%m%d_%H%M%S) + +pkill -f "vllm.entrypoints" 2>/dev/null || true diff --git a/collaborativeagents/slurm/run_experiments.sh b/collaborativeagents/slurm/run_experiments.sh new file mode 100644 index 0000000..e254202 --- /dev/null +++ b/collaborativeagents/slurm/run_experiments.sh @@ -0,0 +1,66 @@ +#!/bin/bash +#SBATCH --job-name=run_expts +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuH200x8 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=32 +#SBATCH --gres=gpu:h200:8 +#SBATCH --mem=400G +#SBATCH --time=48:00:00 +#SBATCH --output=logs/run_expts_%j.out +#SBATCH --error=logs/run_expts_%j.err + +# Run experiments with models loaded locally +# This job needs 8 GPUs: +# - 4 GPUs for 70B judge model +# - 2 GPUs for PersonalizedLLM (embedder, reranker, extractor, main LLM) +# - Reserve for headroom + +set -e + +cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model + +# Create logs and results directories +mkdir 
-p collaborativeagents/slurm/logs +mkdir -p collaborativeagents/results + +echo "Starting experiments at $(date)" +echo "Job ID: $SLURM_JOB_ID" +echo "Node: $SLURMD_NODENAME" +echo "GPUs: $CUDA_VISIBLE_DEVICES" + +# Activate environment +source /u/yurenh2/miniforge3/etc/profile.d/conda.sh +conda activate eval + +# Check GPU availability +nvidia-smi + +# Add project to path +export PYTHONPATH="${PWD}/src:${PWD}/collaborativeagents:${PYTHONPATH}" + +# Run experiments +cd collaborativeagents/scripts + +# Quick test first (2 profiles, 2 sessions) +echo "Running quick test..." +python run_experiments.py \ + --methods rag_vector \ + --datasets math-500 \ + --n-profiles 2 \ + --n-sessions 2 \ + --profile-path ../data/complex_profiles_v2/profiles_100.jsonl \ + --output-dir ../results/test_$(date +%Y%m%d_%H%M%S) + +# Full run (uncomment when ready) +# echo "Running full experiments..." +# python run_experiments.py \ +# --methods vanilla,all_memory,rag,rag_vector \ +# --datasets math-500,gpqa,aime \ +# --n-profiles 100 \ +# --n-sessions 20 \ +# --profile-path ../data/complex_profiles_v2/profiles_100.jsonl \ +# --output-dir ../results/full_$(date +%Y%m%d_%H%M%S) + +echo "Experiments completed at $(date)" diff --git a/collaborativeagents/slurm/run_experiments_a100.sh b/collaborativeagents/slurm/run_experiments_a100.sh new file mode 100644 index 0000000..aa2e658 --- /dev/null +++ b/collaborativeagents/slurm/run_experiments_a100.sh @@ -0,0 +1,58 @@ +#!/bin/bash +#SBATCH --job-name=run_expts_a100 +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA100x4 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=16 +#SBATCH --gres=gpu:nvidia_a100:4 +#SBATCH --mem=200G +#SBATCH --time=48:00:00 +#SBATCH --output=logs/run_expts_a100_%j.out +#SBATCH --error=logs/run_expts_a100_%j.err + +# Run experiments on 4x A100 80GB +# - 70B judge model with TP=4 (~140GB) +# - 8B PersonalizedLLM models (~16GB shared) + +set -e + +cd 
/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model + +# Create logs and results directories +mkdir -p collaborativeagents/slurm/logs +mkdir -p collaborativeagents/results + +echo "Starting experiments at $(date)" +echo "Job ID: $SLURM_JOB_ID" +echo "Node: $SLURMD_NODENAME" +echo "GPUs: $CUDA_VISIBLE_DEVICES" + +# Activate environment +source /u/yurenh2/miniforge3/etc/profile.d/conda.sh +conda activate eval + +# Check GPU availability +nvidia-smi + +# Redirect HF cache to project space (avoid home quota issues) +export HF_HOME=/projects/bfqt/users/yurenh2/.cache/huggingface +mkdir -p $HF_HOME + +# Add project to path +export PYTHONPATH="${PWD}/src:${PWD}/collaborativeagents:${PYTHONPATH}" + +# Run experiments +cd collaborativeagents/scripts + +# Full benchmark run +echo "Running full experiments..." +python run_experiments.py \ + --methods vanilla,all_memory,rag,rag_vector \ + --datasets math-500 \ + --n-profiles 20 \ + --n-sessions 5 \ + --profile-path ../data/complex_profiles_v2/profiles_100.jsonl \ + --output-dir ../results/full_$(date +%Y%m%d_%H%M%S) + +echo "Experiments completed at $(date)" diff --git a/collaborativeagents/slurm/run_experiments_collab_baselines.sh b/collaborativeagents/slurm/run_experiments_collab_baselines.sh new file mode 100755 index 0000000..bbf53fc --- /dev/null +++ b/collaborativeagents/slurm/run_experiments_collab_baselines.sh @@ -0,0 +1,59 @@ +#!/bin/bash +#SBATCH --job-name=run_collab_baselines +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA100x4 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=16 +#SBATCH --gres=gpu:nvidia_a100:4 +#SBATCH --mem=200G +#SBATCH --time=48:00:00 +#SBATCH --output=logs/run_collab_baselines_%j.out +#SBATCH --error=logs/run_collab_baselines_%j.err + +# Run CollaborativeAgents baselines on 4x A100 80GB +# - contextual: Full history in context (summarize on overflow) +# - reflection: CollaborativeAgents' agent_notes approach +# - reflection_grpo: Reflection + GRPO 
training (with_proper_scaffolding) + +set -e + +cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model + +# Create logs and results directories +mkdir -p collaborativeagents/slurm/logs +mkdir -p collaborativeagents/results + +echo "Starting CollaborativeAgents baselines at $(date)" +echo "Job ID: $SLURM_JOB_ID" +echo "Node: $SLURMD_NODENAME" +echo "GPUs: $CUDA_VISIBLE_DEVICES" + +# Activate environment +source /u/yurenh2/miniforge3/etc/profile.d/conda.sh +conda activate eval + +# Check GPU availability +nvidia-smi + +# Redirect HF cache to project space (avoid home quota issues) +export HF_HOME=/projects/bfqt/users/yurenh2/.cache/huggingface +mkdir -p $HF_HOME + +# Add project to path +export PYTHONPATH="${PWD}/src:${PWD}/collaborativeagents:${PYTHONPATH}" + +# Run experiments +cd collaborativeagents/scripts + +# Run the 3 CollaborativeAgents baselines +echo "Running contextual, reflection, reflection_grpo baselines..." +python run_experiments.py \ + --methods contextual,reflection,reflection_grpo \ + --datasets math-500 \ + --n-profiles 20 \ + --n-sessions 5 \ + --profile-path ../data/complex_profiles_v2/profiles_100.jsonl \ + --output-dir ../results/collab_baselines_$(date +%Y%m%d_%H%M%S) + +echo "CollaborativeAgents baselines completed at $(date)" diff --git a/collaborativeagents/slurm/run_experiments_multiturn.sh b/collaborativeagents/slurm/run_experiments_multiturn.sh new file mode 100755 index 0000000..ca5c04c --- /dev/null +++ b/collaborativeagents/slurm/run_experiments_multiturn.sh @@ -0,0 +1,60 @@ +#!/bin/bash +#SBATCH --job-name=run_multiturn +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA100x4 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=16 +#SBATCH --gres=gpu:nvidia_a100:4 +#SBATCH --mem=200G +#SBATCH --time=48:00:00 +#SBATCH --output=logs/run_multiturn_%j.out +#SBATCH --error=logs/run_multiturn_%j.err + +# Run FIXED experiment with proper multi-turn conversation and user simulation +# This uses 
LocalUserAgent for user simulation and proper metrics extraction + +set -e + +cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model + +# Create logs and results directories +mkdir -p collaborativeagents/slurm/logs +mkdir -p collaborativeagents/results + +echo "Starting MULTI-TURN experiment at $(date)" +echo "Job ID: $SLURM_JOB_ID" +echo "Node: $SLURMD_NODENAME" +echo "GPUs: $CUDA_VISIBLE_DEVICES" + +# Activate environment +source /u/yurenh2/miniforge3/etc/profile.d/conda.sh +conda activate eval + +# Check GPU availability +nvidia-smi + +# Redirect HF cache to project space (avoid home quota issues) +export HF_HOME=/projects/bfqt/users/yurenh2/.cache/huggingface +mkdir -p $HF_HOME + +# Add project to path +export PYTHONPATH="${PWD}/src:${PWD}/collaborativeagents:${PYTHONPATH}" + +# Run experiments +cd collaborativeagents/scripts + +# INITIAL TEST: Run with reduced parameters to validate the fix +# - 5 profiles (instead of 20) +# - 3 sessions per profile (instead of 5) +# - All 7 methods +echo "Running MULTI-TURN experiments with user simulation..." 
+python run_experiments.py \ + --methods vanilla,all_memory,rag,rag_vector,contextual,reflection,reflection_grpo \ + --datasets math-500 \ + --n-profiles 5 \ + --n-sessions 3 \ + --profile-path ../data/complex_profiles_v2/profiles_100.jsonl \ + --output-dir ../results/multiturn_test_$(date +%Y%m%d_%H%M%S) + +echo "Multi-turn experiment completed at $(date)" diff --git a/collaborativeagents/slurm/run_full_experiment.sh b/collaborativeagents/slurm/run_full_experiment.sh new file mode 100755 index 0000000..bebe58c --- /dev/null +++ b/collaborativeagents/slurm/run_full_experiment.sh @@ -0,0 +1,58 @@ +#!/bin/bash +#SBATCH --job-name=full_exp +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA100x4 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=16 +#SBATCH --gres=gpu:nvidia_a100:4 +#SBATCH --mem=200G +#SBATCH --time=48:00:00 +#SBATCH --output=logs/full_exp_%j.out +#SBATCH --error=logs/full_exp_%j.err + +# Full scale experiment with 70B user model +# ORIGINAL CONFIG - DO NOT CHANGE + +set -e + +cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model + +mkdir -p collaborativeagents/slurm/logs +mkdir -p collaborativeagents/results + +echo "Starting FULL SCALE experiment at $(date)" +echo "Job ID: $SLURM_JOB_ID" +echo "Node: $SLURMD_NODENAME" +echo "GPUs: $CUDA_VISIBLE_DEVICES" + +source /u/yurenh2/miniforge3/etc/profile.d/conda.sh +conda activate eval + +nvidia-smi + +export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface +mkdir -p $HF_HOME + +export PYTHONPATH="${PWD}/src:${PWD}/collaborativeagents:${PYTHONPATH}" + +# Fix for nvlink errors +export NCCL_P2P_DISABLE=1 + +cd collaborativeagents/scripts + +# ORIGINAL FULL SCALE CONFIG: +# - 30 profiles +# - 20 sessions per profile +# - 4 challenging datasets: mmlu, aime, math-hard, humaneval +# - All 7 methods +echo "Running FULL SCALE experiments with 70B user model..." 
+python run_experiments.py \ + --methods vanilla,all_memory,rag,rag_vector,contextual,reflection,reflection_grpo \ + --datasets mmlu,aime,math-hard,humaneval \ + --n-profiles 30 \ + --n-sessions 20 \ + --profile-path ../data/complex_profiles_v2/profiles_100.jsonl \ + --output-dir ../results/full_experiment_$(date +%Y%m%d_%H%M%S) + +echo "Full experiment completed at $(date)" diff --git a/collaborativeagents/slurm/run_full_experiment_v2.sh b/collaborativeagents/slurm/run_full_experiment_v2.sh new file mode 100755 index 0000000..ab77478 --- /dev/null +++ b/collaborativeagents/slurm/run_full_experiment_v2.sh @@ -0,0 +1,61 @@ +#!/bin/bash +#SBATCH --job-name=full_exp_v2 +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA100x4 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=16 +#SBATCH --gres=gpu:nvidia_a100:4 +#SBATCH --mem=200G +#SBATCH --time=48:00:00 +#SBATCH --output=logs/full_exp_v2_%j.out +#SBATCH --error=logs/full_exp_v2_%j.err + +# Full scale experiment v2 - with fixes: +# 1. Use stable datasets (math-500, humaneval) - avoid problematic ones +# 2. Reduced scale first (20 profiles, 15 sessions) to verify stability +# 3. 
Clear CUDA cache between methods + +set -e + +cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model + +mkdir -p collaborativeagents/slurm/logs +mkdir -p collaborativeagents/results + +echo "Starting FULL SCALE v2 experiment at $(date)" +echo "Job ID: $SLURM_JOB_ID" +echo "Node: $SLURMD_NODENAME" +echo "GPUs: $CUDA_VISIBLE_DEVICES" + +source /u/yurenh2/miniforge3/etc/profile.d/conda.sh +conda activate eval + +nvidia-smi + +export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface +mkdir -p $HF_HOME + +export PYTHONPATH="${PWD}/src:${PWD}/collaborativeagents:${PYTHONPATH}" + +# Disable peer-to-peer memory access to avoid nvlink errors +export CUDA_VISIBLE_DEVICES=0,1,2,3 +export NCCL_P2P_DISABLE=1 + +cd collaborativeagents/scripts + +# FULL SCALE v2: +# - 20 profiles (reduced from 30 for stability) +# - 15 sessions per profile (enough to show learning) +# - 2 stable datasets: math-500, humaneval +# - All 7 methods +echo "Running FULL SCALE v2 experiments..." +python run_experiments.py \ + --methods vanilla,all_memory,rag,rag_vector,contextual,reflection,reflection_grpo \ + --datasets math-500,humaneval \ + --n-profiles 20 \ + --n-sessions 15 \ + --profile-path ../data/complex_profiles_v2/profiles_100.jsonl \ + --output-dir ../results/full_experiment_v2_$(date +%Y%m%d_%H%M%S) + +echo "Full experiment v2 completed at $(date)" diff --git a/collaborativeagents/slurm/run_grpo_test.sh b/collaborativeagents/slurm/run_grpo_test.sh new file mode 100644 index 0000000..199c540 --- /dev/null +++ b/collaborativeagents/slurm/run_grpo_test.sh @@ -0,0 +1,101 @@ +#!/bin/bash +#SBATCH --job-name=grpo_test +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA100x4 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=16 +#SBATCH --gres=gpu:4 +#SBATCH --mem=200G +#SBATCH --time=1:00:00 +#SBATCH --output=grpo_test_%j.out +#SBATCH --error=grpo_test_%j.err + +set -e +cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model + +source 
/u/yurenh2/miniforge3/etc/profile.d/conda.sh +conda activate eval + +export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface +export PYTHONPATH="${PWD}/src:${PWD}/collaborativeagents:${PYTHONPATH}" + +# Install required packages (ensure they're in the conda env) +echo "Installing required packages..." +pip install --quiet json-repair tenacity + +# Test: Verify imports work +echo "Testing imports..." +python3 -c "from json_repair import repair_json; from tenacity import retry; print('Imports OK')" + +# Start judge model (70B) on GPUs 2,3 +MODEL_70B="meta-llama/Llama-3.1-70B-Instruct" + +echo "Starting judge model..." +CUDA_VISIBLE_DEVICES=2,3 python -m vllm.entrypoints.openai.api_server \ + --model $MODEL_70B --port 8004 --tensor-parallel-size 2 \ + --gpu-memory-utilization 0.95 --max-model-len 8192 \ + --download-dir $HF_HOME --dtype bfloat16 --disable-log-requests & + +# Wait for server +for i in $(seq 1 60); do + curl -s http://localhost:8004/health > /dev/null 2>&1 && break + sleep 3 +done +echo "Judge model ready" + +# Run GRPO with minimal steps for testing +echo "Starting GRPO test (10 steps only)..." 
+cd collaborativeagents/training/grpo_verl + +python -m verl.trainer.main_ppo \ + algorithm.adv_estimator=grpo \ + data.train_files=${PWD}/data/session_level_reflection_grpo_train.parquet \ + data.val_files=${PWD}/data/session_level_reflection_grpo_train.parquet \ + data.train_batch_size=8 \ + data.max_prompt_length=2048 \ + data.max_response_length=1024 \ + data.filter_overlong_prompts=True \ + data.truncation=error \ + data.prompt_key=prompt \ + data.reward_fn_key=data_source \ + actor_rollout_ref.model.path=/work/nvme/bfqt/yurenh2/sft_checkpoints/checkpoint-200 \ + actor_rollout_ref.actor.optim.lr=1e-6 \ + actor_rollout_ref.model.use_remove_padding=True \ + actor_rollout_ref.actor.ppo_mini_batch_size=4 \ + actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=2 \ + actor_rollout_ref.actor.use_kl_loss=True \ + actor_rollout_ref.actor.kl_loss_coef=0.003 \ + actor_rollout_ref.actor.kl_loss_type=low_var_kl \ + actor_rollout_ref.actor.entropy_coeff=0 \ + actor_rollout_ref.model.enable_gradient_checkpointing=True \ + actor_rollout_ref.actor.fsdp_config.model_dtype=bfloat16 \ + actor_rollout_ref.actor.fsdp_config.param_offload=False \ + actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \ + actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=2 \ + actor_rollout_ref.rollout.tensor_model_parallel_size=1 \ + actor_rollout_ref.rollout.name=vllm \ + actor_rollout_ref.rollout.gpu_memory_utilization=0.4 \ + actor_rollout_ref.rollout.n=4 \ + actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=2 \ + actor_rollout_ref.ref.fsdp_config.model_dtype=bfloat16 \ + actor_rollout_ref.ref.fsdp_config.param_offload=True \ + actor_rollout_ref.rollout.temperature=0.9 \ + actor_rollout_ref.rollout.top_p=0.9 \ + custom_reward_function.path=${PWD}/verl_reward_functions.py \ + custom_reward_function.name=compute_score \ + algorithm.use_kl_in_reward=False \ + trainer.critic_warmup=0 \ + trainer.val_before_train=False \ + trainer.logger='["console"]' \ + 
trainer.project_name=grpo-test \ + trainer.experiment_name=llama3.1-8b-grpo-test \ + trainer.n_gpus_per_node=2 \ + trainer.nnodes=1 \ + trainer.save_freq=100 \ + trainer.test_freq=100 \ + trainer.total_training_steps=10 \ + trainer.default_local_dir=/scratch/bfqt/yurenh2/grpo_test_outputs + +echo "GRPO test complete!" +pkill -f "vllm.entrypoints" 2>/dev/null || true diff --git a/collaborativeagents/slurm/run_grpo_training.sh b/collaborativeagents/slurm/run_grpo_training.sh new file mode 100755 index 0000000..4f9e3f1 --- /dev/null +++ b/collaborativeagents/slurm/run_grpo_training.sh @@ -0,0 +1,69 @@ +#!/bin/bash +#SBATCH --job-name=grpo_refl +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA100x4 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=16 +#SBATCH --gres=gpu:4 +#SBATCH --mem=200G +#SBATCH --time=48:00:00 +#SBATCH --output=logs/grpo_reflection_%j.out +#SBATCH --error=logs/grpo_reflection_%j.err + +set -e +cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model +mkdir -p collaborativeagents/slurm/logs collaborativeagents/training/outputs + +source /u/yurenh2/miniforge3/etc/profile.d/conda.sh +conda activate eval + +export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface +export PYTHONPATH="${PWD}/src:${PWD}/collaborativeagents:${PYTHONPATH}" +export NCCL_P2P_DISABLE=1 + +# Use the AWQ 70B model for judge (fits on 2 GPUs) +JUDGE_MODEL="hugging-quants/Meta-Llama-3.1-70B-Instruct-AWQ-INT4" +JUDGE_PORT=8000 + +# Start vLLM server for judge model (on GPUs 2,3) +echo "=== Starting vLLM judge server ===" +CUDA_VISIBLE_DEVICES=2,3 python -m vllm.entrypoints.openai.api_server \ + --model "$JUDGE_MODEL" \ + --port $JUDGE_PORT \ + --tensor-parallel-size 2 \ + --max-model-len 8192 \ + --dtype auto \ + --trust-remote-code & + +VLLM_PID=$! +echo "vLLM server PID: $VLLM_PID" + +# Wait for server to be ready +echo "Waiting for vLLM server to start..." 
+for i in {1..60}; do + if curl -s http://localhost:$JUDGE_PORT/health > /dev/null 2>&1; then + echo "vLLM server is ready!" + break + fi + sleep 10 +done + +# Run GRPO training (on GPUs 0,1) +echo "=== Starting GRPO training ===" +CUDA_VISIBLE_DEVICES=0,1 python collaborativeagents/training/train_grpo.py \ + --model-path collaborativeagents/training/outputs/sft_reflection \ + --data-path collaborativeagents/training/training_data/grpo_training_data.json \ + --output-dir collaborativeagents/training/outputs/grpo_reflection \ + --judge-url "http://localhost:$JUDGE_PORT/v1" \ + --judge-model "$JUDGE_MODEL" \ + --max-steps 200 \ + --learning-rate 1e-6 \ + --num-generations 8 + +# Cleanup +echo "=== Cleanup ===" +kill $VLLM_PID 2>/dev/null || true + +echo "=== GRPO Training Complete ===" +echo "Model saved to: collaborativeagents/training/outputs/grpo_reflection/final" diff --git a/collaborativeagents/slurm/run_rag.sh b/collaborativeagents/slurm/run_rag.sh new file mode 100755 index 0000000..4c8bdf9 --- /dev/null +++ b/collaborativeagents/slurm/run_rag.sh @@ -0,0 +1,57 @@ +#!/bin/bash +#SBATCH --job-name=rag +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA100x4 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=16 +#SBATCH --gres=gpu:nvidia_a100:2 +#SBATCH --mem=128G +#SBATCH --time=2:00:00 +#SBATCH --output=rag_%j.out +#SBATCH --error=rag_%j.err + +cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model +source /u/yurenh2/miniforge3/etc/profile.d/conda.sh +conda activate eval + +export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface +export PYTHONPATH="${PWD}/src:${PWD}/collaborativeagents:${PYTHONPATH}" +export NCCL_P2P_DISABLE=1 + +set -a; source .env; set +a +pip install --quiet openai python-dotenv json-repair + +MODEL_8B="/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/models/llama-3.1-8b-instruct" + +pkill -f "vllm.entrypoints" 2>/dev/null || true +sleep 2 + +CUDA_VISIBLE_DEVICES=0 python -m 
vllm.entrypoints.openai.api_server \ + --model $MODEL_8B --port 8003 --tensor-parallel-size 1 \ + --gpu-memory-utilization 0.5 --max-model-len 8192 \ + --dtype bfloat16 --disable-log-requests & + +for i in $(seq 1 90); do + curl -s http://localhost:8003/health > /dev/null 2>&1 && break + sleep 2 +done +echo "vLLM ready." + +cd collaborativeagents/scripts +python run_experiments.py \ + --methods rag \ + --datasets math-hard \ + --n-profiles 20 \ + --n-sessions 5 \ + --max-turns 8 \ + --use-vllm \ + --use-openai-user \ + --openai-user-model gpt-5 \ + --reward-mode llm \ + --vllm-agent-url http://localhost:8003/v1 \ + --parallel-profiles 20 \ + --profile-path ../data/complex_profiles_v2/profiles_200.jsonl \ + --output-dir ../results/gpt_user_scale_rag_$(date +%Y%m%d_%H%M%S) + +pkill -f "vllm.entrypoints" 2>/dev/null || true diff --git a/collaborativeagents/slurm/run_rag_bge_v2.sh b/collaborativeagents/slurm/run_rag_bge_v2.sh new file mode 100755 index 0000000..1776bb9 --- /dev/null +++ b/collaborativeagents/slurm/run_rag_bge_v2.sh @@ -0,0 +1,61 @@ +#!/bin/bash +#SBATCH --job-name=rag_bge_v2 +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuH200x8 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=32 +#SBATCH --gres=gpu:h200:4 +#SBATCH --mem=200G +#SBATCH --time=2:00:00 +#SBATCH --output=rag_bge_v2_%j.out +#SBATCH --error=rag_bge_v2_%j.err + +set -e +cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model +source /u/yurenh2/miniforge3/etc/profile.d/conda.sh +conda activate eval + +export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface +export PYTHONPATH="${PWD}/src:${PWD}/collaborativeagents:${PYTHONPATH}" +export NCCL_P2P_DISABLE=1 + +MODEL_70B="meta-llama/Llama-3.1-70B-Instruct" +MODEL_8B="/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/models/llama-3.1-8b-instruct" + +pkill -f "vllm.entrypoints" 2>/dev/null || true +sleep 2 + +# 70B user simulator on GPUs 0,1 +CUDA_VISIBLE_DEVICES=0,1 python -m 
vllm.entrypoints.openai.api_server \ + --model $MODEL_70B --port 8004 --tensor-parallel-size 2 \ + --gpu-memory-utilization 0.95 --max-model-len 16384 \ + --download-dir $HF_HOME --dtype bfloat16 --disable-log-requests & + +# 8B agent on GPUs 2,3 with 40% memory (leaving room for BGE reranker + embedding) +CUDA_VISIBLE_DEVICES=2,3 python -m vllm.entrypoints.openai.api_server \ + --model $MODEL_8B --port 8003 --tensor-parallel-size 2 \ + --gpu-memory-utilization 0.40 --max-model-len 16384 \ + --dtype bfloat16 --disable-log-requests & + +for i in $(seq 1 120); do + R1=$(curl -s http://localhost:8004/health > /dev/null 2>&1 && echo 1 || echo 0) + R2=$(curl -s http://localhost:8003/health > /dev/null 2>&1 && echo 1 || echo 0) + [ "$R1" = "1" ] && [ "$R2" = "1" ] && break + sleep 3 +done + +cd collaborativeagents/scripts +python run_experiments.py \ + --methods rag_vector_bge \ + --datasets math-hard \ + --n-profiles 5 \ + --n-sessions 20 \ + --use-vllm \ + --vllm-user-url http://localhost:8004/v1 \ + --vllm-agent-url http://localhost:8003/v1 \ + --parallel-profiles 5 \ + --profile-path ../data/complex_profiles_v2/profiles_200.jsonl \ + --output-dir ../results/rag_vector_bge_v2_$(date +%Y%m%d_%H%M%S) + +pkill -f "vllm.entrypoints" 2>/dev/null || true diff --git a/collaborativeagents/slurm/run_rag_v2.sh b/collaborativeagents/slurm/run_rag_v2.sh new file mode 100755 index 0000000..29cd6c2 --- /dev/null +++ b/collaborativeagents/slurm/run_rag_v2.sh @@ -0,0 +1,59 @@ +#!/bin/bash +#SBATCH --job-name=rag_v2 +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuH200x8 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=32 +#SBATCH --gres=gpu:h200:4 +#SBATCH --mem=200G +#SBATCH --time=2:00:00 +#SBATCH --output=rag_v2_%j.out +#SBATCH --error=rag_v2_%j.err + +set -e +cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model +source /u/yurenh2/miniforge3/etc/profile.d/conda.sh +conda activate eval + +export 
HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface +export PYTHONPATH="${PWD}/src:${PWD}/collaborativeagents:${PYTHONPATH}" +export NCCL_P2P_DISABLE=1 + +MODEL_70B="meta-llama/Llama-3.1-70B-Instruct" +MODEL_8B="/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/models/llama-3.1-8b-instruct" + +pkill -f "vllm.entrypoints" 2>/dev/null || true +sleep 2 + +CUDA_VISIBLE_DEVICES=0,1 python -m vllm.entrypoints.openai.api_server \ + --model $MODEL_70B --port 8004 --tensor-parallel-size 2 \ + --gpu-memory-utilization 0.95 --max-model-len 16384 \ + --download-dir $HF_HOME --dtype bfloat16 --disable-log-requests & + +CUDA_VISIBLE_DEVICES=2,3 python -m vllm.entrypoints.openai.api_server \ + --model $MODEL_8B --port 8003 --tensor-parallel-size 2 \ + --gpu-memory-utilization 0.40 --max-model-len 16384 \ + --dtype bfloat16 --disable-log-requests & + +for i in $(seq 1 120); do + R1=$(curl -s http://localhost:8004/health > /dev/null 2>&1 && echo 1 || echo 0) + R2=$(curl -s http://localhost:8003/health > /dev/null 2>&1 && echo 1 || echo 0) + [ "$R1" = "1" ] && [ "$R2" = "1" ] && break + sleep 3 +done + +cd collaborativeagents/scripts +python run_experiments.py \ + --methods rag \ + --datasets math-hard \ + --n-profiles 5 \ + --n-sessions 20 \ + --use-vllm \ + --vllm-user-url http://localhost:8004/v1 \ + --vllm-agent-url http://localhost:8003/v1 \ + --parallel-profiles 5 \ + --profile-path ../data/complex_profiles_v2/profiles_200.jsonl \ + --output-dir ../results/rag_v2_$(date +%Y%m%d_%H%M%S) + +pkill -f "vllm.entrypoints" 2>/dev/null || true diff --git a/collaborativeagents/slurm/run_rag_vector.sh b/collaborativeagents/slurm/run_rag_vector.sh new file mode 100755 index 0000000..f12d6dc --- /dev/null +++ b/collaborativeagents/slurm/run_rag_vector.sh @@ -0,0 +1,57 @@ +#!/bin/bash +#SBATCH --job-name=rag_vector +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA100x4 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=16 +#SBATCH 
--gres=gpu:nvidia_a100:2 +#SBATCH --mem=128G +#SBATCH --time=2:00:00 +#SBATCH --output=rag_vector_%j.out +#SBATCH --error=rag_vector_%j.err + +cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model +source /u/yurenh2/miniforge3/etc/profile.d/conda.sh +conda activate eval + +export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface +export PYTHONPATH="${PWD}/src:${PWD}/collaborativeagents:${PYTHONPATH}" +export NCCL_P2P_DISABLE=1 + +set -a; source .env; set +a +pip install --quiet openai python-dotenv json-repair + +MODEL_8B="/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/models/llama-3.1-8b-instruct" + +pkill -f "vllm.entrypoints" 2>/dev/null || true +sleep 2 + +CUDA_VISIBLE_DEVICES=0 python -m vllm.entrypoints.openai.api_server \ + --model $MODEL_8B --port 8003 --tensor-parallel-size 1 \ + --gpu-memory-utilization 0.5 --max-model-len 8192 \ + --dtype bfloat16 --disable-log-requests & + +for i in $(seq 1 90); do + curl -s http://localhost:8003/health > /dev/null 2>&1 && break + sleep 2 +done +echo "vLLM ready." 
+ +cd collaborativeagents/scripts +python run_experiments.py \ + --methods rag_vector \ + --datasets math-hard \ + --n-profiles 20 \ + --n-sessions 5 \ + --max-turns 8 \ + --use-vllm \ + --use-openai-user \ + --openai-user-model gpt-5 \ + --reward-mode llm \ + --vllm-agent-url http://localhost:8003/v1 \ + --parallel-profiles 20 \ + --profile-path ../data/complex_profiles_v2/profiles_200.jsonl \ + --output-dir ../results/gpt_user_scale_rag_vector_$(date +%Y%m%d_%H%M%S) + +pkill -f "vllm.entrypoints" 2>/dev/null || true diff --git a/collaborativeagents/slurm/run_rag_vector_3x.sh b/collaborativeagents/slurm/run_rag_vector_3x.sh new file mode 100644 index 0000000..7ca2e3e --- /dev/null +++ b/collaborativeagents/slurm/run_rag_vector_3x.sh @@ -0,0 +1,74 @@ +#!/bin/bash +#SBATCH --job-name=rag_3x +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuH200x8 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=32 +#SBATCH --gres=gpu:h200:4 +#SBATCH --mem=200G +#SBATCH --time=4:00:00 +#SBATCH --output=rag_3x_%j.out +#SBATCH --error=rag_3x_%j.err + +set -e +cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model +source /u/yurenh2/miniforge3/etc/profile.d/conda.sh +conda activate eval + +export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface +export PYTHONPATH="${PWD}/src:${PWD}/collaborativeagents:${PYTHONPATH}" +export NCCL_P2P_DISABLE=1 + +MODEL_70B="meta-llama/Llama-3.1-70B-Instruct" +MODEL_8B="/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/models/llama-3.1-8b-instruct" + +pkill -f "vllm.entrypoints" 2>/dev/null || true +sleep 2 + +# 70B user simulator on GPUs 0,1 +CUDA_VISIBLE_DEVICES=0,1 python -m vllm.entrypoints.openai.api_server \ + --model $MODEL_70B --port 8004 --tensor-parallel-size 2 \ + --gpu-memory-utilization 0.95 --max-model-len 16384 \ + --download-dir $HF_HOME --dtype bfloat16 --disable-log-requests & + +# 8B agent on GPUs 2,3 with 40% memory (leaving room for embedding + reranker) 
+CUDA_VISIBLE_DEVICES=2,3 python -m vllm.entrypoints.openai.api_server \ + --model $MODEL_8B --port 8003 --tensor-parallel-size 2 \ + --gpu-memory-utilization 0.40 --max-model-len 16384 \ + --dtype bfloat16 --disable-log-requests & + +for i in $(seq 1 120); do + R1=$(curl -s http://localhost:8004/health > /dev/null 2>&1 && echo 1 || echo 0) + R2=$(curl -s http://localhost:8003/health > /dev/null 2>&1 && echo 1 || echo 0) + [ "$R1" = "1" ] && [ "$R2" = "1" ] && break + sleep 3 +done + +cd collaborativeagents/scripts + +# Run 3 times with different output directories +for run in 1 2 3; do + echo "=========================================" + echo "Starting Run $run of 3" + echo "=========================================" + + python run_experiments.py \ + --methods rag_vector \ + --datasets math-hard \ + --n-profiles 5 \ + --n-sessions 20 \ + --use-vllm \ + --vllm-user-url http://localhost:8004/v1 \ + --vllm-agent-url http://localhost:8003/v1 \ + --parallel-profiles 5 \ + --profile-path ../data/complex_profiles_v2/profiles_200.jsonl \ + --output-dir ../results/rag_vector_run${run}_$(date +%Y%m%d_%H%M%S) + + echo "Run $run complete" + echo "" +done + +echo "All 3 runs complete!" 
+ +pkill -f "vllm.entrypoints" 2>/dev/null || true diff --git a/collaborativeagents/slurm/run_rag_vector_llm_test.sh b/collaborativeagents/slurm/run_rag_vector_llm_test.sh new file mode 100644 index 0000000..01d9c58 --- /dev/null +++ b/collaborativeagents/slurm/run_rag_vector_llm_test.sh @@ -0,0 +1,76 @@ +#!/bin/bash +#SBATCH --job-name=rvec_llm +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuH200x8 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=32 +#SBATCH --gres=gpu:h200:4 +#SBATCH --mem=200G +#SBATCH --time=1:00:00 +#SBATCH --output=rvec_llm_%j.out +#SBATCH --error=rvec_llm_%j.err + +set -e +cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model +source /u/yurenh2/miniforge3/etc/profile.d/conda.sh +conda activate eval + +export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface +export PYTHONPATH="${PWD}/src:${PWD}/collaborativeagents:${PYTHONPATH}" +export NCCL_P2P_DISABLE=1 + +# Load OpenAI API key +set -a +source .env +set +a + +# Install openai if not present +pip install --quiet openai python-dotenv + +MODEL_70B="meta-llama/Llama-3.1-70B-Instruct" +MODEL_8B="/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/models/llama-3.1-8b-instruct" + +pkill -f "vllm.entrypoints" 2>/dev/null || true +sleep 2 + +# 70B user simulator on GPUs 0,1 +CUDA_VISIBLE_DEVICES=0,1 python -m vllm.entrypoints.openai.api_server \ + --model $MODEL_70B --port 8004 --tensor-parallel-size 2 \ + --gpu-memory-utilization 0.95 --max-model-len 16384 \ + --download-dir $HF_HOME --dtype bfloat16 --disable-log-requests & + +# 8B agent on GPUs 2,3 with 40% memory (leaving room for embedding + reranker) +CUDA_VISIBLE_DEVICES=2,3 python -m vllm.entrypoints.openai.api_server \ + --model $MODEL_8B --port 8003 --tensor-parallel-size 2 \ + --gpu-memory-utilization 0.40 --max-model-len 16384 \ + --dtype bfloat16 --disable-log-requests & + +# Wait for servers +for i in $(seq 1 120); do + R1=$(curl -s http://localhost:8004/health > 
/dev/null 2>&1 && echo 1 || echo 0) + R2=$(curl -s http://localhost:8003/health > /dev/null 2>&1 && echo 1 || echo 0) + [ "$R1" = "1" ] && [ "$R2" = "1" ] && break + sleep 3 +done + +echo "vLLM servers ready." + +cd collaborativeagents/scripts + +# Small test: 2 profiles, 5 sessions each +python run_experiments.py \ + --methods rag_vector_llm \ + --datasets math-hard \ + --n-profiles 2 \ + --n-sessions 5 \ + --use-vllm \ + --vllm-user-url http://localhost:8004/v1 \ + --vllm-agent-url http://localhost:8003/v1 \ + --parallel-profiles 2 \ + --profile-path ../data/complex_profiles_v2/profiles_200.jsonl \ + --output-dir ../results/rag_vector_llm_test_$(date +%Y%m%d_%H%M%S) + +echo "Test complete!" + +pkill -f "vllm.entrypoints" 2>/dev/null || true diff --git a/collaborativeagents/slurm/run_rag_vector_v2.sh b/collaborativeagents/slurm/run_rag_vector_v2.sh new file mode 100755 index 0000000..d8151bb --- /dev/null +++ b/collaborativeagents/slurm/run_rag_vector_v2.sh @@ -0,0 +1,59 @@ +#!/bin/bash +#SBATCH --job-name=ragvec_v2 +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuH200x8 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=32 +#SBATCH --gres=gpu:h200:4 +#SBATCH --mem=200G +#SBATCH --time=2:00:00 +#SBATCH --output=ragvec_v2_%j.out +#SBATCH --error=ragvec_v2_%j.err + +set -e +cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model +source /u/yurenh2/miniforge3/etc/profile.d/conda.sh +conda activate eval + +export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface +export PYTHONPATH="${PWD}/src:${PWD}/collaborativeagents:${PYTHONPATH}" +export NCCL_P2P_DISABLE=1 + +MODEL_70B="meta-llama/Llama-3.1-70B-Instruct" +MODEL_8B="/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/models/llama-3.1-8b-instruct" + +pkill -f "vllm.entrypoints" 2>/dev/null || true +sleep 2 + +CUDA_VISIBLE_DEVICES=0,1 python -m vllm.entrypoints.openai.api_server \ + --model $MODEL_70B --port 8004 --tensor-parallel-size 2 \ + 
--gpu-memory-utilization 0.95 --max-model-len 16384 \ + --download-dir $HF_HOME --dtype bfloat16 --disable-log-requests & + +CUDA_VISIBLE_DEVICES=2,3 python -m vllm.entrypoints.openai.api_server \ + --model $MODEL_8B --port 8003 --tensor-parallel-size 2 \ + --gpu-memory-utilization 0.40 --max-model-len 16384 \ + --dtype bfloat16 --disable-log-requests & + +for i in $(seq 1 120); do + R1=$(curl -s http://localhost:8004/health > /dev/null 2>&1 && echo 1 || echo 0) + R2=$(curl -s http://localhost:8003/health > /dev/null 2>&1 && echo 1 || echo 0) + [ "$R1" = "1" ] && [ "$R2" = "1" ] && break + sleep 3 +done + +cd collaborativeagents/scripts +python run_experiments.py \ + --methods rag_vector \ + --datasets math-hard \ + --n-profiles 5 \ + --n-sessions 20 \ + --use-vllm \ + --vllm-user-url http://localhost:8004/v1 \ + --vllm-agent-url http://localhost:8003/v1 \ + --parallel-profiles 5 \ + --profile-path ../data/complex_profiles_v2/profiles_200.jsonl \ + --output-dir ../results/rag_vector_v2_$(date +%Y%m%d_%H%M%S) + +pkill -f "vllm.entrypoints" 2>/dev/null || true diff --git a/collaborativeagents/slurm/run_reflection.sh b/collaborativeagents/slurm/run_reflection.sh new file mode 100755 index 0000000..0f93941 --- /dev/null +++ b/collaborativeagents/slurm/run_reflection.sh @@ -0,0 +1,32 @@ +#!/bin/bash +#SBATCH --job-name=reflection +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA100x4 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=16 +#SBATCH --gres=gpu:nvidia_a100:4 +#SBATCH --mem=200G +#SBATCH --time=48:00:00 +#SBATCH --output=logs/reflection_%j.out +#SBATCH --error=logs/reflection_%j.err + +set -e +cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model +mkdir -p collaborativeagents/slurm/logs collaborativeagents/results + +source /u/yurenh2/miniforge3/etc/profile.d/conda.sh +conda activate eval + +export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface +export 
PYTHONPATH="${PWD}/src:${PWD}/collaborativeagents:${PYTHONPATH}" +export NCCL_P2P_DISABLE=1 + +cd collaborativeagents/scripts +python run_experiments.py \ + --methods reflection \ + --datasets mmlu,aime,math-hard,humaneval \ + --n-profiles 30 \ + --n-sessions 20 \ + --profile-path ../data/complex_profiles_v2/profiles_100.jsonl \ + --output-dir ../results/reflection_$(date +%Y%m%d_%H%M%S) diff --git a/collaborativeagents/slurm/run_reflection_grpo.sh b/collaborativeagents/slurm/run_reflection_grpo.sh new file mode 100755 index 0000000..0d93e91 --- /dev/null +++ b/collaborativeagents/slurm/run_reflection_grpo.sh @@ -0,0 +1,32 @@ +#!/bin/bash +#SBATCH --job-name=reflection_grpo +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA100x4 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=16 +#SBATCH --gres=gpu:nvidia_a100:4 +#SBATCH --mem=200G +#SBATCH --time=48:00:00 +#SBATCH --output=logs/reflection_grpo_%j.out +#SBATCH --error=logs/reflection_grpo_%j.err + +set -e +cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model +mkdir -p collaborativeagents/slurm/logs collaborativeagents/results + +source /u/yurenh2/miniforge3/etc/profile.d/conda.sh +conda activate eval + +export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface +export PYTHONPATH="${PWD}/src:${PWD}/collaborativeagents:${PYTHONPATH}" +export NCCL_P2P_DISABLE=1 + +cd collaborativeagents/scripts +python run_experiments.py \ + --methods reflection_grpo \ + --datasets mmlu,aime,math-hard,humaneval \ + --n-profiles 30 \ + --n-sessions 20 \ + --profile-path ../data/complex_profiles_v2/profiles_100.jsonl \ + --output-dir ../results/reflection_grpo_$(date +%Y%m%d_%H%M%S) diff --git a/collaborativeagents/slurm/run_reflection_v2.sh b/collaborativeagents/slurm/run_reflection_v2.sh new file mode 100755 index 0000000..5e1528c --- /dev/null +++ b/collaborativeagents/slurm/run_reflection_v2.sh @@ -0,0 +1,126 @@ +#!/bin/bash +#SBATCH --job-name=refl_v2 +#SBATCH 
--account=bfqt-delta-gpu +#SBATCH --partition=gpuH200x8 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=32 +#SBATCH --gres=gpu:h200:4 +#SBATCH --mem=200G +#SBATCH --time=4:00:00 +#SBATCH --output=refl_v2_%j.out +#SBATCH --error=refl_v2_%j.err + +# Reflection experiment v2 - with proper_scaffolding enabled (LLM-based retrieval) +# Uses original CollaborativeAgents prompts for fair reproduction +# H200 node, 5 profiles, 15 sessions + +set -e + +cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model +mkdir -p collaborativeagents/slurm/logs collaborativeagents/results + +source /u/yurenh2/miniforge3/etc/profile.d/conda.sh +conda activate eval + +export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface +export PYTHONPATH="${PWD}/src:${PWD}/collaborativeagents:${PYTHONPATH}" +export NCCL_P2P_DISABLE=1 + +# Model paths +MODEL_70B="meta-llama/Llama-3.1-70B-Instruct" +MODEL_8B="/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/models/llama-3.1-8b-instruct" +PORT_USER=8004 +PORT_AGENT=8003 + +echo "=== Starting vLLM servers ===" +echo "Method: reflection (with proper_scaffolding)" +echo "User simulator: $MODEL_70B (70B full-precision)" +echo "Agent: $MODEL_8B (8B)" +date + +# Kill any existing vLLM servers +pkill -f "vllm.entrypoints" 2>/dev/null || true +sleep 2 + +# Start 70B user simulator on GPU 0-1 (TP=2) +echo "Starting 70B user simulator on GPU 0-1..." +CUDA_VISIBLE_DEVICES=0,1 python -m vllm.entrypoints.openai.api_server \ + --model $MODEL_70B \ + --port $PORT_USER \ + --tensor-parallel-size 2 \ + --gpu-memory-utilization 0.95 \ + --max-model-len 16384 \ + --download-dir $HF_HOME \ + --dtype bfloat16 \ + --disable-log-requests & +SERVER_USER_PID=$! + +# Start 8B agent on GPU 2-3 (TP=2) +echo "Starting 8B agent on GPU 2-3..." 
+CUDA_VISIBLE_DEVICES=2,3 python -m vllm.entrypoints.openai.api_server \ + --model $MODEL_8B \ + --port $PORT_AGENT \ + --tensor-parallel-size 2 \ + --gpu-memory-utilization 0.90 \ + --max-model-len 16384 \ + --dtype bfloat16 \ + --disable-log-requests & +SERVER_AGENT_PID=$! + +echo "Waiting for vLLM servers to be ready..." + +# Wait for servers +for i in $(seq 1 120); do + READY_USER=$(curl -s http://localhost:$PORT_USER/health > /dev/null 2>&1 && echo 1 || echo 0) + READY_AGENT=$(curl -s http://localhost:$PORT_AGENT/health > /dev/null 2>&1 && echo 1 || echo 0) + + if [ "$READY_USER" = "1" ] && [ "$READY_AGENT" = "1" ]; then + echo "Both servers ready after $((i*3)) seconds" + break + fi + if [ $((i % 20)) -eq 0 ]; then + echo " Still waiting... user=$READY_USER, agent=$READY_AGENT ($((i*3))s)" + fi + sleep 3 +done + +# Verify health +if ! curl -s http://localhost:$PORT_USER/health > /dev/null; then + echo "ERROR: User server not healthy" + kill $SERVER_USER_PID $SERVER_AGENT_PID 2>/dev/null + exit 1 +fi +if ! curl -s http://localhost:$PORT_AGENT/health > /dev/null; then + echo "ERROR: Agent server not healthy" + kill $SERVER_USER_PID $SERVER_AGENT_PID 2>/dev/null + exit 1 +fi +echo "Both vLLM servers healthy!" 
+ +echo "" +echo "=== Running reflection experiment with proper_scaffolding ===" +echo "Settings: 5 profiles, 15 sessions, math-hard dataset" +date + +cd collaborativeagents/scripts + +# Run reflection: 5 profiles, 15 sessions each +python run_experiments.py \ + --methods reflection \ + --datasets math-hard \ + --n-profiles 5 \ + --n-sessions 20 \ + --use-vllm \ + --vllm-user-url http://localhost:$PORT_USER/v1 \ + --vllm-agent-url http://localhost:$PORT_AGENT/v1 \ + --parallel-profiles 5 \ + --profile-path ../data/complex_profiles_v2/profiles_200.jsonl \ + --output-dir ../results/reflection_v2_$(date +%Y%m%d_%H%M%S) + +echo "" +echo "=== Experiment completed ===" +date + +# Cleanup +kill $SERVER_USER_PID $SERVER_AGENT_PID 2>/dev/null || true diff --git a/collaborativeagents/slurm/run_sft_h200.sh b/collaborativeagents/slurm/run_sft_h200.sh new file mode 100644 index 0000000..3be79d2 --- /dev/null +++ b/collaborativeagents/slurm/run_sft_h200.sh @@ -0,0 +1,64 @@ +#!/bin/bash +#SBATCH --job-name=sft_train +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuH200x8 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=16 +#SBATCH --gres=gpu:4 +#SBATCH --mem=200G +#SBATCH --time=12:00:00 +#SBATCH --output=/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/sft_train_%j.out +#SBATCH --error=/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/sft_train_%j.err + +# SFT Training on H200 + +set -e +cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model + +source /u/yurenh2/miniforge3/etc/profile.d/conda.sh +conda activate eval + +export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface +export PYTHONPATH="${PWD}/src:${PWD}/collaborativeagents:${PWD}/collaborativeagents/scripts:${PYTHONPATH}" +export WANDB_PROJECT="collaborative-agent-reflection-sft" + +echo "=== SFT Training (H200) ===" +date +nvidia-smi --query-gpu=index,name,memory.total --format=csv + 
+DATA_PATH="collaborativeagents/training/training_data/sft_training_data.json" +MODEL_PATH="/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/models/llama-3.1-8b-instruct" +OUTPUT_DIR="collaborativeagents/training/outputs/sft_reflection" + +echo "Data: $DATA_PATH" +echo "Model: $MODEL_PATH" +echo "Output: $OUTPUT_DIR" + +# Check data exists +if [ ! -f "$DATA_PATH" ]; then + echo "ERROR: Training data not found at $DATA_PATH" + exit 1 +fi + +echo "" +echo "Training data size: $(wc -c < $DATA_PATH) bytes" +python -c "import json; d=json.load(open('$DATA_PATH')); print(f'Training examples: {len(d)}')" + +mkdir -p $OUTPUT_DIR + +echo "" +echo "Starting SFT training..." +python collaborativeagents/training/train_sft.py \ + --model-path $MODEL_PATH \ + --data-path $DATA_PATH \ + --output-dir $OUTPUT_DIR \ + --num-epochs 4 \ + --learning-rate 1e-6 \ + --batch-size 1 \ + --gradient-accumulation 64 + +echo "" +echo "=== SFT Training Complete ===" +echo "Model saved to: $OUTPUT_DIR" +date diff --git a/collaborativeagents/slurm/run_sft_only.sh b/collaborativeagents/slurm/run_sft_only.sh new file mode 100644 index 0000000..a98ae25 --- /dev/null +++ b/collaborativeagents/slurm/run_sft_only.sh @@ -0,0 +1,64 @@ +#!/bin/bash +#SBATCH --job-name=sft_train +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA100x4 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=16 +#SBATCH --gres=gpu:4 +#SBATCH --mem=200G +#SBATCH --time=24:00:00 +#SBATCH --output=/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/sft_train_%j.out +#SBATCH --error=/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/sft_train_%j.err + +# SFT Training only (data already exists) + +set -e +cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model + +source /u/yurenh2/miniforge3/etc/profile.d/conda.sh +conda activate eval + +export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface +export 
PYTHONPATH="${PWD}/src:${PWD}/collaborativeagents:${PWD}/collaborativeagents/scripts:${PYTHONPATH}" +export WANDB_PROJECT="collaborative-agent-reflection-sft" + +echo "=== SFT Training ===" +date +nvidia-smi --query-gpu=index,name,memory.total --format=csv + +DATA_PATH="collaborativeagents/training/training_data/sft_training_data.json" +MODEL_PATH="/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/models/llama-3.1-8b-instruct" +OUTPUT_DIR="collaborativeagents/training/outputs/sft_reflection" + +echo "Data: $DATA_PATH" +echo "Model: $MODEL_PATH" +echo "Output: $OUTPUT_DIR" + +# Check data exists +if [ ! -f "$DATA_PATH" ]; then + echo "ERROR: Training data not found at $DATA_PATH" + exit 1 +fi + +echo "" +echo "Training data size: $(wc -c < $DATA_PATH) bytes" +python -c "import json; d=json.load(open('$DATA_PATH')); print(f'Training examples: {len(d)}')" + +mkdir -p $OUTPUT_DIR + +echo "" +echo "Starting SFT training..." +python collaborativeagents/training/train_sft.py \ + --model-path $MODEL_PATH \ + --data-path $DATA_PATH \ + --output-dir $OUTPUT_DIR \ + --num-epochs 4 \ + --learning-rate 1e-6 \ + --batch-size 1 \ + --gradient-accumulation 64 + +echo "" +echo "=== SFT Training Complete ===" +echo "Model saved to: $OUTPUT_DIR" +date diff --git a/collaborativeagents/slurm/run_sft_training.sh b/collaborativeagents/slurm/run_sft_training.sh new file mode 100755 index 0000000..8cc9f78 --- /dev/null +++ b/collaborativeagents/slurm/run_sft_training.sh @@ -0,0 +1,43 @@ +#!/bin/bash +#SBATCH --job-name=sft_refl +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA100x4 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=16 +#SBATCH --gres=gpu:4 +#SBATCH --mem=200G +#SBATCH --time=24:00:00 +#SBATCH --output=logs/sft_reflection_%j.out +#SBATCH --error=logs/sft_reflection_%j.err + +set -e +cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model +mkdir -p collaborativeagents/slurm/logs collaborativeagents/training/outputs 
collaborativeagents/training/training_data + +source /u/yurenh2/miniforge3/etc/profile.d/conda.sh +conda activate eval + +export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface +export PYTHONPATH="${PWD}/src:${PWD}/collaborativeagents:${PYTHONPATH}" +export WANDB_PROJECT="collaborative-agent-reflection-sft" + +# Step 1: Generate training data from completed experiments +echo "=== Step 1: Generating training data ===" +python collaborativeagents/training/generate_training_data.py \ + --results-dir collaborativeagents/results \ + --output-dir collaborativeagents/training/training_data + +# Step 2: Run SFT training using TRL +echo "=== Step 2: Running SFT training ===" +python collaborativeagents/training/train_sft.py \ + --model-path /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/models/llama-3.1-8b-instruct \ + --data-path collaborativeagents/training/training_data/sft_training_data.json \ + --output-dir collaborativeagents/training/outputs/sft_reflection \ + --num-epochs 4 \ + --learning-rate 1e-6 \ + --batch-size 1 \ + --gradient-accumulation 64 + +echo "=== SFT Training Complete ===" +echo "Model saved to: collaborativeagents/training/outputs/sft_reflection" diff --git a/collaborativeagents/slurm/run_vanilla.sh b/collaborativeagents/slurm/run_vanilla.sh new file mode 100755 index 0000000..b29d3a2 --- /dev/null +++ b/collaborativeagents/slurm/run_vanilla.sh @@ -0,0 +1,32 @@ +#!/bin/bash +#SBATCH --job-name=vanilla +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA100x4 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=16 +#SBATCH --gres=gpu:nvidia_a100:4 +#SBATCH --mem=200G +#SBATCH --time=48:00:00 +#SBATCH --output=logs/vanilla_%j.out +#SBATCH --error=logs/vanilla_%j.err + +set -e +cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model +mkdir -p collaborativeagents/slurm/logs collaborativeagents/results + +source /u/yurenh2/miniforge3/etc/profile.d/conda.sh +conda activate eval + +export 
HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface +export PYTHONPATH="${PWD}/src:${PWD}/collaborativeagents:${PYTHONPATH}" +export NCCL_P2P_DISABLE=1 + +cd collaborativeagents/scripts +python run_experiments.py \ + --methods vanilla \ + --datasets mmlu,aime,math-hard,humaneval \ + --n-profiles 30 \ + --n-sessions 20 \ + --profile-path ../data/complex_profiles_v2/profiles_100.jsonl \ + --output-dir ../results/vanilla_$(date +%Y%m%d_%H%M%S) diff --git a/collaborativeagents/slurm/run_vanilla_v2.sh b/collaborativeagents/slurm/run_vanilla_v2.sh new file mode 100755 index 0000000..5db6064 --- /dev/null +++ b/collaborativeagents/slurm/run_vanilla_v2.sh @@ -0,0 +1,59 @@ +#!/bin/bash +#SBATCH --job-name=vanilla_v2 +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuH200x8 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=32 +#SBATCH --gres=gpu:h200:4 +#SBATCH --mem=200G +#SBATCH --time=2:00:00 +#SBATCH --output=vanilla_v2_%j.out +#SBATCH --error=vanilla_v2_%j.err + +set -e +cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model +source /u/yurenh2/miniforge3/etc/profile.d/conda.sh +conda activate eval + +export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface +export PYTHONPATH="${PWD}/src:${PWD}/collaborativeagents:${PYTHONPATH}" +export NCCL_P2P_DISABLE=1 + +MODEL_70B="meta-llama/Llama-3.1-70B-Instruct" +MODEL_8B="/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/models/llama-3.1-8b-instruct" + +pkill -f "vllm.entrypoints" 2>/dev/null || true +sleep 2 + +CUDA_VISIBLE_DEVICES=0,1 python -m vllm.entrypoints.openai.api_server \ + --model $MODEL_70B --port 8004 --tensor-parallel-size 2 \ + --gpu-memory-utilization 0.95 --max-model-len 16384 \ + --download-dir $HF_HOME --dtype bfloat16 --disable-log-requests & + +CUDA_VISIBLE_DEVICES=2,3 python -m vllm.entrypoints.openai.api_server \ + --model $MODEL_8B --port 8003 --tensor-parallel-size 2 \ + --gpu-memory-utilization 0.90 --max-model-len 16384 \ + --dtype 
bfloat16 --disable-log-requests & + +for i in $(seq 1 120); do + R1=$(curl -s http://localhost:8004/health > /dev/null 2>&1 && echo 1 || echo 0) + R2=$(curl -s http://localhost:8003/health > /dev/null 2>&1 && echo 1 || echo 0) + [ "$R1" = "1" ] && [ "$R2" = "1" ] && break + sleep 3 +done + +cd collaborativeagents/scripts +python run_experiments.py \ + --methods vanilla \ + --datasets math-hard \ + --n-profiles 5 \ + --n-sessions 20 \ + --use-vllm \ + --vllm-user-url http://localhost:8004/v1 \ + --vllm-agent-url http://localhost:8003/v1 \ + --parallel-profiles 5 \ + --profile-path ../data/complex_profiles_v2/profiles_200.jsonl \ + --output-dir ../results/vanilla_v2_$(date +%Y%m%d_%H%M%S) + +pkill -f "vllm.entrypoints" 2>/dev/null || true diff --git a/collaborativeagents/slurm/scale_contextual_forget.sbatch b/collaborativeagents/slurm/scale_contextual_forget.sbatch new file mode 100644 index 0000000..4942a4d --- /dev/null +++ b/collaborativeagents/slurm/scale_contextual_forget.sbatch @@ -0,0 +1,69 @@ +#!/bin/bash +#SBATCH --job-name=ctx_forget +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuH200x8-interactive +#SBATCH --gres=gpu:4 +#SBATCH --time=01:00:00 +#SBATCH --mem=200G +#SBATCH --cpus-per-task=32 +#SBATCH --output=%x-%j.out +#SBATCH --error=%x-%j.err + +cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model + +source ~/miniforge3/etc/profile.d/conda.sh +conda activate eval + +export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface +export TRANSFORMERS_CACHE=/projects/bfqt/users/yurenh2/hf_cache/huggingface +export PYTHONPATH=/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model:$PYTHONPATH + +PROFILE_PATH="collaborativeagents/data/complex_profiles_v2/profiles_200.jsonl" +AGENT_MODEL="/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/models/llama-3.1-8b-instruct" +USER_MODEL="meta-llama/Llama-3.1-70B-Instruct" + +echo "=== Starting vLLM servers ===" +date + +# User simulator on GPUs 0,1 (70B) 
+CUDA_VISIBLE_DEVICES=0,1 python -m vllm.entrypoints.openai.api_server \ + --model $USER_MODEL \ + --port 8004 --tensor-parallel-size 2 --gpu-memory-utilization 0.90 \ + --max-model-len 16384 --dtype bfloat16 --download-dir $HF_HOME & + +# Agent on GPUs 2,3 (8B) +CUDA_VISIBLE_DEVICES=2,3 python -m vllm.entrypoints.openai.api_server \ + --model $AGENT_MODEL \ + --port 8003 --tensor-parallel-size 2 --gpu-memory-utilization 0.90 \ + --max-model-len 16384 --dtype bfloat16 & + +# Wait for servers +echo "Waiting for vLLM servers..." +for i in {1..200}; do + if curl -s http://localhost:8004/health > /dev/null 2>&1; then + echo "User simulator (8004) ready after $((i*5)) seconds" + break + fi + sleep 5 +done +for i in {1..60}; do + if curl -s http://localhost:8003/health > /dev/null 2>&1; then + echo "Agent (8003) ready after $((i*5)) seconds" + break + fi + sleep 5 +done +echo "Both vLLM servers ready" +sleep 10 + +# Run contextual with reduced memory limits (4000 tokens, 15 turns) +CUDA_VISIBLE_DEVICES=2,3 python collaborativeagents/scripts/run_experiments.py \ + --methods contextual \ + --n-profiles 5 \ + --n-sessions 15 \ + --output-dir results/scale_test_contextual_forget \ + --profile-path $PROFILE_PATH \ + --datasets math-hard \ + --use-vllm --parallel-profiles 30 --no-batch-processing + +pkill -f "vllm.entrypoints" 2>/dev/null || true diff --git a/collaborativeagents/slurm/scale_missing.sbatch b/collaborativeagents/slurm/scale_missing.sbatch new file mode 100644 index 0000000..1ab4310 --- /dev/null +++ b/collaborativeagents/slurm/scale_missing.sbatch @@ -0,0 +1,29 @@ +#!/bin/bash +#SBATCH --job-name=scale_miss +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuH200x8-interactive +#SBATCH --gres=gpu:4 +#SBATCH --time=01:00:00 +#SBATCH --mem=128G +#SBATCH --cpus-per-task=16 +#SBATCH --output=%x-%j.out +#SBATCH --error=%x-%j.err + +cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model + +source ~/miniforge3/etc/profile.d/conda.sh +conda 
activate eval + +export HF_HOME=/projects/bfqt/users/yurenh2/.cache/huggingface +export TRANSFORMERS_CACHE=/projects/bfqt/users/yurenh2/.cache/huggingface +export PYTHONPATH=/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model:$PYTHONPATH + +# Run rag and reflection for profiles 5 only (start_profile=4, end_profile=5 means profile index 4) +python collaborativeagents/scripts/run_experiments.py \ + --methods rag,reflection \ + --num_profiles 5 \ + --start_profile 4 \ + --sessions_per_profile 15 \ + --output_dir results/scale_test_missing \ + --profile_path collaborativeagents/data/complex_profiles_v2/profiles_200.jsonl \ + --datasets math-hard diff --git a/collaborativeagents/slurm/scale_rag_remaining.sbatch b/collaborativeagents/slurm/scale_rag_remaining.sbatch new file mode 100644 index 0000000..b11bba3 --- /dev/null +++ b/collaborativeagents/slurm/scale_rag_remaining.sbatch @@ -0,0 +1,71 @@ +#!/bin/bash +#SBATCH --job-name=scale_rag2 +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuH200x8-interactive +#SBATCH --gres=gpu:4 +#SBATCH --time=01:00:00 +#SBATCH --mem=200G +#SBATCH --cpus-per-task=32 +#SBATCH --output=%x-%j.out +#SBATCH --error=%x-%j.err + +cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model + +source ~/miniforge3/etc/profile.d/conda.sh +conda activate eval + +export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface +export TRANSFORMERS_CACHE=/projects/bfqt/users/yurenh2/hf_cache/huggingface +export PYTHONPATH=/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model:$PYTHONPATH + +PROFILE_PATH="collaborativeagents/data/complex_profiles_v2/profiles_200.jsonl" +AGENT_MODEL="/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/models/llama-3.1-8b-instruct" +USER_MODEL="meta-llama/Llama-3.1-70B-Instruct" + +echo "=== Starting vLLM servers ===" +date + +# User simulator on GPUs 0,1 (70B) +CUDA_VISIBLE_DEVICES=0,1 python -m vllm.entrypoints.openai.api_server \ + --model $USER_MODEL \ 
+ --port 8004 --tensor-parallel-size 2 --gpu-memory-utilization 0.90 \ + --max-model-len 16384 --dtype bfloat16 --download-dir $HF_HOME & + +# Agent on GPUs 2,3 (8B) - lower memory for rag_vector (needs embedding/reranker) +CUDA_VISIBLE_DEVICES=2,3 python -m vllm.entrypoints.openai.api_server \ + --model $AGENT_MODEL \ + --port 8003 --tensor-parallel-size 2 --gpu-memory-utilization 0.40 \ + --max-model-len 16384 --dtype bfloat16 & + +# Wait for servers +echo "Waiting for vLLM servers..." +for i in {1..200}; do + if curl -s http://localhost:8004/health > /dev/null 2>&1; then + echo "User simulator (8004) ready after $((i*5)) seconds" + break + fi + sleep 5 +done +for i in {1..60}; do + if curl -s http://localhost:8003/health > /dev/null 2>&1; then + echo "Agent (8003) ready after $((i*5)) seconds" + break + fi + sleep 5 +done +echo "Both vLLM servers ready" +sleep 10 + +# Run rag_vector - only missing profiles 3,4 (0,1,2 already complete) +# Don't restrict CUDA devices - let PersonalizedLLM handle GPU assignment +python collaborativeagents/scripts/run_experiments.py \ + --methods rag_vector \ + --n-profiles 5 \ + --n-sessions 15 \ + --start-profile 3 \ + --output-dir results/scale_test_remaining \ + --profile-path $PROFILE_PATH \ + --datasets math-hard \ + --use-vllm --parallel-profiles 30 --no-batch-processing + +pkill -f "vllm.entrypoints" 2>/dev/null || true diff --git a/collaborativeagents/slurm/start_model_servers.sh b/collaborativeagents/slurm/start_model_servers.sh new file mode 100644 index 0000000..1f4e177 --- /dev/null +++ b/collaborativeagents/slurm/start_model_servers.sh @@ -0,0 +1,82 @@ +#!/bin/bash +#SBATCH --job-name=model_servers +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuH200x8 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=32 +#SBATCH --gres=gpu:h200:8 +#SBATCH --mem=400G +#SBATCH --time=24:00:00 +#SBATCH --output=logs/model_servers_%j.out +#SBATCH --error=logs/model_servers_%j.err + +# Start vLLM/sglang model 
servers for experiments +# - Port 8004: Llama-3.3-70B-Instruct (user simulator + judge) - 4 GPUs +# - Port 8003: Llama-3.1-8B-Instruct (agent) - 2 GPUs + +set -e + +cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model + +# Create logs directory +mkdir -p collaborativeagents/slurm/logs + +echo "Starting model servers at $(date)" +echo "Job ID: $SLURM_JOB_ID" +echo "Node: $SLURMD_NODENAME" +echo "GPUs: $CUDA_VISIBLE_DEVICES" + +# Activate environment with sglang +source /u/yurenh2/miniforge3/etc/profile.d/conda.sh +conda activate eval + +# Check GPU availability +nvidia-smi + +# Start 70B model server (user simulator + judge) - needs 4 GPUs for TP +echo "Starting Llama-3.3-70B-Instruct server on port 8004..." +CUDA_VISIBLE_DEVICES=0,1,2,3 python -m sglang.launch_server \ + --model-path meta-llama/Llama-3.3-70B-Instruct \ + --port 8004 \ + --tp-size 4 \ + --context-length 16384 \ + --mem-fraction-static 0.85 \ + 2>&1 | tee logs/server_70b_$SLURM_JOB_ID.log & +SERVER_70B_PID=$! + +# Wait for 70B server to start (takes a few minutes) +echo "Waiting for 70B server to initialize..." +sleep 120 + +# Start 8B model server (agent) - needs 2 GPUs +echo "Starting Llama-3.1-8B-Instruct server on port 8003..." +CUDA_VISIBLE_DEVICES=4,5 python -m sglang.launch_server \ + --model-path models/llama-3.1-8b-instruct \ + --served-model-name meta-llama/Llama-3.1-8B-Instruct \ + --port 8003 \ + --tp-size 2 \ + --context-length 16384 \ + --mem-fraction-static 0.85 \ + 2>&1 | tee logs/server_8b_$SLURM_JOB_ID.log & +SERVER_8B_PID=$! + +echo "Servers starting..." 
+echo "70B server PID: $SERVER_70B_PID" +echo "8B server PID: $SERVER_8B_PID" + +# Save server info for experiment runner +cat > collaborativeagents/slurm/server_info.txt << EOF +NODE=$SLURMD_NODENAME +JOB_ID=$SLURM_JOB_ID +SERVER_70B_PID=$SERVER_70B_PID +SERVER_8B_PID=$SERVER_8B_PID +USER_API_BASE=http://$SLURMD_NODENAME:8004/v1 +AGENT_API_BASE=http://$SLURMD_NODENAME:8003/v1 +JUDGE_API_BASE=http://$SLURMD_NODENAME:8004/v1 +EOF + +echo "Server info saved to collaborativeagents/slurm/server_info.txt" + +# Wait for both servers +wait $SERVER_70B_PID $SERVER_8B_PID diff --git a/collaborativeagents/slurm/test_70b_pilot.sh b/collaborativeagents/slurm/test_70b_pilot.sh new file mode 100755 index 0000000..3514e86 --- /dev/null +++ b/collaborativeagents/slurm/test_70b_pilot.sh @@ -0,0 +1,48 @@ +#!/bin/bash +#SBATCH --job-name=test_70b +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA100x4 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=16 +#SBATCH --gres=gpu:nvidia_a100:4 +#SBATCH --mem=200G +#SBATCH --time=01:00:00 +#SBATCH --output=logs/test_70b_%j.out +#SBATCH --error=logs/test_70b_%j.err + +# Pilot test for 70B AWQ user model +# Tests that the model loads without OOM and multi-turn works + +set -e + +cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model + +# Create logs directory +mkdir -p collaborativeagents/slurm/logs + +echo "Starting 70B pilot test at $(date)" +echo "Job ID: $SLURM_JOB_ID" +echo "Node: $SLURMD_NODENAME" +echo "GPUs: $CUDA_VISIBLE_DEVICES" + +# Activate environment +source /u/yurenh2/miniforge3/etc/profile.d/conda.sh +conda activate eval + +# Check GPU availability +nvidia-smi + +# Set HF cache to project space +export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface +mkdir -p $HF_HOME + +# Add project to path +export PYTHONPATH="${PWD}/src:${PWD}/collaborativeagents:${PYTHONPATH}" + +# Run pilot test +cd collaborativeagents/scripts +echo "Running 70B pilot test..." 
+python test_70b_pilot.py + +echo "Pilot test completed at $(date)" diff --git a/collaborativeagents/slurm/test_extractor.sh b/collaborativeagents/slurm/test_extractor.sh new file mode 100755 index 0000000..252af2c --- /dev/null +++ b/collaborativeagents/slurm/test_extractor.sh @@ -0,0 +1,22 @@ +#!/bin/bash +#SBATCH --job-name=test_ext +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA100x4 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=4 +#SBATCH --gres=gpu:1 +#SBATCH --mem=32G +#SBATCH --time=00:10:00 +#SBATCH --output=logs/test_extractor_%j.out +#SBATCH --error=logs/test_extractor_%j.err + +cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model +mkdir -p collaborativeagents/slurm/logs + +source /u/yurenh2/miniforge3/etc/profile.d/conda.sh +conda activate eval + +export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface + +python collaborativeagents/scripts/test_extractor.py diff --git a/collaborativeagents/slurm/test_multiturn.sh b/collaborativeagents/slurm/test_multiturn.sh new file mode 100755 index 0000000..1bf528d --- /dev/null +++ b/collaborativeagents/slurm/test_multiturn.sh @@ -0,0 +1,38 @@ +#!/bin/bash +#SBATCH --job-name=test_multiturn +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA100x4 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=8 +#SBATCH --gres=gpu:nvidia_a100:2 +#SBATCH --mem=100G +#SBATCH --time=00:30:00 +#SBATCH --output=logs/test_multiturn_%j.out +#SBATCH --error=logs/test_multiturn_%j.err + +# Quick validation test for multi-turn conversation + +set -e + +cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model + +mkdir -p collaborativeagents/slurm/logs + +echo "Starting test at $(date)" +echo "Job ID: $SLURM_JOB_ID" + +source /u/yurenh2/miniforge3/etc/profile.d/conda.sh +conda activate eval + +nvidia-smi + +export HF_HOME=/projects/bfqt/users/yurenh2/.cache/huggingface +export PYTHONPATH="${PWD}/src:${PWD}/collaborativeagents:${PYTHONPATH}" + +cd 
collaborativeagents/scripts + +echo "Running multi-turn validation test..." +python test_multiturn.py + +echo "Test completed at $(date)" diff --git a/collaborativeagents/slurm/test_vllm_70b_8b.sh b/collaborativeagents/slurm/test_vllm_70b_8b.sh new file mode 100644 index 0000000..815f267 --- /dev/null +++ b/collaborativeagents/slurm/test_vllm_70b_8b.sh @@ -0,0 +1,167 @@ +#!/bin/bash +#SBATCH --job-name=vllm_bench +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA100x4 +#SBATCH --nodes=1 +#SBATCH --gpus-per-node=2 +#SBATCH --time=02:00:00 +#SBATCH --mem=128G +#SBATCH --output=slurm/logs/vllm_bench_70b_8b_%j.out +#SBATCH --error=slurm/logs/vllm_bench_70b_8b_%j.err + +# Realistic benchmark: 70B AWQ user simulator + 8B agent +# Tests actual conversation throughput with both models +set -e + +cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/collaborativeagents +source /u/yurenh2/miniforge3/etc/profile.d/conda.sh +conda activate eval + +export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface + +echo "=== Job Info ===" +echo "Job ID: $SLURM_JOB_ID" +echo "Node: $SLURM_NODELIST" +date + +echo "" +echo "=== GPU Info ===" +nvidia-smi --query-gpu=index,name,memory.total,memory.free --format=csv + +# Download AWQ 70B model if not complete +echo "" +echo "=== Ensuring AWQ 70B Model is Downloaded ===" +python -c " +from huggingface_hub import snapshot_download +import os +os.environ['HF_HOME'] = '/projects/bfqt/users/yurenh2/hf_cache/huggingface' +print('Checking/downloading hugging-quants/Meta-Llama-3.1-70B-Instruct-AWQ-INT4...') +path = snapshot_download('hugging-quants/Meta-Llama-3.1-70B-Instruct-AWQ-INT4') +print(f'Model ready at: {path}') +" + +MODEL_70B_AWQ="hugging-quants/Meta-Llama-3.1-70B-Instruct-AWQ-INT4" +MODEL_8B="/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/models/llama-3.1-8b-instruct" +PORT_70B=8004 +PORT_8B=8003 + +echo "" +echo "============================================" +echo "Starting 70B AWQ 
vLLM Server (GPU 0)" +echo "============================================" +CUDA_VISIBLE_DEVICES=0 python -m vllm.entrypoints.openai.api_server \ + --model $MODEL_70B_AWQ \ + --port $PORT_70B \ + --gpu-memory-utilization 0.90 \ + --max-model-len 4096 \ + --disable-log-requests \ + --quantization awq \ + --dtype float16 & +SERVER_70B_PID=$! +echo "70B Server PID: $SERVER_70B_PID" + +echo "" +echo "============================================" +echo "Starting 8B vLLM Server (GPU 1)" +echo "============================================" +CUDA_VISIBLE_DEVICES=1 python -m vllm.entrypoints.openai.api_server \ + --model $MODEL_8B \ + --port $PORT_8B \ + --gpu-memory-utilization 0.90 \ + --max-model-len 4096 \ + --disable-log-requests \ + --dtype bfloat16 & +SERVER_8B_PID=$! +echo "8B Server PID: $SERVER_8B_PID" + +echo "" +echo "Waiting for servers to start..." + +# Wait for 70B (may take 3-5 minutes) +for i in $(seq 1 120); do + if curl -s http://localhost:$PORT_70B/health > /dev/null 2>&1; then + echo "70B Server ready after $((i*3)) seconds" + break + fi + if [ $((i % 20)) -eq 0 ]; then + echo " Waiting for 70B... ($((i*3)) seconds)" + fi + sleep 3 +done + +# Wait for 8B +for i in $(seq 1 60); do + if curl -s http://localhost:$PORT_8B/health > /dev/null 2>&1; then + echo "8B Server ready after $((i*2)) seconds" + break + fi + sleep 2 +done + +# Check both servers +echo "" +if ! curl -s http://localhost:$PORT_70B/health > /dev/null 2>&1; then + echo "ERROR: 70B server failed to start" + kill $SERVER_70B_PID $SERVER_8B_PID 2>/dev/null + exit 1 +fi +echo "✓ 70B server healthy" + +if ! 
curl -s http://localhost:$PORT_8B/health > /dev/null 2>&1; then + echo "ERROR: 8B server failed to start" + kill $SERVER_70B_PID $SERVER_8B_PID 2>/dev/null + exit 1 +fi +echo "✓ 8B server healthy" + +echo "" +echo "=== vLLM Server Info ===" +echo "70B model:" +curl -s http://localhost:$PORT_70B/v1/models | python -m json.tool 2>/dev/null | head -10 +echo "" +echo "8B model:" +curl -s http://localhost:$PORT_8B/v1/models | python -m json.tool 2>/dev/null | head -10 + +echo "" +echo "============================================" +echo "Test 1: Individual Model Throughput" +echo "============================================" + +echo "" +echo "--- 70B AWQ Sequential (10 requests) ---" +python scripts/benchmark_inference.py --mode vllm --url http://localhost:$PORT_70B/v1 -n 10 + +echo "" +echo "--- 8B Sequential (20 requests) ---" +python scripts/benchmark_inference.py --mode vllm --url http://localhost:$PORT_8B/v1 -n 20 + +echo "" +echo "============================================" +echo "Test 2: Full Conversation Benchmark" +echo "============================================" +echo "Running 10 conversations with 70B user simulator + 8B agent..." +python scripts/benchmark_inference.py \ + --mode conversation \ + --url-70b http://localhost:$PORT_70B/v1 \ + --url-8b http://localhost:$PORT_8B/v1 \ + -n 10 + +# Cleanup +echo "" +echo "Cleaning up..." +kill $SERVER_70B_PID $SERVER_8B_PID 2>/dev/null +wait $SERVER_70B_PID $SERVER_8B_PID 2>/dev/null + +echo "" +echo "============================================" +echo "BENCHMARK COMPLETE!" +echo "============================================" +echo "" +echo "Key metrics to compare with paper:" +echo " - Paper: 2000 conversations/hour on H100x8" +echo " - Expected A100x2 with 70B AWQ + 8B: ~100-300 conv/hr" +echo " - Our old code: ~20 conv/hr" +echo "" +echo "If throughput is good, update experiment code to use vLLM." 
+echo "" +date diff --git a/collaborativeagents/slurm/test_vllm_benchmark.sh b/collaborativeagents/slurm/test_vllm_benchmark.sh new file mode 100644 index 0000000..d812b43 --- /dev/null +++ b/collaborativeagents/slurm/test_vllm_benchmark.sh @@ -0,0 +1,102 @@ +#!/bin/bash +#SBATCH --job-name=vllm_bench +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA100x4 +#SBATCH --nodes=1 +#SBATCH --gpus-per-node=1 +#SBATCH --time=00:30:00 +#SBATCH --output=slurm/logs/vllm_bench_%j.out +#SBATCH --error=slurm/logs/vllm_bench_%j.err + +# Benchmark vLLM vs transformers inference speed +set -e + +cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/collaborativeagents + +export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface +export TRANSFORMERS_CACHE=/projects/bfqt/users/yurenh2/hf_cache/huggingface + +echo "=== Job Info ===" +echo "Job ID: $SLURM_JOB_ID" +echo "Node: $SLURM_NODELIST" +echo "GPUs: $SLURM_GPUS_ON_NODE" +date + +echo "" +echo "=== GPU Info ===" +nvidia-smi --query-gpu=index,name,memory.total,memory.free --format=csv + +MODEL_8B="/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/models/llama-3.1-8b-instruct" +PORT=8003 + +# ============================================ +# Test 1: Transformers baseline +# ============================================ +echo "" +echo "============================================" +echo "Test 1: Transformers Baseline (10 requests)" +echo "============================================" +python scripts/benchmark_inference.py --mode transformers --model $MODEL_8B -n 10 + +# ============================================ +# Test 2: vLLM server +# ============================================ +echo "" +echo "============================================" +echo "Test 2: Starting vLLM Server" +echo "============================================" + +# Start vLLM server +python -m vllm.entrypoints.openai.api_server \ + --model $MODEL_8B \ + --port $PORT \ + --gpu-memory-utilization 0.9 \ + 
--max-model-len 4096 \ + --disable-log-requests & +SERVER_PID=$! +echo "vLLM Server PID: $SERVER_PID" + +# Wait for server to be ready +echo "Waiting for server to start..." +for i in {1..60}; do + if curl -s http://localhost:$PORT/health > /dev/null 2>&1; then + echo "Server ready after $((i*5)) seconds" + break + fi + sleep 5 +done + +# Check if server is up +if ! curl -s http://localhost:$PORT/health > /dev/null 2>&1; then + echo "ERROR: vLLM server failed to start" + kill $SERVER_PID 2>/dev/null || true + exit 1 +fi + +echo "" +echo "============================================" +echo "Test 2a: vLLM Sequential (20 requests)" +echo "============================================" +python scripts/benchmark_inference.py --mode vllm --url http://localhost:$PORT/v1 -n 20 + +echo "" +echo "============================================" +echo "Test 2b: vLLM Concurrent (50 requests)" +echo "============================================" +python scripts/benchmark_inference.py --mode vllm --url http://localhost:$PORT/v1 -n 50 --concurrent + +# Cleanup +echo "" +echo "Cleaning up..." +kill $SERVER_PID 2>/dev/null || true +wait $SERVER_PID 2>/dev/null || true + +echo "" +echo "============================================" +echo "Benchmark Complete!" 
+echo "============================================" +echo "" +echo "Target: 2000 conversations/hour (paper on H100x8)" +echo "Our A100x4 should achieve ~500-1000 conv/hr with vLLM" +echo "" +date diff --git a/collaborativeagents/slurm/test_vllm_only.sh b/collaborativeagents/slurm/test_vllm_only.sh new file mode 100644 index 0000000..302952c --- /dev/null +++ b/collaborativeagents/slurm/test_vllm_only.sh @@ -0,0 +1,117 @@ +#!/bin/bash +#SBATCH --job-name=vllm_only +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA100x4 +#SBATCH --nodes=1 +#SBATCH --gpus-per-node=1 +#SBATCH --time=00:45:00 +#SBATCH --mem=64G +#SBATCH --output=slurm/logs/vllm_only_%j.out +#SBATCH --error=slurm/logs/vllm_only_%j.err + +# Test vLLM inference speed ONLY (skip transformers which OOMs) +set -e + +cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/collaborativeagents + +# Activate conda environment +source /u/yurenh2/miniforge3/etc/profile.d/conda.sh +conda activate eval + +# Install vLLM if not already installed (requires GPU node for CUDA compilation) +if ! python -c "import vllm" 2>/dev/null; then + echo "Installing vLLM..." + pip install vllm --quiet +fi + +export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface + +echo "=== Job Info ===" +echo "Job ID: $SLURM_JOB_ID" +echo "Node: $SLURM_NODELIST" +date + +echo "" +echo "=== GPU Info ===" +nvidia-smi --query-gpu=index,name,memory.total,memory.free --format=csv + +MODEL_8B="/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/models/llama-3.1-8b-instruct" +PORT=8003 + +echo "" +echo "============================================" +echo "Starting vLLM Server for 8B Model" +echo "============================================" + +# Start vLLM server with memory optimization +python -m vllm.entrypoints.openai.api_server \ + --model $MODEL_8B \ + --port $PORT \ + --gpu-memory-utilization 0.85 \ + --max-model-len 4096 \ + --disable-log-requests \ + --dtype bfloat16 & +SERVER_PID=$! 
+echo "vLLM Server PID: $SERVER_PID" + +# Wait for server to be ready +echo "Waiting for server to start..." +for i in {1..90}; do + if curl -s http://localhost:$PORT/health > /dev/null 2>&1; then + echo "Server ready after $((i*2)) seconds" + break + fi + sleep 2 +done + +# Check if server is up +if ! curl -s http://localhost:$PORT/health > /dev/null 2>&1; then + echo "ERROR: vLLM server failed to start" + cat slurm/logs/vllm_only_${SLURM_JOB_ID}.err | tail -50 + kill $SERVER_PID 2>/dev/null || true + exit 1 +fi + +# Get model info +echo "" +echo "=== vLLM Server Info ===" +curl -s http://localhost:$PORT/v1/models | python -m json.tool 2>/dev/null || echo "Could not get model info" + +echo "" +echo "============================================" +echo "Test 1: vLLM Sequential (20 requests)" +echo "============================================" +python scripts/benchmark_inference.py --mode vllm --url http://localhost:$PORT/v1 -n 20 + +echo "" +echo "============================================" +echo "Test 2: vLLM Sequential (50 requests)" +echo "============================================" +python scripts/benchmark_inference.py --mode vllm --url http://localhost:$PORT/v1 -n 50 + +echo "" +echo "============================================" +echo "Test 3: vLLM Concurrent 4 workers (50 req)" +echo "============================================" +python scripts/benchmark_inference.py --mode vllm --url http://localhost:$PORT/v1 -n 50 --concurrent + +# Cleanup +echo "" +echo "Cleaning up..." +kill $SERVER_PID 2>/dev/null || true +wait $SERVER_PID 2>/dev/null || true + +echo "" +echo "============================================" +echo "BENCHMARK COMPLETE!" 
+echo "============================================" +echo "" +echo "Key metrics to compare with paper:" +echo " - Paper: 2000 conversations/hour on H100x8" +echo " - Expected A100x1: ~200-500 conv/hr" +echo " - Our old code: ~20 conv/hr (100x slower)" +echo "" +echo "If vLLM shows good throughput, we need to update" +echo "our experiment code to use vLLM instead of transformers." +echo "" +date -- cgit v1.2.3