diff options
Diffstat (limited to 'collaborativeagents/slurm/fullscale')
36 files changed, 3413 insertions, 0 deletions
# diff --git a/collaborativeagents/slurm/fullscale/continue_to_30sess.sh b/collaborativeagents/slurm/fullscale/continue_to_30sess.sh
# new file mode 100644
# index 0000000..93ddecc
# --- /dev/null
# +++ b/collaborativeagents/slurm/fullscale/continue_to_30sess.sh
# @@ -0,0 +1,76 @@
#!/bin/bash
#SBATCH --job-name=continue_30sess
#SBATCH --account=bfqt-delta-gpu
#SBATCH --partition=gpuA100x4
#SBATCH --nodes=1
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=16
#SBATCH --gres=gpu:nvidia_a100:2
#SBATCH --mem=128G
#SBATCH --time=16:00:00
#SBATCH --output=continue_30sess_%j.out
#SBATCH --error=continue_30sess_%j.err

# Continue experiment from 15 to 30 sessions
# This will pick up from existing checkpoints and only run remaining sessions

# Everything below uses paths relative to the project root, so stop if it is missing.
cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model || exit 1
source /u/yurenh2/miniforge3/etc/profile.d/conda.sh
conda activate eval

export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface
export PYTHONPATH="${PWD}/src:${PWD}/collaborativeagents:${PYTHONPATH}"
export NCCL_P2P_DISABLE=1

# set -a marks every variable assigned while sourcing .env for export.
set -a
source .env
set +a

pip install --quiet openai python-dotenv json-repair

MODEL_8B="/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/models/llama-3.1-8b-instruct"

# Kill any process whose command line matches vllm.entrypoints before starting a new server.
# NOTE(review): the pattern matches every such process owned by this user on the node and
# the port is fixed at 8003 — assumes no other job of ours shares the node; confirm.
pkill -f "vllm.entrypoints" 2>/dev/null || true
sleep 2

CUDA_VISIBLE_DEVICES=0 python -m vllm.entrypoints.openai.api_server \
    --model "$MODEL_8B" --port 8003 --tensor-parallel-size 1 \
    --gpu-memory-utilization 0.5 --max-model-len 8192 \
    --dtype bfloat16 --disable-log-requests &

# Poll the health endpoint (90 tries, 2 s apart). -f makes HTTP error responses count
# as "not ready". Abort instead of reporting readiness when every probe failed.
vllm_ready=0
for i in $(seq 1 90); do
    if curl -sf http://localhost:8003/health > /dev/null 2>&1; then
        vllm_ready=1
        break
    fi
    sleep 2
done
if [ "$vllm_ready" -ne 1 ]; then
    echo "ERROR: vLLM did not become healthy on port 8003" >&2
    pkill -f "vllm.entrypoints" 2>/dev/null || true
    exit 1
fi
echo "vLLM ready."
cd collaborativeagents/scripts || exit 1

# Find the existing results directory: the most recently modified subdirectory of
# ../results/fullscale_15sess. A glob plus -nt replaces parsing ls output.
# NOTE(review): only one subdirectory is continued — confirm that is the intended one
# if several runs wrote into fullscale_15sess/.
RESULTS_DIR=""
for candidate in ../results/fullscale_15sess/*/; do
    [ -d "$candidate" ] || continue   # unmatched glob stays literal; skip it
    if [ -z "$RESULTS_DIR" ] || [ "$candidate" -nt "$RESULTS_DIR" ]; then
        RESULTS_DIR=$candidate
    fi
done

if [ -z "$RESULTS_DIR" ]; then
    echo "ERROR: No existing results directory found in fullscale_15sess/" >&2
    pkill -f "vllm.entrypoints" 2>/dev/null || true
    exit 1
fi

echo "Continuing from: $RESULTS_DIR"

# Continue with 30 sessions (the checkpoint system will skip already-completed sessions)
python run_experiments.py \
    --methods vanilla,contextual,reflection,all_memory,rag,rag_vector \
    --datasets math-hard \
    --n-profiles 200 \
    --n-sessions 30 \
    --max-turns 8 \
    --use-vllm \
    --use-openai-user \
    --openai-user-model gpt-5-mini \
    --reward-mode llm \
    --vllm-agent-url http://localhost:8003/v1 \
    --parallel-profiles 25 \
    --profile-path ../data/complex_profiles_v2/profiles_200.jsonl \
    --continue-from "$RESULTS_DIR"
status=$?
# Propagate a failed run so the batch job is not reported as successful.
if [ "$status" -ne 0 ]; then
    echo "ERROR: run_experiments.py exited with status $status" >&2
    pkill -f "vllm.entrypoints" 2>/dev/null || true
    exit "$status"
fi

echo "Continue to 30 sessions complete!"
pkill -f "vllm.entrypoints" 2>/dev/null || true

# diff --git a/collaborativeagents/slurm/fullscale/generate_jobs.sh b/collaborativeagents/slurm/fullscale/generate_jobs.sh
# new file mode 100644
# index 0000000..0bc5c0b
# --- /dev/null
# +++ b/collaborativeagents/slurm/fullscale/generate_jobs.sh
# @@ -0,0 +1,89 @@
#!/bin/bash
# Generate all job scripts (6 methods × 4 profile ranges = 24 jobs)
# Each job: 50 profiles × 15 sessions = 750 sessions ≈ 7-8 hours
#
# Scripts are written to the current directory as run_<method>_p<start>.sh.
# Inside the heredoc, ${method}/${start}/${end} are expanded at generation time,
# while \$ and \\ are emitted literally and evaluated when the job runs.

METHODS=(vanilla contextual reflection all_memory rag rag_vector)
RANGES=(0:50 50:100 100:150 150:200)

for method in "${METHODS[@]}"; do
    for range in "${RANGES[@]}"; do
        start=${range%:*}   # text before the colon
        end=${range#*:}     # text after the colon
        job_script="run_${method}_p${start}.sh"

        cat > "$job_script" << EOF
#!/bin/bash
#SBATCH --job-name=exp_${method}_p${start}
#SBATCH --account=bfqt-delta-gpu
#SBATCH --partition=gpuA100x4
#SBATCH --nodes=1
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=16
#SBATCH --gres=gpu:nvidia_a100:2
#SBATCH --mem=128G
#SBATCH --time=12:00:00
#SBATCH --output=exp_${method}_p${start}_%j.out
#SBATCH --error=exp_${method}_p${start}_%j.err

# Full run: ${method} method, profiles ${start}-${end} (50 profiles × 15 sessions)

cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model || exit 1
source /u/yurenh2/miniforge3/etc/profile.d/conda.sh
conda activate eval

export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface
export PYTHONPATH="\${PWD}/src:\${PWD}/collaborativeagents:\${PYTHONPATH}"
export NCCL_P2P_DISABLE=1

set -a
source .env
set +a

pip install --quiet openai python-dotenv json-repair

MODEL_8B="/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/models/llama-3.1-8b-instruct"

pkill -f "vllm.entrypoints" 2>/dev/null || true
sleep 2

CUDA_VISIBLE_DEVICES=0 python -m vllm.entrypoints.openai.api_server \\
    --model "\$MODEL_8B" --port 8003 --tensor-parallel-size 1 \\
    --gpu-memory-utilization 0.5 --max-model-len 8192 \\
    --dtype bfloat16 --disable-log-requests &

# Poll the health endpoint (90 tries, 2 s apart); abort if the server never answers.
vllm_ready=0
for i in \$(seq 1 90); do
    if curl -sf http://localhost:8003/health > /dev/null 2>&1; then
        vllm_ready=1
        break
    fi
    sleep 2
done
if [ "\$vllm_ready" -ne 1 ]; then
    echo "ERROR: vLLM did not become healthy on port 8003" >&2
    pkill -f "vllm.entrypoints" 2>/dev/null || true
    exit 1
fi
echo "vLLM ready."

cd collaborativeagents/scripts || exit 1

python run_experiments.py \\
    --methods ${method} \\
    --datasets math-hard \\
    --n-profiles 200 \\
    --start-profile ${start} \\
    --end-profile ${end} \\
    --n-sessions 15 \\
    --max-turns 8 \\
    --use-vllm \\
    --use-openai-user \\
    --openai-user-model gpt-5-mini \\
    --reward-mode llm \\
    --vllm-agent-url http://localhost:8003/v1 \\
    --parallel-profiles 25 \\
    --profile-path ../data/complex_profiles_v2/profiles_200.jsonl \\
    --output-dir ../results/fullscale_15sess
status=\$?
# Propagate a failed run so the batch job is not reported as successful.
if [ "\$status" -ne 0 ]; then
    echo "ERROR: run_experiments.py exited with status \$status" >&2
    pkill -f "vllm.entrypoints" 2>/dev/null || true
    exit "\$status"
fi

echo "${method} p${start}-${end} complete!"
pkill -f "vllm.entrypoints" 2>/dev/null || true
EOF
        chmod +x "$job_script"
        echo "Created $job_script"
    done
done

echo ""
echo "Generated 24 job scripts (6 methods × 4 profile ranges)"
echo "Each job: 50 profiles × 15 sessions = 750 sessions"
echo "Estimated time per job: ~7-8 hours"

# diff --git a/collaborativeagents/slurm/fullscale/run_all_memory_p0.sh b/collaborativeagents/slurm/fullscale/run_all_memory_p0.sh
# new file mode 100755
# index 0000000..bb7968b
# --- /dev/null
# +++ b/collaborativeagents/slurm/fullscale/run_all_memory_p0.sh
# @@ -0,0 +1,66 @@
#!/bin/bash
#SBATCH --job-name=exp_all_memory_p0
#SBATCH --account=bfqt-delta-gpu
#SBATCH --partition=gpuA100x4
#SBATCH --nodes=1
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=16
#SBATCH --gres=gpu:nvidia_a100:2
#SBATCH --mem=128G
#SBATCH --time=12:00:00
#SBATCH --output=exp_all_memory_p0_%j.out
#SBATCH --error=exp_all_memory_p0_%j.err

# Full run: all_memory method, profiles 0-50 (50 profiles × 15 sessions)

cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model || exit 1
source /u/yurenh2/miniforge3/etc/profile.d/conda.sh
conda activate eval

export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface
export PYTHONPATH="${PWD}/src:${PWD}/collaborativeagents:${PYTHONPATH}"
export NCCL_P2P_DISABLE=1

set -a
source .env
set +a

pip install --quiet openai python-dotenv json-repair

MODEL_8B="/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/models/llama-3.1-8b-instruct"

pkill -f "vllm.entrypoints" 2>/dev/null || true
sleep 2

CUDA_VISIBLE_DEVICES=0 python -m vllm.entrypoints.openai.api_server \
    --model "$MODEL_8B" --port 8003 --tensor-parallel-size 1 \
    --gpu-memory-utilization 0.5 --max-model-len 8192 \
    --dtype bfloat16 --disable-log-requests &

# Poll the health endpoint (90 tries, 2 s apart); abort if the server never answers.
vllm_ready=0
for i in $(seq 1 90); do
    if curl -sf http://localhost:8003/health > /dev/null 2>&1; then
        vllm_ready=1
        break
    fi
    sleep 2
done
if [ "$vllm_ready" -ne 1 ]; then
    echo "ERROR: vLLM did not become healthy on port 8003" >&2
    pkill -f "vllm.entrypoints" 2>/dev/null || true
    exit 1
fi
echo "vLLM ready."
cd collaborativeagents/scripts || exit 1

python run_experiments.py \
    --methods all_memory \
    --datasets math-hard \
    --n-profiles 200 \
    --start-profile 0 \
    --end-profile 50 \
    --n-sessions 15 \
    --max-turns 8 \
    --use-vllm \
    --use-openai-user \
    --openai-user-model gpt-5-mini \
    --reward-mode llm \
    --vllm-agent-url http://localhost:8003/v1 \
    --parallel-profiles 25 \
    --profile-path ../data/complex_profiles_v2/profiles_200.jsonl \
    --output-dir ../results/fullscale_15sess
status=$?
# Propagate a failed run so the batch job is not reported as successful.
if [ "$status" -ne 0 ]; then
    echo "ERROR: run_experiments.py exited with status $status" >&2
    pkill -f "vllm.entrypoints" 2>/dev/null || true
    exit "$status"
fi

echo "all_memory p0-50 complete!"
pkill -f "vllm.entrypoints" 2>/dev/null || true

# diff --git a/collaborativeagents/slurm/fullscale/run_all_memory_p100.sh b/collaborativeagents/slurm/fullscale/run_all_memory_p100.sh
# new file mode 100755
# index 0000000..21db6de
# --- /dev/null
# +++ b/collaborativeagents/slurm/fullscale/run_all_memory_p100.sh
# @@ -0,0 +1,66 @@
#!/bin/bash
#SBATCH --job-name=exp_all_memory_p100
#SBATCH --account=bfqt-delta-gpu
#SBATCH --partition=gpuA100x4
#SBATCH --nodes=1
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=16
#SBATCH --gres=gpu:nvidia_a100:2
#SBATCH --mem=128G
#SBATCH --time=12:00:00
#SBATCH --output=exp_all_memory_p100_%j.out
#SBATCH --error=exp_all_memory_p100_%j.err

# Full run: all_memory method, profiles 100-150 (50 profiles × 15 sessions)

cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model || exit 1
source /u/yurenh2/miniforge3/etc/profile.d/conda.sh
conda activate eval

export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface
export PYTHONPATH="${PWD}/src:${PWD}/collaborativeagents:${PYTHONPATH}"
export NCCL_P2P_DISABLE=1

set -a
source .env
set +a

pip install --quiet openai python-dotenv json-repair

MODEL_8B="/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/models/llama-3.1-8b-instruct"

pkill -f "vllm.entrypoints" 2>/dev/null || true
sleep 2

CUDA_VISIBLE_DEVICES=0 python -m vllm.entrypoints.openai.api_server \
    --model "$MODEL_8B" --port 8003 --tensor-parallel-size 1 \
    --gpu-memory-utilization 0.5 --max-model-len 8192 \
    --dtype bfloat16 --disable-log-requests &

# Poll the health endpoint (90 tries, 2 s apart); abort if the server never answers.
vllm_ready=0
for i in $(seq 1 90); do
    if curl -sf http://localhost:8003/health > /dev/null 2>&1; then
        vllm_ready=1
        break
    fi
    sleep 2
done
if [ "$vllm_ready" -ne 1 ]; then
    echo "ERROR: vLLM did not become healthy on port 8003" >&2
    pkill -f "vllm.entrypoints" 2>/dev/null || true
    exit 1
fi
echo "vLLM ready."

cd collaborativeagents/scripts || exit 1

python run_experiments.py \
    --methods all_memory \
    --datasets math-hard \
    --n-profiles 200 \
    --start-profile 100 \
    --end-profile 150 \
    --n-sessions 15 \
    --max-turns 8 \
    --use-vllm \
    --use-openai-user \
    --openai-user-model gpt-5-mini \
    --reward-mode llm \
    --vllm-agent-url http://localhost:8003/v1 \
    --parallel-profiles 25 \
    --profile-path ../data/complex_profiles_v2/profiles_200.jsonl \
    --output-dir ../results/fullscale_15sess
status=$?
# Propagate a failed run so the batch job is not reported as successful.
if [ "$status" -ne 0 ]; then
    echo "ERROR: run_experiments.py exited with status $status" >&2
    pkill -f "vllm.entrypoints" 2>/dev/null || true
    exit "$status"
fi

echo "all_memory p100-150 complete!"
pkill -f "vllm.entrypoints" 2>/dev/null || true

# diff --git a/collaborativeagents/slurm/fullscale/run_all_memory_p150.sh b/collaborativeagents/slurm/fullscale/run_all_memory_p150.sh
# new file mode 100755
# index 0000000..da7a729
# --- /dev/null
# +++ b/collaborativeagents/slurm/fullscale/run_all_memory_p150.sh
# @@ -0,0 +1,66 @@
#!/bin/bash
#SBATCH --job-name=exp_all_memory_p150
#SBATCH --account=bfqt-delta-gpu
#SBATCH --partition=gpuA100x4
#SBATCH --nodes=1
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=16
#SBATCH --gres=gpu:nvidia_a100:2
#SBATCH --mem=128G
#SBATCH --time=12:00:00
#SBATCH --output=exp_all_memory_p150_%j.out
#SBATCH --error=exp_all_memory_p150_%j.err

# Full run: all_memory method, profiles 150-200 (50 profiles × 15 sessions)

cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model || exit 1
source /u/yurenh2/miniforge3/etc/profile.d/conda.sh
conda activate eval

export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface
export PYTHONPATH="${PWD}/src:${PWD}/collaborativeagents:${PYTHONPATH}"
export NCCL_P2P_DISABLE=1

set -a
source .env
set +a

pip install --quiet openai python-dotenv json-repair
MODEL_8B="/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/models/llama-3.1-8b-instruct"

# Kill any process whose command line matches vllm.entrypoints before starting a new server.
pkill -f "vllm.entrypoints" 2>/dev/null || true
sleep 2

CUDA_VISIBLE_DEVICES=0 python -m vllm.entrypoints.openai.api_server \
    --model "$MODEL_8B" --port 8003 --tensor-parallel-size 1 \
    --gpu-memory-utilization 0.5 --max-model-len 8192 \
    --dtype bfloat16 --disable-log-requests &

# Poll the health endpoint (90 tries, 2 s apart); abort if the server never answers.
vllm_ready=0
for i in $(seq 1 90); do
    if curl -sf http://localhost:8003/health > /dev/null 2>&1; then
        vllm_ready=1
        break
    fi
    sleep 2
done
if [ "$vllm_ready" -ne 1 ]; then
    echo "ERROR: vLLM did not become healthy on port 8003" >&2
    pkill -f "vllm.entrypoints" 2>/dev/null || true
    exit 1
fi
echo "vLLM ready."

cd collaborativeagents/scripts || exit 1

python run_experiments.py \
    --methods all_memory \
    --datasets math-hard \
    --n-profiles 200 \
    --start-profile 150 \
    --end-profile 200 \
    --n-sessions 15 \
    --max-turns 8 \
    --use-vllm \
    --use-openai-user \
    --openai-user-model gpt-5-mini \
    --reward-mode llm \
    --vllm-agent-url http://localhost:8003/v1 \
    --parallel-profiles 25 \
    --profile-path ../data/complex_profiles_v2/profiles_200.jsonl \
    --output-dir ../results/fullscale_15sess
status=$?
# Propagate a failed run so the batch job is not reported as successful.
if [ "$status" -ne 0 ]; then
    echo "ERROR: run_experiments.py exited with status $status" >&2
    pkill -f "vllm.entrypoints" 2>/dev/null || true
    exit "$status"
fi

echo "all_memory p150-200 complete!"
pkill -f "vllm.entrypoints" 2>/dev/null || true

# diff --git a/collaborativeagents/slurm/fullscale/run_all_memory_p50.sh b/collaborativeagents/slurm/fullscale/run_all_memory_p50.sh
# new file mode 100755
# index 0000000..60bc9ee
# --- /dev/null
# +++ b/collaborativeagents/slurm/fullscale/run_all_memory_p50.sh
# @@ -0,0 +1,66 @@
#!/bin/bash
#SBATCH --job-name=exp_all_memory_p50
#SBATCH --account=bfqt-delta-gpu
#SBATCH --partition=gpuA100x4
#SBATCH --nodes=1
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=16
#SBATCH --gres=gpu:nvidia_a100:2
#SBATCH --mem=128G
#SBATCH --time=12:00:00
#SBATCH --output=exp_all_memory_p50_%j.out
#SBATCH --error=exp_all_memory_p50_%j.err

# Full run: all_memory method, profiles 50-100 (50 profiles × 15 sessions)

cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model || exit 1
source /u/yurenh2/miniforge3/etc/profile.d/conda.sh
conda activate eval

export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface
export PYTHONPATH="${PWD}/src:${PWD}/collaborativeagents:${PYTHONPATH}"
export NCCL_P2P_DISABLE=1

set -a
source .env
set +a

pip install --quiet openai python-dotenv json-repair

MODEL_8B="/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/models/llama-3.1-8b-instruct"

pkill -f "vllm.entrypoints" 2>/dev/null || true
sleep 2

CUDA_VISIBLE_DEVICES=0 python -m vllm.entrypoints.openai.api_server \
    --model "$MODEL_8B" --port 8003 --tensor-parallel-size 1 \
    --gpu-memory-utilization 0.5 --max-model-len 8192 \
    --dtype bfloat16 --disable-log-requests &

# Poll the health endpoint (90 tries, 2 s apart); abort if the server never answers.
vllm_ready=0
for i in $(seq 1 90); do
    if curl -sf http://localhost:8003/health > /dev/null 2>&1; then
        vllm_ready=1
        break
    fi
    sleep 2
done
if [ "$vllm_ready" -ne 1 ]; then
    echo "ERROR: vLLM did not become healthy on port 8003" >&2
    pkill -f "vllm.entrypoints" 2>/dev/null || true
    exit 1
fi
echo "vLLM ready."
cd collaborativeagents/scripts || exit 1

python run_experiments.py \
    --methods all_memory \
    --datasets math-hard \
    --n-profiles 200 \
    --start-profile 50 \
    --end-profile 100 \
    --n-sessions 15 \
    --max-turns 8 \
    --use-vllm \
    --use-openai-user \
    --openai-user-model gpt-5-mini \
    --reward-mode llm \
    --vllm-agent-url http://localhost:8003/v1 \
    --parallel-profiles 25 \
    --profile-path ../data/complex_profiles_v2/profiles_200.jsonl \
    --output-dir ../results/fullscale_15sess
status=$?
# Propagate a failed run so the batch job is not reported as successful.
if [ "$status" -ne 0 ]; then
    echo "ERROR: run_experiments.py exited with status $status" >&2
    pkill -f "vllm.entrypoints" 2>/dev/null || true
    exit "$status"
fi

echo "all_memory p50-100 complete!"
pkill -f "vllm.entrypoints" 2>/dev/null || true

# diff --git a/collaborativeagents/slurm/fullscale/run_contextual_p0.sh b/collaborativeagents/slurm/fullscale/run_contextual_p0.sh
# new file mode 100755
# index 0000000..6fa0211
# --- /dev/null
# +++ b/collaborativeagents/slurm/fullscale/run_contextual_p0.sh
# @@ -0,0 +1,66 @@
#!/bin/bash
#SBATCH --job-name=exp_contextual_p0
#SBATCH --account=bfqt-delta-gpu
#SBATCH --partition=gpuA100x4
#SBATCH --nodes=1
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=16
#SBATCH --gres=gpu:nvidia_a100:2
#SBATCH --mem=128G
#SBATCH --time=12:00:00
#SBATCH --output=exp_contextual_p0_%j.out
#SBATCH --error=exp_contextual_p0_%j.err

# Full run: contextual method, profiles 0-50 (50 profiles × 15 sessions)

cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model || exit 1
source /u/yurenh2/miniforge3/etc/profile.d/conda.sh
conda activate eval

export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface
export PYTHONPATH="${PWD}/src:${PWD}/collaborativeagents:${PYTHONPATH}"
export NCCL_P2P_DISABLE=1

set -a
source .env
set +a

pip install --quiet openai python-dotenv json-repair

MODEL_8B="/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/models/llama-3.1-8b-instruct"

pkill -f "vllm.entrypoints" 2>/dev/null || true
sleep 2

CUDA_VISIBLE_DEVICES=0 python -m vllm.entrypoints.openai.api_server \
    --model "$MODEL_8B" --port 8003 --tensor-parallel-size 1 \
    --gpu-memory-utilization 0.5 --max-model-len 8192 \
    --dtype bfloat16 --disable-log-requests &

# Poll the health endpoint (90 tries, 2 s apart); abort if the server never answers.
vllm_ready=0
for i in $(seq 1 90); do
    if curl -sf http://localhost:8003/health > /dev/null 2>&1; then
        vllm_ready=1
        break
    fi
    sleep 2
done
if [ "$vllm_ready" -ne 1 ]; then
    echo "ERROR: vLLM did not become healthy on port 8003" >&2
    pkill -f "vllm.entrypoints" 2>/dev/null || true
    exit 1
fi
echo "vLLM ready."

cd collaborativeagents/scripts || exit 1

python run_experiments.py \
    --methods contextual \
    --datasets math-hard \
    --n-profiles 200 \
    --start-profile 0 \
    --end-profile 50 \
    --n-sessions 15 \
    --max-turns 8 \
    --use-vllm \
    --use-openai-user \
    --openai-user-model gpt-5-mini \
    --reward-mode llm \
    --vllm-agent-url http://localhost:8003/v1 \
    --parallel-profiles 25 \
    --profile-path ../data/complex_profiles_v2/profiles_200.jsonl \
    --output-dir ../results/fullscale_15sess
status=$?
# Propagate a failed run so the batch job is not reported as successful.
if [ "$status" -ne 0 ]; then
    echo "ERROR: run_experiments.py exited with status $status" >&2
    pkill -f "vllm.entrypoints" 2>/dev/null || true
    exit "$status"
fi

echo "contextual p0-50 complete!"
pkill -f "vllm.entrypoints" 2>/dev/null || true

# diff --git a/collaborativeagents/slurm/fullscale/run_contextual_p100.sh b/collaborativeagents/slurm/fullscale/run_contextual_p100.sh
# new file mode 100755
# index 0000000..8250c19
# --- /dev/null
# +++ b/collaborativeagents/slurm/fullscale/run_contextual_p100.sh
# @@ -0,0 +1,66 @@
#!/bin/bash
#SBATCH --job-name=exp_contextual_p100
#SBATCH --account=bfqt-delta-gpu
#SBATCH --partition=gpuA100x4
#SBATCH --nodes=1
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=16
#SBATCH --gres=gpu:nvidia_a100:2
#SBATCH --mem=128G
#SBATCH --time=12:00:00
#SBATCH --output=exp_contextual_p100_%j.out
#SBATCH --error=exp_contextual_p100_%j.err

# Full run: contextual method, profiles 100-150 (50 profiles × 15 sessions)

cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model || exit 1
source /u/yurenh2/miniforge3/etc/profile.d/conda.sh
conda activate eval

export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface
export PYTHONPATH="${PWD}/src:${PWD}/collaborativeagents:${PYTHONPATH}"
export NCCL_P2P_DISABLE=1

set -a
source .env
set +a

pip install --quiet openai python-dotenv json-repair
MODEL_8B="/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/models/llama-3.1-8b-instruct"

# Kill any process whose command line matches vllm.entrypoints before starting a new server.
pkill -f "vllm.entrypoints" 2>/dev/null || true
sleep 2

CUDA_VISIBLE_DEVICES=0 python -m vllm.entrypoints.openai.api_server \
    --model "$MODEL_8B" --port 8003 --tensor-parallel-size 1 \
    --gpu-memory-utilization 0.5 --max-model-len 8192 \
    --dtype bfloat16 --disable-log-requests &

# Poll the health endpoint (90 tries, 2 s apart); abort if the server never answers.
vllm_ready=0
for i in $(seq 1 90); do
    if curl -sf http://localhost:8003/health > /dev/null 2>&1; then
        vllm_ready=1
        break
    fi
    sleep 2
done
if [ "$vllm_ready" -ne 1 ]; then
    echo "ERROR: vLLM did not become healthy on port 8003" >&2
    pkill -f "vllm.entrypoints" 2>/dev/null || true
    exit 1
fi
echo "vLLM ready."

cd collaborativeagents/scripts || exit 1

python run_experiments.py \
    --methods contextual \
    --datasets math-hard \
    --n-profiles 200 \
    --start-profile 100 \
    --end-profile 150 \
    --n-sessions 15 \
    --max-turns 8 \
    --use-vllm \
    --use-openai-user \
    --openai-user-model gpt-5-mini \
    --reward-mode llm \
    --vllm-agent-url http://localhost:8003/v1 \
    --parallel-profiles 25 \
    --profile-path ../data/complex_profiles_v2/profiles_200.jsonl \
    --output-dir ../results/fullscale_15sess
status=$?
# Propagate a failed run so the batch job is not reported as successful.
if [ "$status" -ne 0 ]; then
    echo "ERROR: run_experiments.py exited with status $status" >&2
    pkill -f "vllm.entrypoints" 2>/dev/null || true
    exit "$status"
fi

echo "contextual p100-150 complete!"
pkill -f "vllm.entrypoints" 2>/dev/null || true

# diff --git a/collaborativeagents/slurm/fullscale/run_contextual_p150.sh b/collaborativeagents/slurm/fullscale/run_contextual_p150.sh
# new file mode 100755
# index 0000000..fb14058
# --- /dev/null
# +++ b/collaborativeagents/slurm/fullscale/run_contextual_p150.sh
# @@ -0,0 +1,66 @@
#!/bin/bash
#SBATCH --job-name=exp_contextual_p150
#SBATCH --account=bfqt-delta-gpu
#SBATCH --partition=gpuA100x4
#SBATCH --nodes=1
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=16
#SBATCH --gres=gpu:nvidia_a100:2
#SBATCH --mem=128G
#SBATCH --time=12:00:00
#SBATCH --output=exp_contextual_p150_%j.out
#SBATCH --error=exp_contextual_p150_%j.err

# Full run: contextual method, profiles 150-200 (50 profiles × 15 sessions)

cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model || exit 1
source /u/yurenh2/miniforge3/etc/profile.d/conda.sh
conda activate eval

export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface
export PYTHONPATH="${PWD}/src:${PWD}/collaborativeagents:${PYTHONPATH}"
export NCCL_P2P_DISABLE=1

set -a
source .env
set +a

pip install --quiet openai python-dotenv json-repair

MODEL_8B="/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/models/llama-3.1-8b-instruct"

pkill -f "vllm.entrypoints" 2>/dev/null || true
sleep 2

CUDA_VISIBLE_DEVICES=0 python -m vllm.entrypoints.openai.api_server \
    --model "$MODEL_8B" --port 8003 --tensor-parallel-size 1 \
    --gpu-memory-utilization 0.5 --max-model-len 8192 \
    --dtype bfloat16 --disable-log-requests &

# Poll the health endpoint (90 tries, 2 s apart); abort if the server never answers.
vllm_ready=0
for i in $(seq 1 90); do
    if curl -sf http://localhost:8003/health > /dev/null 2>&1; then
        vllm_ready=1
        break
    fi
    sleep 2
done
if [ "$vllm_ready" -ne 1 ]; then
    echo "ERROR: vLLM did not become healthy on port 8003" >&2
    pkill -f "vllm.entrypoints" 2>/dev/null || true
    exit 1
fi
echo "vLLM ready."
cd collaborativeagents/scripts || exit 1

python run_experiments.py \
    --methods contextual \
    --datasets math-hard \
    --n-profiles 200 \
    --start-profile 150 \
    --end-profile 200 \
    --n-sessions 15 \
    --max-turns 8 \
    --use-vllm \
    --use-openai-user \
    --openai-user-model gpt-5-mini \
    --reward-mode llm \
    --vllm-agent-url http://localhost:8003/v1 \
    --parallel-profiles 25 \
    --profile-path ../data/complex_profiles_v2/profiles_200.jsonl \
    --output-dir ../results/fullscale_15sess
status=$?
# Propagate a failed run so the batch job is not reported as successful.
if [ "$status" -ne 0 ]; then
    echo "ERROR: run_experiments.py exited with status $status" >&2
    pkill -f "vllm.entrypoints" 2>/dev/null || true
    exit "$status"
fi

echo "contextual p150-200 complete!"
pkill -f "vllm.entrypoints" 2>/dev/null || true

# diff --git a/collaborativeagents/slurm/fullscale/run_contextual_p50.sh b/collaborativeagents/slurm/fullscale/run_contextual_p50.sh
# new file mode 100755
# index 0000000..8b1788e
# --- /dev/null
# +++ b/collaborativeagents/slurm/fullscale/run_contextual_p50.sh
# @@ -0,0 +1,66 @@
#!/bin/bash
#SBATCH --job-name=exp_contextual_p50
#SBATCH --account=bfqt-delta-gpu
#SBATCH --partition=gpuA100x4
#SBATCH --nodes=1
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=16
#SBATCH --gres=gpu:nvidia_a100:2
#SBATCH --mem=128G
#SBATCH --time=12:00:00
#SBATCH --output=exp_contextual_p50_%j.out
#SBATCH --error=exp_contextual_p50_%j.err

# Full run: contextual method, profiles 50-100 (50 profiles × 15 sessions)

cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model || exit 1
source /u/yurenh2/miniforge3/etc/profile.d/conda.sh
conda activate eval

export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface
export PYTHONPATH="${PWD}/src:${PWD}/collaborativeagents:${PYTHONPATH}"
export NCCL_P2P_DISABLE=1

set -a
source .env
set +a

pip install --quiet openai python-dotenv json-repair

MODEL_8B="/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/models/llama-3.1-8b-instruct"

pkill -f "vllm.entrypoints" 2>/dev/null || true
sleep 2

CUDA_VISIBLE_DEVICES=0 python -m vllm.entrypoints.openai.api_server \
    --model "$MODEL_8B" --port 8003 --tensor-parallel-size 1 \
    --gpu-memory-utilization 0.5 --max-model-len 8192 \
    --dtype bfloat16 --disable-log-requests &

# Poll the health endpoint (90 tries, 2 s apart); abort if the server never answers.
vllm_ready=0
for i in $(seq 1 90); do
    if curl -sf http://localhost:8003/health > /dev/null 2>&1; then
        vllm_ready=1
        break
    fi
    sleep 2
done
if [ "$vllm_ready" -ne 1 ]; then
    echo "ERROR: vLLM did not become healthy on port 8003" >&2
    pkill -f "vllm.entrypoints" 2>/dev/null || true
    exit 1
fi
echo "vLLM ready."

cd collaborativeagents/scripts || exit 1

python run_experiments.py \
    --methods contextual \
    --datasets math-hard \
    --n-profiles 200 \
    --start-profile 50 \
    --end-profile 100 \
    --n-sessions 15 \
    --max-turns 8 \
    --use-vllm \
    --use-openai-user \
    --openai-user-model gpt-5-mini \
    --reward-mode llm \
    --vllm-agent-url http://localhost:8003/v1 \
    --parallel-profiles 25 \
    --profile-path ../data/complex_profiles_v2/profiles_200.jsonl \
    --output-dir ../results/fullscale_15sess
status=$?
# Propagate a failed run so the batch job is not reported as successful.
if [ "$status" -ne 0 ]; then
    echo "ERROR: run_experiments.py exited with status $status" >&2
    pkill -f "vllm.entrypoints" 2>/dev/null || true
    exit "$status"
fi

echo "contextual p50-100 complete!"
pkill -f "vllm.entrypoints" 2>/dev/null || true

# diff --git a/collaborativeagents/slurm/fullscale/run_rag_p0.sh b/collaborativeagents/slurm/fullscale/run_rag_p0.sh
# new file mode 100755
# index 0000000..de4f038
# --- /dev/null
# +++ b/collaborativeagents/slurm/fullscale/run_rag_p0.sh
# @@ -0,0 +1,66 @@
#!/bin/bash
#SBATCH --job-name=exp_rag_p0
#SBATCH --account=bfqt-delta-gpu
#SBATCH --partition=gpuA100x4
#SBATCH --nodes=1
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=16
#SBATCH --gres=gpu:nvidia_a100:2
#SBATCH --mem=128G
#SBATCH --time=12:00:00
#SBATCH --output=exp_rag_p0_%j.out
#SBATCH --error=exp_rag_p0_%j.err

# Full run: rag method, profiles 0-50 (50 profiles × 15 sessions)

cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model || exit 1
source /u/yurenh2/miniforge3/etc/profile.d/conda.sh
conda activate eval

export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface
export PYTHONPATH="${PWD}/src:${PWD}/collaborativeagents:${PYTHONPATH}"
export NCCL_P2P_DISABLE=1

set -a
source .env
set +a

pip install --quiet openai python-dotenv json-repair

MODEL_8B="/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/models/llama-3.1-8b-instruct"

pkill -f "vllm.entrypoints" 2>/dev/null || true
sleep 2

CUDA_VISIBLE_DEVICES=0 python -m vllm.entrypoints.openai.api_server \
    --model "$MODEL_8B" --port 8003 --tensor-parallel-size 1 \
    --gpu-memory-utilization 0.5 --max-model-len 8192 \
    --dtype bfloat16 --disable-log-requests &

# Poll the health endpoint (90 tries, 2 s apart); abort if the server never answers.
vllm_ready=0
for i in $(seq 1 90); do
    if curl -sf http://localhost:8003/health > /dev/null 2>&1; then
        vllm_ready=1
        break
    fi
    sleep 2
done
if [ "$vllm_ready" -ne 1 ]; then
    echo "ERROR: vLLM did not become healthy on port 8003" >&2
    pkill -f "vllm.entrypoints" 2>/dev/null || true
    exit 1
fi
echo "vLLM ready."

cd collaborativeagents/scripts || exit 1

python run_experiments.py \
    --methods rag \
    --datasets math-hard \
    --n-profiles 200 \
    --start-profile 0 \
    --end-profile 50 \
    --n-sessions 15 \
    --max-turns 8 \
    --use-vllm \
    --use-openai-user \
    --openai-user-model gpt-5-mini \
    --reward-mode llm \
    --vllm-agent-url http://localhost:8003/v1 \
    --parallel-profiles 25 \
    --profile-path ../data/complex_profiles_v2/profiles_200.jsonl \
    --output-dir ../results/fullscale_15sess
status=$?
# Propagate a failed run so the batch job is not reported as successful.
if [ "$status" -ne 0 ]; then
    echo "ERROR: run_experiments.py exited with status $status" >&2
    pkill -f "vllm.entrypoints" 2>/dev/null || true
    exit "$status"
fi

echo "rag p0-50 complete!"
pkill -f "vllm.entrypoints" 2>/dev/null || true

# diff --git a/collaborativeagents/slurm/fullscale/run_rag_p100.sh b/collaborativeagents/slurm/fullscale/run_rag_p100.sh
# new file mode 100755
# index 0000000..c9b9d7e
# --- /dev/null
# +++ b/collaborativeagents/slurm/fullscale/run_rag_p100.sh
# @@ -0,0 +1,66 @@
#!/bin/bash
#SBATCH --job-name=exp_rag_p100
#SBATCH --account=bfqt-delta-gpu
#SBATCH --partition=gpuA100x4
#SBATCH --nodes=1
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=16
#SBATCH --gres=gpu:nvidia_a100:2
#SBATCH --mem=128G
#SBATCH --time=12:00:00
#SBATCH --output=exp_rag_p100_%j.out
#SBATCH --error=exp_rag_p100_%j.err

# Full run: rag method, profiles 100-150 (50 profiles × 15 sessions)

cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model || exit 1
source /u/yurenh2/miniforge3/etc/profile.d/conda.sh
conda activate eval

export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface
export PYTHONPATH="${PWD}/src:${PWD}/collaborativeagents:${PYTHONPATH}"
export NCCL_P2P_DISABLE=1

set -a
source .env
set +a

pip install --quiet openai python-dotenv json-repair

MODEL_8B="/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/models/llama-3.1-8b-instruct"

pkill -f "vllm.entrypoints" 2>/dev/null || true
sleep 2

CUDA_VISIBLE_DEVICES=0 python -m vllm.entrypoints.openai.api_server \
    --model "$MODEL_8B" --port 8003 --tensor-parallel-size 1 \
    --gpu-memory-utilization 0.5 --max-model-len 8192 \
    --dtype bfloat16 --disable-log-requests &

# Poll the health endpoint (90 tries, 2 s apart); abort if the server never answers.
vllm_ready=0
for i in $(seq 1 90); do
    if curl -sf http://localhost:8003/health > /dev/null 2>&1; then
        vllm_ready=1
        break
    fi
    sleep 2
done
if [ "$vllm_ready" -ne 1 ]; then
    echo "ERROR: vLLM did not become healthy on port 8003" >&2
    pkill -f "vllm.entrypoints" 2>/dev/null || true
    exit 1
fi
echo "vLLM ready."

cd collaborativeagents/scripts || exit 1

python run_experiments.py \
    --methods rag \
    --datasets math-hard \
    --n-profiles 200 \
    --start-profile 100 \
    --end-profile 150 \
    --n-sessions 15 \
    --max-turns 8 \
    --use-vllm \
    --use-openai-user \
    --openai-user-model gpt-5-mini \
    --reward-mode llm \
    --vllm-agent-url http://localhost:8003/v1 \
    --parallel-profiles 25 \
    --profile-path ../data/complex_profiles_v2/profiles_200.jsonl \
    --output-dir ../results/fullscale_15sess
status=$?
# Propagate a failed run so the batch job is not reported as successful.
if [ "$status" -ne 0 ]; then
    echo "ERROR: run_experiments.py exited with status $status" >&2
    pkill -f "vllm.entrypoints" 2>/dev/null || true
    exit "$status"
fi

echo "rag p100-150 complete!"
pkill -f "vllm.entrypoints" 2>/dev/null || true

# diff --git a/collaborativeagents/slurm/fullscale/run_rag_p150.sh b/collaborativeagents/slurm/fullscale/run_rag_p150.sh
# new file mode 100755
# index 0000000..0ec5e4f
# --- /dev/null
# +++ b/collaborativeagents/slurm/fullscale/run_rag_p150.sh
# @@ -0,0 +1,66 @@
#!/bin/bash
#SBATCH --job-name=exp_rag_p150
#SBATCH --account=bfqt-delta-gpu
#SBATCH --partition=gpuA100x4
#SBATCH --nodes=1
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=16
#SBATCH --gres=gpu:nvidia_a100:2
#SBATCH --mem=128G
#SBATCH --time=12:00:00
#SBATCH --output=exp_rag_p150_%j.out
#SBATCH --error=exp_rag_p150_%j.err

# Full run: rag method, profiles 150-200 (50 profiles × 15 sessions)

cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model || exit 1
source /u/yurenh2/miniforge3/etc/profile.d/conda.sh
conda activate eval

export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface
export PYTHONPATH="${PWD}/src:${PWD}/collaborativeagents:${PYTHONPATH}"
export NCCL_P2P_DISABLE=1

set -a
source .env
set +a

pip install --quiet openai python-dotenv json-repair

MODEL_8B="/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/models/llama-3.1-8b-instruct"

pkill -f "vllm.entrypoints" 2>/dev/null || true
sleep 2

CUDA_VISIBLE_DEVICES=0 python -m vllm.entrypoints.openai.api_server \
    --model "$MODEL_8B" --port 8003 --tensor-parallel-size 1 \
    --gpu-memory-utilization 0.5 --max-model-len 8192 \
    --dtype bfloat16 --disable-log-requests &

# Poll the health endpoint (90 tries, 2 s apart); abort if the server never answers.
vllm_ready=0
for i in $(seq 1 90); do
    if curl -sf http://localhost:8003/health > /dev/null 2>&1; then
        vllm_ready=1
        break
    fi
    sleep 2
done
if [ "$vllm_ready" -ne 1 ]; then
    echo "ERROR: vLLM did not become healthy on port 8003" >&2
    pkill -f "vllm.entrypoints" 2>/dev/null || true
    exit 1
fi
echo "vLLM ready."
cd collaborativeagents/scripts || exit 1

python run_experiments.py \
    --methods rag \
    --datasets math-hard \
    --n-profiles 200 \
    --start-profile 150 \
    --end-profile 200 \
    --n-sessions 15 \
    --max-turns 8 \
    --use-vllm \
    --use-openai-user \
    --openai-user-model gpt-5-mini \
    --reward-mode llm \
    --vllm-agent-url http://localhost:8003/v1 \
    --parallel-profiles 25 \
    --profile-path ../data/complex_profiles_v2/profiles_200.jsonl \
    --output-dir ../results/fullscale_15sess
status=$?
# Propagate a failed run so the batch job is not reported as successful.
if [ "$status" -ne 0 ]; then
    echo "ERROR: run_experiments.py exited with status $status" >&2
    pkill -f "vllm.entrypoints" 2>/dev/null || true
    exit "$status"
fi

echo "rag p150-200 complete!"
pkill -f "vllm.entrypoints" 2>/dev/null || true

# diff --git a/collaborativeagents/slurm/fullscale/run_rag_p50.sh b/collaborativeagents/slurm/fullscale/run_rag_p50.sh
# new file mode 100755
# index 0000000..b625300
# --- /dev/null
# +++ b/collaborativeagents/slurm/fullscale/run_rag_p50.sh
# @@ -0,0 +1,66 @@
#!/bin/bash
#SBATCH --job-name=exp_rag_p50
#SBATCH --account=bfqt-delta-gpu
#SBATCH --partition=gpuA100x4
#SBATCH --nodes=1
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=16
#SBATCH --gres=gpu:nvidia_a100:2
#SBATCH --mem=128G
#SBATCH --time=12:00:00
#SBATCH --output=exp_rag_p50_%j.out
#SBATCH --error=exp_rag_p50_%j.err

# Full run: rag method, profiles 50-100 (50 profiles × 15 sessions)

cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model || exit 1
source /u/yurenh2/miniforge3/etc/profile.d/conda.sh
conda activate eval

export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface
export PYTHONPATH="${PWD}/src:${PWD}/collaborativeagents:${PYTHONPATH}"
export NCCL_P2P_DISABLE=1

set -a
source .env
set +a

pip install --quiet openai python-dotenv json-repair

MODEL_8B="/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/models/llama-3.1-8b-instruct"

pkill -f "vllm.entrypoints" 2>/dev/null || true
sleep 2

CUDA_VISIBLE_DEVICES=0 python -m vllm.entrypoints.openai.api_server \
    --model "$MODEL_8B" --port 8003 --tensor-parallel-size 1 \
    --gpu-memory-utilization 0.5 --max-model-len 8192 \
    --dtype bfloat16 --disable-log-requests &

# Poll the health endpoint (90 tries, 2 s apart); abort if the server never answers.
vllm_ready=0
for i in $(seq 1 90); do
    if curl -sf http://localhost:8003/health > /dev/null 2>&1; then
        vllm_ready=1
        break
    fi
    sleep 2
done
if [ "$vllm_ready" -ne 1 ]; then
    echo "ERROR: vLLM did not become healthy on port 8003" >&2
    pkill -f "vllm.entrypoints" 2>/dev/null || true
    exit 1
fi
echo "vLLM ready."

cd collaborativeagents/scripts || exit 1

python run_experiments.py \
    --methods rag \
    --datasets math-hard \
    --n-profiles 200 \
    --start-profile 50 \
    --end-profile 100 \
    --n-sessions 15 \
    --max-turns 8 \
    --use-vllm \
    --use-openai-user \
    --openai-user-model gpt-5-mini \
    --reward-mode llm \
    --vllm-agent-url http://localhost:8003/v1 \
    --parallel-profiles 25 \
    --profile-path ../data/complex_profiles_v2/profiles_200.jsonl \
    --output-dir ../results/fullscale_15sess
status=$?
# Propagate a failed run so the batch job is not reported as successful.
if [ "$status" -ne 0 ]; then
    echo "ERROR: run_experiments.py exited with status $status" >&2
    pkill -f "vllm.entrypoints" 2>/dev/null || true
    exit "$status"
fi

echo "rag p50-100 complete!"
pkill -f "vllm.entrypoints" 2>/dev/null || true

# diff --git a/collaborativeagents/slurm/fullscale/run_rag_vector_p0.sh b/collaborativeagents/slurm/fullscale/run_rag_vector_p0.sh
# new file mode 100755
# index 0000000..1f28f8f
# --- /dev/null
# +++ b/collaborativeagents/slurm/fullscale/run_rag_vector_p0.sh
# @@ -0,0 +1,66 @@
#!/bin/bash
#SBATCH --job-name=exp_rag_vector_p0
#SBATCH --account=bfqt-delta-gpu
#SBATCH --partition=gpuA100x4
#SBATCH --nodes=1
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=16
#SBATCH --gres=gpu:nvidia_a100:2
#SBATCH --mem=128G
#SBATCH --time=12:00:00
#SBATCH --output=exp_rag_vector_p0_%j.out
#SBATCH --error=exp_rag_vector_p0_%j.err

# Full run: rag_vector method, profiles 0-50 (50 profiles × 15 sessions)

cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model || exit 1
source /u/yurenh2/miniforge3/etc/profile.d/conda.sh
conda activate eval

export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface
export PYTHONPATH="${PWD}/src:${PWD}/collaborativeagents:${PYTHONPATH}"
export NCCL_P2P_DISABLE=1

set -a
source .env
set +a

pip install --quiet openai python-dotenv json-repair

MODEL_8B="/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/models/llama-3.1-8b-instruct"

# Kill any process whose command line matches vllm.entrypoints before starting a new server.
pkill -f "vllm.entrypoints" 2>/dev/null || true
sleep 2
+CUDA_VISIBLE_DEVICES=0 python -m vllm.entrypoints.openai.api_server \ + --model $MODEL_8B --port 8003 --tensor-parallel-size 1 \ + --gpu-memory-utilization 0.5 --max-model-len 8192 \ + --dtype bfloat16 --disable-log-requests & + +for i in $(seq 1 90); do + curl -s http://localhost:8003/health > /dev/null 2>&1 && break + sleep 2 +done +echo "vLLM ready." + +cd collaborativeagents/scripts + +python run_experiments.py \ + --methods rag_vector \ + --datasets math-hard \ + --n-profiles 200 \ + --start-profile 0 \ + --end-profile 50 \ + --n-sessions 15 \ + --max-turns 8 \ + --use-vllm \ + --use-openai-user \ + --openai-user-model gpt-5-mini \ + --reward-mode llm \ + --vllm-agent-url http://localhost:8003/v1 \ + --parallel-profiles 25 \ + --profile-path ../data/complex_profiles_v2/profiles_200.jsonl \ + --output-dir ../results/fullscale_15sess + +echo "rag_vector p0-50 complete!" +pkill -f "vllm.entrypoints" 2>/dev/null || true diff --git a/collaborativeagents/slurm/fullscale/run_rag_vector_p100.sh b/collaborativeagents/slurm/fullscale/run_rag_vector_p100.sh new file mode 100755 index 0000000..b658bab --- /dev/null +++ b/collaborativeagents/slurm/fullscale/run_rag_vector_p100.sh @@ -0,0 +1,66 @@ +#!/bin/bash +#SBATCH --job-name=exp_rag_vector_p100 +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA100x4 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=16 +#SBATCH --gres=gpu:nvidia_a100:2 +#SBATCH --mem=128G +#SBATCH --time=12:00:00 +#SBATCH --output=exp_rag_vector_p100_%j.out +#SBATCH --error=exp_rag_vector_p100_%j.err + +# Full run: rag_vector method, profiles 100-150 (50 profiles × 15 sessions) + +cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model +source /u/yurenh2/miniforge3/etc/profile.d/conda.sh +conda activate eval + +export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface +export PYTHONPATH="${PWD}/src:${PWD}/collaborativeagents:${PYTHONPATH}" +export NCCL_P2P_DISABLE=1 + +set -a +source .env +set +a + +pip 
install --quiet openai python-dotenv json-repair + +MODEL_8B="/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/models/llama-3.1-8b-instruct" + +pkill -f "vllm.entrypoints" 2>/dev/null || true +sleep 2 + +CUDA_VISIBLE_DEVICES=0 python -m vllm.entrypoints.openai.api_server \ + --model $MODEL_8B --port 8003 --tensor-parallel-size 1 \ + --gpu-memory-utilization 0.5 --max-model-len 8192 \ + --dtype bfloat16 --disable-log-requests & + +for i in $(seq 1 90); do + curl -s http://localhost:8003/health > /dev/null 2>&1 && break + sleep 2 +done +echo "vLLM ready." + +cd collaborativeagents/scripts + +python run_experiments.py \ + --methods rag_vector \ + --datasets math-hard \ + --n-profiles 200 \ + --start-profile 100 \ + --end-profile 150 \ + --n-sessions 15 \ + --max-turns 8 \ + --use-vllm \ + --use-openai-user \ + --openai-user-model gpt-5-mini \ + --reward-mode llm \ + --vllm-agent-url http://localhost:8003/v1 \ + --parallel-profiles 25 \ + --profile-path ../data/complex_profiles_v2/profiles_200.jsonl \ + --output-dir ../results/fullscale_15sess + +echo "rag_vector p100-150 complete!" 
+pkill -f "vllm.entrypoints" 2>/dev/null || true diff --git a/collaborativeagents/slurm/fullscale/run_rag_vector_p150.sh b/collaborativeagents/slurm/fullscale/run_rag_vector_p150.sh new file mode 100755 index 0000000..8c2458f --- /dev/null +++ b/collaborativeagents/slurm/fullscale/run_rag_vector_p150.sh @@ -0,0 +1,66 @@ +#!/bin/bash +#SBATCH --job-name=exp_rag_vector_p150 +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA100x4 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=16 +#SBATCH --gres=gpu:nvidia_a100:2 +#SBATCH --mem=128G +#SBATCH --time=12:00:00 +#SBATCH --output=exp_rag_vector_p150_%j.out +#SBATCH --error=exp_rag_vector_p150_%j.err + +# Full run: rag_vector method, profiles 150-200 (50 profiles × 15 sessions) + +cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model +source /u/yurenh2/miniforge3/etc/profile.d/conda.sh +conda activate eval + +export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface +export PYTHONPATH="${PWD}/src:${PWD}/collaborativeagents:${PYTHONPATH}" +export NCCL_P2P_DISABLE=1 + +set -a +source .env +set +a + +pip install --quiet openai python-dotenv json-repair + +MODEL_8B="/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/models/llama-3.1-8b-instruct" + +pkill -f "vllm.entrypoints" 2>/dev/null || true +sleep 2 + +CUDA_VISIBLE_DEVICES=0 python -m vllm.entrypoints.openai.api_server \ + --model $MODEL_8B --port 8003 --tensor-parallel-size 1 \ + --gpu-memory-utilization 0.5 --max-model-len 8192 \ + --dtype bfloat16 --disable-log-requests & + +for i in $(seq 1 90); do + curl -s http://localhost:8003/health > /dev/null 2>&1 && break + sleep 2 +done +echo "vLLM ready." 
+ +cd collaborativeagents/scripts + +python run_experiments.py \ + --methods rag_vector \ + --datasets math-hard \ + --n-profiles 200 \ + --start-profile 150 \ + --end-profile 200 \ + --n-sessions 15 \ + --max-turns 8 \ + --use-vllm \ + --use-openai-user \ + --openai-user-model gpt-5-mini \ + --reward-mode llm \ + --vllm-agent-url http://localhost:8003/v1 \ + --parallel-profiles 25 \ + --profile-path ../data/complex_profiles_v2/profiles_200.jsonl \ + --output-dir ../results/fullscale_15sess + +echo "rag_vector p150-200 complete!" +pkill -f "vllm.entrypoints" 2>/dev/null || true diff --git a/collaborativeagents/slurm/fullscale/run_rag_vector_p50.sh b/collaborativeagents/slurm/fullscale/run_rag_vector_p50.sh new file mode 100755 index 0000000..afb0164 --- /dev/null +++ b/collaborativeagents/slurm/fullscale/run_rag_vector_p50.sh @@ -0,0 +1,66 @@ +#!/bin/bash +#SBATCH --job-name=exp_rag_vector_p50 +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA100x4 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=16 +#SBATCH --gres=gpu:nvidia_a100:2 +#SBATCH --mem=128G +#SBATCH --time=12:00:00 +#SBATCH --output=exp_rag_vector_p50_%j.out +#SBATCH --error=exp_rag_vector_p50_%j.err + +# Full run: rag_vector method, profiles 50-100 (50 profiles × 15 sessions) + +cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model +source /u/yurenh2/miniforge3/etc/profile.d/conda.sh +conda activate eval + +export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface +export PYTHONPATH="${PWD}/src:${PWD}/collaborativeagents:${PYTHONPATH}" +export NCCL_P2P_DISABLE=1 + +set -a +source .env +set +a + +pip install --quiet openai python-dotenv json-repair + +MODEL_8B="/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/models/llama-3.1-8b-instruct" + +pkill -f "vllm.entrypoints" 2>/dev/null || true +sleep 2 + +CUDA_VISIBLE_DEVICES=0 python -m vllm.entrypoints.openai.api_server \ + --model $MODEL_8B --port 8003 --tensor-parallel-size 1 \ + 
--gpu-memory-utilization 0.5 --max-model-len 8192 \ + --dtype bfloat16 --disable-log-requests & + +for i in $(seq 1 90); do + curl -s http://localhost:8003/health > /dev/null 2>&1 && break + sleep 2 +done +echo "vLLM ready." + +cd collaborativeagents/scripts + +python run_experiments.py \ + --methods rag_vector \ + --datasets math-hard \ + --n-profiles 200 \ + --start-profile 50 \ + --end-profile 100 \ + --n-sessions 15 \ + --max-turns 8 \ + --use-vllm \ + --use-openai-user \ + --openai-user-model gpt-5-mini \ + --reward-mode llm \ + --vllm-agent-url http://localhost:8003/v1 \ + --parallel-profiles 25 \ + --profile-path ../data/complex_profiles_v2/profiles_200.jsonl \ + --output-dir ../results/fullscale_15sess + +echo "rag_vector p50-100 complete!" +pkill -f "vllm.entrypoints" 2>/dev/null || true diff --git a/collaborativeagents/slurm/fullscale/run_reflection_p0.sh b/collaborativeagents/slurm/fullscale/run_reflection_p0.sh new file mode 100755 index 0000000..f5d5649 --- /dev/null +++ b/collaborativeagents/slurm/fullscale/run_reflection_p0.sh @@ -0,0 +1,66 @@ +#!/bin/bash +#SBATCH --job-name=exp_reflection_p0 +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA100x4 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=16 +#SBATCH --gres=gpu:nvidia_a100:2 +#SBATCH --mem=128G +#SBATCH --time=12:00:00 +#SBATCH --output=exp_reflection_p0_%j.out +#SBATCH --error=exp_reflection_p0_%j.err + +# Full run: reflection method, profiles 0-50 (50 profiles × 15 sessions) + +cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model +source /u/yurenh2/miniforge3/etc/profile.d/conda.sh +conda activate eval + +export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface +export PYTHONPATH="${PWD}/src:${PWD}/collaborativeagents:${PYTHONPATH}" +export NCCL_P2P_DISABLE=1 + +set -a +source .env +set +a + +pip install --quiet openai python-dotenv json-repair + 
+MODEL_8B="/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/models/llama-3.1-8b-instruct" + +pkill -f "vllm.entrypoints" 2>/dev/null || true +sleep 2 + +CUDA_VISIBLE_DEVICES=0 python -m vllm.entrypoints.openai.api_server \ + --model $MODEL_8B --port 8003 --tensor-parallel-size 1 \ + --gpu-memory-utilization 0.5 --max-model-len 8192 \ + --dtype bfloat16 --disable-log-requests & + +for i in $(seq 1 90); do + curl -s http://localhost:8003/health > /dev/null 2>&1 && break + sleep 2 +done +echo "vLLM ready." + +cd collaborativeagents/scripts + +python run_experiments.py \ + --methods reflection \ + --datasets math-hard \ + --n-profiles 200 \ + --start-profile 0 \ + --end-profile 50 \ + --n-sessions 15 \ + --max-turns 8 \ + --use-vllm \ + --use-openai-user \ + --openai-user-model gpt-5-mini \ + --reward-mode llm \ + --vllm-agent-url http://localhost:8003/v1 \ + --parallel-profiles 25 \ + --profile-path ../data/complex_profiles_v2/profiles_200.jsonl \ + --output-dir ../results/fullscale_15sess + +echo "reflection p0-50 complete!" 
+pkill -f "vllm.entrypoints" 2>/dev/null || true diff --git a/collaborativeagents/slurm/fullscale/run_reflection_p100.sh b/collaborativeagents/slurm/fullscale/run_reflection_p100.sh new file mode 100755 index 0000000..68f7047 --- /dev/null +++ b/collaborativeagents/slurm/fullscale/run_reflection_p100.sh @@ -0,0 +1,66 @@ +#!/bin/bash +#SBATCH --job-name=exp_reflection_p100 +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA100x4 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=16 +#SBATCH --gres=gpu:nvidia_a100:2 +#SBATCH --mem=128G +#SBATCH --time=12:00:00 +#SBATCH --output=exp_reflection_p100_%j.out +#SBATCH --error=exp_reflection_p100_%j.err + +# Full run: reflection method, profiles 100-150 (50 profiles × 15 sessions) + +cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model +source /u/yurenh2/miniforge3/etc/profile.d/conda.sh +conda activate eval + +export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface +export PYTHONPATH="${PWD}/src:${PWD}/collaborativeagents:${PYTHONPATH}" +export NCCL_P2P_DISABLE=1 + +set -a +source .env +set +a + +pip install --quiet openai python-dotenv json-repair + +MODEL_8B="/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/models/llama-3.1-8b-instruct" + +pkill -f "vllm.entrypoints" 2>/dev/null || true +sleep 2 + +CUDA_VISIBLE_DEVICES=0 python -m vllm.entrypoints.openai.api_server \ + --model $MODEL_8B --port 8003 --tensor-parallel-size 1 \ + --gpu-memory-utilization 0.5 --max-model-len 8192 \ + --dtype bfloat16 --disable-log-requests & + +for i in $(seq 1 90); do + curl -s http://localhost:8003/health > /dev/null 2>&1 && break + sleep 2 +done +echo "vLLM ready." 
+ +cd collaborativeagents/scripts + +python run_experiments.py \ + --methods reflection \ + --datasets math-hard \ + --n-profiles 200 \ + --start-profile 100 \ + --end-profile 150 \ + --n-sessions 15 \ + --max-turns 8 \ + --use-vllm \ + --use-openai-user \ + --openai-user-model gpt-5-mini \ + --reward-mode llm \ + --vllm-agent-url http://localhost:8003/v1 \ + --parallel-profiles 25 \ + --profile-path ../data/complex_profiles_v2/profiles_200.jsonl \ + --output-dir ../results/fullscale_15sess + +echo "reflection p100-150 complete!" +pkill -f "vllm.entrypoints" 2>/dev/null || true diff --git a/collaborativeagents/slurm/fullscale/run_reflection_p150.sh b/collaborativeagents/slurm/fullscale/run_reflection_p150.sh new file mode 100755 index 0000000..a451e49 --- /dev/null +++ b/collaborativeagents/slurm/fullscale/run_reflection_p150.sh @@ -0,0 +1,66 @@ +#!/bin/bash +#SBATCH --job-name=exp_reflection_p150 +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA100x4 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=16 +#SBATCH --gres=gpu:nvidia_a100:2 +#SBATCH --mem=128G +#SBATCH --time=12:00:00 +#SBATCH --output=exp_reflection_p150_%j.out +#SBATCH --error=exp_reflection_p150_%j.err + +# Full run: reflection method, profiles 150-200 (50 profiles × 15 sessions) + +cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model +source /u/yurenh2/miniforge3/etc/profile.d/conda.sh +conda activate eval + +export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface +export PYTHONPATH="${PWD}/src:${PWD}/collaborativeagents:${PYTHONPATH}" +export NCCL_P2P_DISABLE=1 + +set -a +source .env +set +a + +pip install --quiet openai python-dotenv json-repair + +MODEL_8B="/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/models/llama-3.1-8b-instruct" + +pkill -f "vllm.entrypoints" 2>/dev/null || true +sleep 2 + +CUDA_VISIBLE_DEVICES=0 python -m vllm.entrypoints.openai.api_server \ + --model $MODEL_8B --port 8003 --tensor-parallel-size 1 \ + 
--gpu-memory-utilization 0.5 --max-model-len 8192 \ + --dtype bfloat16 --disable-log-requests & + +for i in $(seq 1 90); do + curl -s http://localhost:8003/health > /dev/null 2>&1 && break + sleep 2 +done +echo "vLLM ready." + +cd collaborativeagents/scripts + +python run_experiments.py \ + --methods reflection \ + --datasets math-hard \ + --n-profiles 200 \ + --start-profile 150 \ + --end-profile 200 \ + --n-sessions 15 \ + --max-turns 8 \ + --use-vllm \ + --use-openai-user \ + --openai-user-model gpt-5-mini \ + --reward-mode llm \ + --vllm-agent-url http://localhost:8003/v1 \ + --parallel-profiles 25 \ + --profile-path ../data/complex_profiles_v2/profiles_200.jsonl \ + --output-dir ../results/fullscale_15sess + +echo "reflection p150-200 complete!" +pkill -f "vllm.entrypoints" 2>/dev/null || true diff --git a/collaborativeagents/slurm/fullscale/run_reflection_p50.sh b/collaborativeagents/slurm/fullscale/run_reflection_p50.sh new file mode 100755 index 0000000..dc977d7 --- /dev/null +++ b/collaborativeagents/slurm/fullscale/run_reflection_p50.sh @@ -0,0 +1,66 @@ +#!/bin/bash +#SBATCH --job-name=exp_reflection_p50 +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA100x4 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=16 +#SBATCH --gres=gpu:nvidia_a100:2 +#SBATCH --mem=128G +#SBATCH --time=12:00:00 +#SBATCH --output=exp_reflection_p50_%j.out +#SBATCH --error=exp_reflection_p50_%j.err + +# Full run: reflection method, profiles 50-100 (50 profiles × 15 sessions) + +cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model +source /u/yurenh2/miniforge3/etc/profile.d/conda.sh +conda activate eval + +export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface +export PYTHONPATH="${PWD}/src:${PWD}/collaborativeagents:${PYTHONPATH}" +export NCCL_P2P_DISABLE=1 + +set -a +source .env +set +a + +pip install --quiet openai python-dotenv json-repair + 
+MODEL_8B="/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/models/llama-3.1-8b-instruct" + +pkill -f "vllm.entrypoints" 2>/dev/null || true +sleep 2 + +CUDA_VISIBLE_DEVICES=0 python -m vllm.entrypoints.openai.api_server \ + --model $MODEL_8B --port 8003 --tensor-parallel-size 1 \ + --gpu-memory-utilization 0.5 --max-model-len 8192 \ + --dtype bfloat16 --disable-log-requests & + +for i in $(seq 1 90); do + curl -s http://localhost:8003/health > /dev/null 2>&1 && break + sleep 2 +done +echo "vLLM ready." + +cd collaborativeagents/scripts + +python run_experiments.py \ + --methods reflection \ + --datasets math-hard \ + --n-profiles 200 \ + --start-profile 50 \ + --end-profile 100 \ + --n-sessions 15 \ + --max-turns 8 \ + --use-vllm \ + --use-openai-user \ + --openai-user-model gpt-5-mini \ + --reward-mode llm \ + --vllm-agent-url http://localhost:8003/v1 \ + --parallel-profiles 25 \ + --profile-path ../data/complex_profiles_v2/profiles_200.jsonl \ + --output-dir ../results/fullscale_15sess + +echo "reflection p50-100 complete!" 
+pkill -f "vllm.entrypoints" 2>/dev/null || true diff --git a/collaborativeagents/slurm/fullscale/run_vanilla_p0.sh b/collaborativeagents/slurm/fullscale/run_vanilla_p0.sh new file mode 100755 index 0000000..f5706c8 --- /dev/null +++ b/collaborativeagents/slurm/fullscale/run_vanilla_p0.sh @@ -0,0 +1,66 @@ +#!/bin/bash +#SBATCH --job-name=exp_vanilla_p0 +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA100x4 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=16 +#SBATCH --gres=gpu:nvidia_a100:2 +#SBATCH --mem=128G +#SBATCH --time=12:00:00 +#SBATCH --output=exp_vanilla_p0_%j.out +#SBATCH --error=exp_vanilla_p0_%j.err + +# Full run: vanilla method, profiles 0-50 (50 profiles × 15 sessions) + +cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model +source /u/yurenh2/miniforge3/etc/profile.d/conda.sh +conda activate eval + +export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface +export PYTHONPATH="${PWD}/src:${PWD}/collaborativeagents:${PYTHONPATH}" +export NCCL_P2P_DISABLE=1 + +set -a +source .env +set +a + +pip install --quiet openai python-dotenv json-repair + +MODEL_8B="/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/models/llama-3.1-8b-instruct" + +pkill -f "vllm.entrypoints" 2>/dev/null || true +sleep 2 + +CUDA_VISIBLE_DEVICES=0 python -m vllm.entrypoints.openai.api_server \ + --model $MODEL_8B --port 8003 --tensor-parallel-size 1 \ + --gpu-memory-utilization 0.5 --max-model-len 8192 \ + --dtype bfloat16 --disable-log-requests & + +for i in $(seq 1 90); do + curl -s http://localhost:8003/health > /dev/null 2>&1 && break + sleep 2 +done +echo "vLLM ready." 
+ +cd collaborativeagents/scripts + +python run_experiments.py \ + --methods vanilla \ + --datasets math-hard \ + --n-profiles 200 \ + --start-profile 0 \ + --end-profile 50 \ + --n-sessions 15 \ + --max-turns 8 \ + --use-vllm \ + --use-openai-user \ + --openai-user-model gpt-5-mini \ + --reward-mode llm \ + --vllm-agent-url http://localhost:8003/v1 \ + --parallel-profiles 25 \ + --profile-path ../data/complex_profiles_v2/profiles_200.jsonl \ + --output-dir ../results/fullscale_15sess + +echo "vanilla p0-50 complete!" +pkill -f "vllm.entrypoints" 2>/dev/null || true diff --git a/collaborativeagents/slurm/fullscale/run_vanilla_p100.sh b/collaborativeagents/slurm/fullscale/run_vanilla_p100.sh new file mode 100755 index 0000000..8ca9ce1 --- /dev/null +++ b/collaborativeagents/slurm/fullscale/run_vanilla_p100.sh @@ -0,0 +1,66 @@ +#!/bin/bash +#SBATCH --job-name=exp_vanilla_p100 +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA100x4 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=16 +#SBATCH --gres=gpu:nvidia_a100:2 +#SBATCH --mem=128G +#SBATCH --time=12:00:00 +#SBATCH --output=exp_vanilla_p100_%j.out +#SBATCH --error=exp_vanilla_p100_%j.err + +# Full run: vanilla method, profiles 100-150 (50 profiles × 15 sessions) + +cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model +source /u/yurenh2/miniforge3/etc/profile.d/conda.sh +conda activate eval + +export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface +export PYTHONPATH="${PWD}/src:${PWD}/collaborativeagents:${PYTHONPATH}" +export NCCL_P2P_DISABLE=1 + +set -a +source .env +set +a + +pip install --quiet openai python-dotenv json-repair + +MODEL_8B="/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/models/llama-3.1-8b-instruct" + +pkill -f "vllm.entrypoints" 2>/dev/null || true +sleep 2 + +CUDA_VISIBLE_DEVICES=0 python -m vllm.entrypoints.openai.api_server \ + --model $MODEL_8B --port 8003 --tensor-parallel-size 1 \ + --gpu-memory-utilization 0.5 
--max-model-len 8192 \ + --dtype bfloat16 --disable-log-requests & + +for i in $(seq 1 90); do + curl -s http://localhost:8003/health > /dev/null 2>&1 && break + sleep 2 +done +echo "vLLM ready." + +cd collaborativeagents/scripts + +python run_experiments.py \ + --methods vanilla \ + --datasets math-hard \ + --n-profiles 200 \ + --start-profile 100 \ + --end-profile 150 \ + --n-sessions 15 \ + --max-turns 8 \ + --use-vllm \ + --use-openai-user \ + --openai-user-model gpt-5-mini \ + --reward-mode llm \ + --vllm-agent-url http://localhost:8003/v1 \ + --parallel-profiles 25 \ + --profile-path ../data/complex_profiles_v2/profiles_200.jsonl \ + --output-dir ../results/fullscale_15sess + +echo "vanilla p100-150 complete!" +pkill -f "vllm.entrypoints" 2>/dev/null || true diff --git a/collaborativeagents/slurm/fullscale/run_vanilla_p150.sh b/collaborativeagents/slurm/fullscale/run_vanilla_p150.sh new file mode 100755 index 0000000..07ff6d3 --- /dev/null +++ b/collaborativeagents/slurm/fullscale/run_vanilla_p150.sh @@ -0,0 +1,66 @@ +#!/bin/bash +#SBATCH --job-name=exp_vanilla_p150 +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA100x4 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=16 +#SBATCH --gres=gpu:nvidia_a100:2 +#SBATCH --mem=128G +#SBATCH --time=12:00:00 +#SBATCH --output=exp_vanilla_p150_%j.out +#SBATCH --error=exp_vanilla_p150_%j.err + +# Full run: vanilla method, profiles 150-200 (50 profiles × 15 sessions) + +cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model +source /u/yurenh2/miniforge3/etc/profile.d/conda.sh +conda activate eval + +export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface +export PYTHONPATH="${PWD}/src:${PWD}/collaborativeagents:${PYTHONPATH}" +export NCCL_P2P_DISABLE=1 + +set -a +source .env +set +a + +pip install --quiet openai python-dotenv json-repair + +MODEL_8B="/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/models/llama-3.1-8b-instruct" + +pkill -f 
"vllm.entrypoints" 2>/dev/null || true +sleep 2 + +CUDA_VISIBLE_DEVICES=0 python -m vllm.entrypoints.openai.api_server \ + --model $MODEL_8B --port 8003 --tensor-parallel-size 1 \ + --gpu-memory-utilization 0.5 --max-model-len 8192 \ + --dtype bfloat16 --disable-log-requests & + +for i in $(seq 1 90); do + curl -s http://localhost:8003/health > /dev/null 2>&1 && break + sleep 2 +done +echo "vLLM ready." + +cd collaborativeagents/scripts + +python run_experiments.py \ + --methods vanilla \ + --datasets math-hard \ + --n-profiles 200 \ + --start-profile 150 \ + --end-profile 200 \ + --n-sessions 15 \ + --max-turns 8 \ + --use-vllm \ + --use-openai-user \ + --openai-user-model gpt-5-mini \ + --reward-mode llm \ + --vllm-agent-url http://localhost:8003/v1 \ + --parallel-profiles 25 \ + --profile-path ../data/complex_profiles_v2/profiles_200.jsonl \ + --output-dir ../results/fullscale_15sess + +echo "vanilla p150-200 complete!" +pkill -f "vllm.entrypoints" 2>/dev/null || true diff --git a/collaborativeagents/slurm/fullscale/run_vanilla_p50.sh b/collaborativeagents/slurm/fullscale/run_vanilla_p50.sh new file mode 100755 index 0000000..d77b881 --- /dev/null +++ b/collaborativeagents/slurm/fullscale/run_vanilla_p50.sh @@ -0,0 +1,66 @@ +#!/bin/bash +#SBATCH --job-name=exp_vanilla_p50 +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA100x4 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=16 +#SBATCH --gres=gpu:nvidia_a100:2 +#SBATCH --mem=128G +#SBATCH --time=12:00:00 +#SBATCH --output=exp_vanilla_p50_%j.out +#SBATCH --error=exp_vanilla_p50_%j.err + +# Full run: vanilla method, profiles 50-100 (50 profiles × 15 sessions) + +cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model +source /u/yurenh2/miniforge3/etc/profile.d/conda.sh +conda activate eval + +export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface +export PYTHONPATH="${PWD}/src:${PWD}/collaborativeagents:${PYTHONPATH}" +export NCCL_P2P_DISABLE=1 + +set -a +source 
.env +set +a + +pip install --quiet openai python-dotenv json-repair + +MODEL_8B="/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/models/llama-3.1-8b-instruct" + +pkill -f "vllm.entrypoints" 2>/dev/null || true +sleep 2 + +CUDA_VISIBLE_DEVICES=0 python -m vllm.entrypoints.openai.api_server \ + --model $MODEL_8B --port 8003 --tensor-parallel-size 1 \ + --gpu-memory-utilization 0.5 --max-model-len 8192 \ + --dtype bfloat16 --disable-log-requests & + +for i in $(seq 1 90); do + curl -s http://localhost:8003/health > /dev/null 2>&1 && break + sleep 2 +done +echo "vLLM ready." + +cd collaborativeagents/scripts + +python run_experiments.py \ + --methods vanilla \ + --datasets math-hard \ + --n-profiles 200 \ + --start-profile 50 \ + --end-profile 100 \ + --n-sessions 15 \ + --max-turns 8 \ + --use-vllm \ + --use-openai-user \ + --openai-user-model gpt-5-mini \ + --reward-mode llm \ + --vllm-agent-url http://localhost:8003/v1 \ + --parallel-profiles 25 \ + --profile-path ../data/complex_profiles_v2/profiles_200.jsonl \ + --output-dir ../results/fullscale_15sess + +echo "vanilla p50-100 complete!" +pkill -f "vllm.entrypoints" 2>/dev/null || true diff --git a/collaborativeagents/slurm/fullscale/submit_all.sh b/collaborativeagents/slurm/fullscale/submit_all.sh new file mode 100644 index 0000000..5b76169 --- /dev/null +++ b/collaborativeagents/slurm/fullscale/submit_all.sh @@ -0,0 +1,29 @@ +#!/bin/bash +# Submit all 24 jobs for full-scale experiment +# Total: 200 profiles × 6 methods × 15 sessions = 18,000 sessions +# Split: 6 methods × 4 profile ranges (50 each) = 24 jobs +# Per job: 50 profiles × 15 sessions = 750 sessions ≈ 7-8 hours + +echo "Submitting all 24 jobs for full-scale experiment..." 
+echo "Total: 200 profiles × 6 methods × 15 sessions = 18,000 sessions" +echo "Split: 24 jobs (6 methods × 4 profile ranges of 50)" +echo "" + +cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/collaborativeagents/slurm/fullscale + +JOBS="" + +for script in run_*.sh; do + if [[ "$script" != "generate_jobs.sh" && "$script" != "submit_all.sh" && "$script" != "test_run.sh" ]]; then + JOB_ID=$(sbatch "$script" | awk '{print $4}') + JOBS="$JOBS $JOB_ID" + echo "Submitted $script -> Job ID: $JOB_ID" + fi +done + +echo "" +echo "All jobs submitted!" +echo "Job IDs:$JOBS" +echo "" +echo "Monitor with: squeue -u \$USER" +echo "Check results in: collaborativeagents/results/fullscale_15sess/" diff --git a/collaborativeagents/slurm/fullscale/test_25parallel.sh b/collaborativeagents/slurm/fullscale/test_25parallel.sh new file mode 100644 index 0000000..09d5ddb --- /dev/null +++ b/collaborativeagents/slurm/fullscale/test_25parallel.sh @@ -0,0 +1,66 @@ +#!/bin/bash +#SBATCH --job-name=test_25parallel +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA100x4 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=16 +#SBATCH --gres=gpu:nvidia_a100:2 +#SBATCH --mem=128G +#SBATCH --time=1:00:00 +#SBATCH --output=test_25parallel_%j.out +#SBATCH --error=test_25parallel_%j.err + +# Quick test: 25 profiles × 2 sessions × 1 method (vanilla) = 50 sessions +# With 25 parallel profiles to measure realistic throughput + +cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model +source /u/yurenh2/miniforge3/etc/profile.d/conda.sh +conda activate eval + +export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface +export PYTHONPATH="${PWD}/src:${PWD}/collaborativeagents:${PYTHONPATH}" +export NCCL_P2P_DISABLE=1 + +set -a +source .env +set +a + +pip install --quiet openai python-dotenv json-repair + +MODEL_8B="/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/models/llama-3.1-8b-instruct" + +pkill -f "vllm.entrypoints" 
2>/dev/null || true +sleep 2 + +CUDA_VISIBLE_DEVICES=0 python -m vllm.entrypoints.openai.api_server \ + --model $MODEL_8B --port 8003 --tensor-parallel-size 1 \ + --gpu-memory-utilization 0.5 --max-model-len 8192 \ + --dtype bfloat16 --disable-log-requests & + +for i in $(seq 1 90); do + curl -s http://localhost:8003/health > /dev/null 2>&1 && break + sleep 2 +done +echo "vLLM ready." + +cd collaborativeagents/scripts + +# Test with 25 parallel profiles +python run_experiments.py \ + --methods vanilla \ + --datasets math-hard \ + --n-profiles 25 \ + --n-sessions 2 \ + --max-turns 8 \ + --use-vllm \ + --use-openai-user \ + --openai-user-model gpt-5-mini \ + --reward-mode llm \ + --vllm-agent-url http://localhost:8003/v1 \ + --parallel-profiles 25 \ + --profile-path ../data/complex_profiles_v2/profiles_200.jsonl \ + --output-dir ../results/test_25parallel_$(date +%Y%m%d_%H%M%S) + +echo "Test complete!" +pkill -f "vllm.entrypoints" 2>/dev/null || true diff --git a/collaborativeagents/slurm/fullscale/test_25parallel_15649074.err b/collaborativeagents/slurm/fullscale/test_25parallel_15649074.err new file mode 100644 index 0000000..96ed829 --- /dev/null +++ b/collaborativeagents/slurm/fullscale/test_25parallel_15649074.err @@ -0,0 +1,386 @@ +/u/yurenh2/miniforge3/envs/eval/lib/python3.11/site-packages/transformers/utils/hub.py:110: FutureWarning: Using `TRANSFORMERS_CACHE` is deprecated and will be removed in v5 of Transformers. Use `HF_HOME` instead. + warnings.warn( +/u/yurenh2/miniforge3/envs/eval/lib/python3.11/site-packages/transformers/utils/hub.py:110: FutureWarning: Using `TRANSFORMERS_CACHE` is deprecated and will be removed in v5 of Transformers. Use `HF_HOME` instead. + warnings.warn( +[0;36m(EngineCore_DP0 pid=2749050)[0;0m
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s] +[0;36m(EngineCore_DP0 pid=2749050)[0;0m
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:08<00:26, 8.77s/it] +[0;36m(EngineCore_DP0 pid=2749050)[0;0m
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:10<00:09, 4.67s/it] +[0;36m(EngineCore_DP0 pid=2749050)[0;0m
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:19<00:06, 6.61s/it] +[0;36m(EngineCore_DP0 pid=2749050)[0;0m
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:27<00:00, 7.00s/it] +[0;36m(EngineCore_DP0 pid=2749050)[0;0m
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:27<00:00, 6.77s/it] +[0;36m(EngineCore_DP0 pid=2749050)[0;0m +[0;36m(EngineCore_DP0 pid=2749050)[0;0m
Capturing CUDA graphs (mixed prefill-decode, PIECEWISE): 0%| | 0/51 [00:00<?, ?it/s]
Capturing CUDA graphs (mixed prefill-decode, PIECEWISE): 4%|▍ | 2/51 [00:00<00:03, 14.49it/s]
Capturing CUDA graphs (mixed prefill-decode, PIECEWISE): 8%|▊ | 4/51 [00:00<00:03, 15.08it/s]
Capturing CUDA graphs (mixed prefill-decode, PIECEWISE): 12%|█▏ | 6/51 [00:00<00:02, 16.11it/s]
Capturing CUDA graphs (mixed prefill-decode, PIECEWISE): 16%|█▌ | 8/51 [00:00<00:02, 16.68it/s]
Capturing CUDA graphs (mixed prefill-decode, PIECEWISE): 22%|██▏ | 11/51 [00:00<00:02, 18.08it/s]
Capturing CUDA graphs (mixed prefill-decode, PIECEWISE): 27%|██▋ | 14/51 [00:00<00:01, 18.87it/s]
Capturing CUDA graphs (mixed prefill-decode, PIECEWISE): 33%|███▎ | 17/51 [00:00<00:01, 19.61it/s]
Capturing CUDA graphs (mixed prefill-decode, PIECEWISE): 39%|███▉ | 20/51 [00:01<00:01, 20.61it/s]
Capturing CUDA graphs (mixed prefill-decode, PIECEWISE): 45%|████▌ | 23/51 [00:01<00:01, 21.26it/s]
Capturing CUDA graphs (mixed prefill-decode, PIECEWISE): 51%|█████ | 26/51 [00:01<00:01, 21.79it/s]
Capturing CUDA graphs (mixed prefill-decode, PIECEWISE): 57%|█████▋ | 29/51 [00:01<00:01, 20.51it/s]
Capturing CUDA graphs (mixed prefill-decode, PIECEWISE): 63%|██████▎ | 32/51 [00:01<00:00, 21.35it/s]
Capturing CUDA graphs (mixed prefill-decode, PIECEWISE): 69%|██████▊ | 35/51 [00:01<00:00, 22.13it/s]
Capturing CUDA graphs (mixed prefill-decode, PIECEWISE): 75%|███████▍ | 38/51 [00:01<00:00, 22.75it/s]
Capturing CUDA graphs (mixed prefill-decode, PIECEWISE): 80%|████████ | 41/51 [00:01<00:00, 23.31it/s]
Capturing CUDA graphs (mixed prefill-decode, PIECEWISE): 86%|████████▋ | 44/51 [00:02<00:00, 23.91it/s]
Capturing CUDA graphs (mixed prefill-decode, PIECEWISE): 92%|█████████▏| 47/51 [00:02<00:00, 23.75it/s]
Capturing CUDA graphs (mixed prefill-decode, PIECEWISE): 98%|█████████▊| 50/51 [00:02<00:00, 24.35it/s]
Capturing CUDA graphs (mixed prefill-decode, PIECEWISE): 100%|██████████| 51/51 [00:02<00:00, 21.03it/s] +[0;36m(EngineCore_DP0 pid=2749050)[0;0m
Capturing CUDA graphs (decode, FULL): 0%| | 0/35 [00:00<?, ?it/s]
Capturing CUDA graphs (decode, FULL): 3%|▎ | 1/35 [00:00<00:04, 8.11it/s]
Capturing CUDA graphs (decode, FULL): 11%|█▏ | 4/35 [00:00<00:01, 16.75it/s]
Capturing CUDA graphs (decode, FULL): 20%|██ | 7/35 [00:00<00:01, 19.48it/s]
Capturing CUDA graphs (decode, FULL): 29%|██▊ | 10/35 [00:00<00:01, 20.84it/s]
Capturing CUDA graphs (decode, FULL): 37%|███▋ | 13/35 [00:00<00:01, 21.74it/s]
Capturing CUDA graphs (decode, FULL): 46%|████▌ | 16/35 [00:00<00:00, 22.44it/s]
Capturing CUDA graphs (decode, FULL): 54%|█████▍ | 19/35 [00:00<00:00, 22.82it/s]
Capturing CUDA graphs (decode, FULL): 63%|██████▎ | 22/35 [00:01<00:00, 23.12it/s]
Capturing CUDA graphs (decode, FULL): 71%|███████▏ | 25/35 [00:01<00:00, 23.29it/s]
Capturing CUDA graphs (decode, FULL): 80%|████████ | 28/35 [00:01<00:00, 23.60it/s]
Capturing CUDA graphs (decode, FULL): 89%|████████▊ | 31/35 [00:01<00:00, 23.89it/s]
Capturing CUDA graphs (decode, FULL): 97%|█████████▋| 34/35 [00:01<00:00, 24.07it/s]
Capturing CUDA graphs (decode, FULL): 100%|██████████| 35/35 [00:01<00:00, 22.39it/s] +[0;36m(APIServer pid=2748545)[0;0m INFO: Started server process [2748545] +[0;36m(APIServer pid=2748545)[0;0m INFO: Waiting for application startup. +[0;36m(APIServer pid=2748545)[0;0m INFO: Application startup complete. +/u/yurenh2/miniforge3/envs/eval/lib/python3.11/site-packages/transformers/utils/hub.py:110: FutureWarning: Using `TRANSFORMERS_CACHE` is deprecated and will be removed in v5 of Transformers. Use `HF_HOME` instead. + warnings.warn( +2026-01-26 09:27:47,068 - INFO - Loaded dataset: math-hard +2026-01-26 09:27:47,068 - INFO - Profile path configured: ../data/complex_profiles_v2/profiles_200.jsonl +2026-01-26 09:27:47,089 - INFO - Loaded 200 profiles from ../data/complex_profiles_v2/profiles_200.jsonl +2026-01-26 09:27:47,090 - INFO - Running method: vanilla +2026-01-26 09:27:47,092 - INFO - Running profiles 0 to 24 (25 remaining) +2026-01-26 09:27:47,092 - INFO - Using BATCH processing (async OpenAI user) for vanilla +2026-01-26 09:27:51,946 - INFO - Batch: 25 profiles, 50 sessions remaining +2026-01-26 09:28:12,109 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:28:12,493 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:28:13,059 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:28:14,056 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:28:14,096 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:28:16,728 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:28:17,048 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:28:17,207 - INFO - HTTP Request: POST 
https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:28:17,852 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:28:17,921 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:28:18,025 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:28:18,315 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:28:18,717 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:28:18,721 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:28:19,139 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:28:19,426 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:28:19,559 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:28:19,669 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:28:20,116 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:28:20,490 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:28:21,361 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:28:22,541 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:28:26,115 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:28:27,029 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:28:30,969 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" 
+2026-01-26 09:30:28,992 - INFO - Retrying request to /chat/completions in 0.497164 seconds +2026-01-26 09:30:49,151 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:30:51,550 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:30:51,602 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:30:52,002 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:30:52,304 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:30:53,195 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:30:55,825 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:30:56,818 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:30:57,147 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:30:59,974 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:31:02,648 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:31:02,990 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:31:04,612 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:31:04,820 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:31:05,756 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:31:06,486 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:31:08,186 - INFO - HTTP Request: POST 
https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:31:09,519 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:31:14,224 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:31:14,961 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:31:16,471 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:31:21,750 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:31:25,200 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:31:25,420 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:31:26,474 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:32:17,226 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:34:46,144 - INFO - Retrying request to /chat/completions in 0.408735 seconds +2026-01-26 09:35:08,723 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:35:08,941 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:35:09,311 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:35:10,222 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:35:11,410 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:35:14,879 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:35:16,671 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 
09:35:16,957 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:35:17,153 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:35:17,391 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:35:19,785 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:35:20,686 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:35:21,933 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:35:22,094 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:35:23,911 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:35:24,217 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:35:27,229 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:35:28,836 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:35:30,822 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:35:33,169 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:35:34,386 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:35:39,633 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:35:40,925 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:38:06,800 - INFO - Retrying request to /chat/completions in 0.380454 seconds +2026-01-26 09:38:24,263 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions 
"HTTP/1.1 200 OK" +2026-01-26 09:38:25,450 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:38:27,291 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:38:28,109 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:38:29,229 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:38:29,236 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:38:29,517 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:38:29,743 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:38:30,599 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:38:31,944 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:38:34,329 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:38:34,755 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:38:37,718 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:38:38,547 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:38:39,658 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:38:41,005 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:38:43,610 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:38:44,813 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:38:50,989 - INFO - HTTP 
Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:38:54,975 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:39:00,332 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:39:01,476 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:41:24,167 - INFO - Retrying request to /chat/completions in 0.497047 seconds +2026-01-26 09:41:40,878 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:41:41,762 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:41:42,741 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:41:43,847 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:41:44,016 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:41:44,972 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:41:45,498 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:41:47,265 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:41:48,304 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:41:49,325 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:41:51,122 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:41:51,989 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:41:52,039 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" 
+2026-01-26 09:41:52,795 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:41:53,205 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:41:55,942 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:41:56,227 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:41:57,168 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:41:57,981 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:42:00,909 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:42:04,535 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:44:20,690 - INFO - Retrying request to /chat/completions in 0.488344 seconds +2026-01-26 09:44:35,415 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:44:37,095 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:44:41,452 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:44:42,469 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:44:42,474 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:44:42,828 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:44:44,768 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:44:44,896 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:44:46,075 - INFO - HTTP Request: POST 
https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:44:48,796 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:44:49,016 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:44:50,272 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:44:50,282 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:44:51,365 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:44:52,454 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:44:54,527 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:44:55,603 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:44:56,960 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:45:04,617 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:47:19,945 - INFO - Retrying request to /chat/completions in 0.436698 seconds +2026-01-26 09:47:35,313 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:47:37,522 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:47:39,093 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:47:39,424 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:47:39,624 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:47:40,126 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 
09:47:40,752 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:47:40,903 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:47:42,081 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:47:43,785 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:47:44,603 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:47:44,614 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:47:46,264 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:47:52,539 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:47:53,004 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:47:56,231 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:47:57,495 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:48:06,564 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:48:20,835 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:50:37,276 - INFO - Retrying request to /chat/completions in 0.452468 seconds +2026-01-26 09:50:51,899 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:50:53,721 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:50:55,014 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:50:55,582 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions 
"HTTP/1.1 200 OK" +2026-01-26 09:50:55,590 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:50:59,402 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:50:59,582 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:50:59,882 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:51:00,612 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:51:01,288 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:51:02,592 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:51:02,770 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:51:03,504 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:51:03,723 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:51:04,449 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:51:04,767 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:51:05,280 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:51:06,810 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:51:34,003 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:53:51,601 - INFO - Session round 1/2: 25 total, 58 sessions/hr +2026-01-26 09:53:51,604 - INFO - Retrying request to /chat/completions in 0.444179 seconds +2026-01-26 09:54:06,889 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions 
"HTTP/1.1 200 OK" +2026-01-26 09:54:11,520 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:54:12,150 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:54:12,415 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:54:12,433 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:54:13,872 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:54:14,604 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:54:15,711 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:54:15,926 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:54:16,676 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:54:16,752 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:54:20,312 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:54:20,851 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:54:22,383 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:54:23,091 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:54:23,274 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:54:23,448 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:54:23,779 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:54:24,746 - INFO - HTTP 
Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:54:24,908 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:54:25,362 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:54:25,671 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:54:38,199 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:54:43,115 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:54:45,969 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:55:05,543 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:56:42,149 - INFO - Retrying request to /chat/completions in 0.419778 seconds +2026-01-26 09:56:57,132 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:57:02,101 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:57:02,138 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:57:06,250 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:57:09,388 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:57:10,027 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:57:11,156 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:57:11,977 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:57:12,355 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" 
+2026-01-26 09:57:13,457 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:57:13,470 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:57:13,663 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:57:14,452 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:57:15,062 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:57:18,548 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:57:20,788 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:57:23,961 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:57:31,290 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:57:32,055 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:57:33,034 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:57:35,374 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:57:36,838 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:57:39,944 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:57:40,056 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:57:40,920 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:57:59,606 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:58:27,720 - INFO - HTTP Request: POST 
https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:58:33,775 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:58:34,589 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:00:46,860 - INFO - Retrying request to /chat/completions in 0.416438 seconds +2026-01-26 10:01:09,640 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:01:10,750 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:01:14,267 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:01:15,896 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:01:21,140 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:01:21,415 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:01:22,737 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:01:27,180 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:01:29,997 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:01:30,388 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:01:30,773 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:01:34,505 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:01:34,894 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:01:37,212 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 
10:01:39,069 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:01:39,363 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:01:43,345 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:01:43,923 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:01:46,439 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:01:47,877 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:01:51,629 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:02:02,535 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:02:14,872 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:02:15,377 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:02:16,444 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:02:38,111 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:03:35,818 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:05:41,154 - INFO - Retrying request to /chat/completions in 0.399071 seconds +2026-01-26 10:05:57,816 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:06:02,020 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:06:04,719 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:06:06,046 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions 
"HTTP/1.1 200 OK" +2026-01-26 10:06:08,946 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:06:10,046 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:06:10,720 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:06:10,873 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:06:13,979 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:06:15,928 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:06:16,923 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:06:17,044 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:06:23,932 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:06:36,979 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:06:37,422 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:06:42,713 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:06:42,728 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:06:42,734 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:06:44,033 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:06:44,136 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:07:12,836 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:07:13,878 - INFO - HTTP 
Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:07:14,207 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:07:36,442 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:07:59,236 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:08:22,397 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:10:11,478 - INFO - Retrying request to /chat/completions in 0.409762 seconds +2026-01-26 10:10:35,080 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:10:35,410 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:10:35,818 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:10:36,280 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:10:37,382 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:10:38,684 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:10:41,309 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:10:42,476 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:10:43,322 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:10:55,627 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:11:04,211 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:11:04,302 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" 
+2026-01-26 10:11:04,372 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:11:05,028 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:11:18,536 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:11:18,718 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:11:42,576 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:11:52,841 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:12:01,265 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:12:02,718 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:12:51,250 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:13:16,124 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:15:06,339 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:16:45,339 - INFO - Retrying request to /chat/completions in 0.479317 seconds +2026-01-26 10:17:06,803 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:17:07,949 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:17:10,203 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:17:10,510 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:17:11,396 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:17:11,643 - INFO - HTTP Request: POST 
https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:17:15,085 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:17:15,376 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:17:15,488 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:17:36,253 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:17:37,970 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:17:37,998 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:17:40,137 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:18:13,980 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:18:47,227 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:20:16,033 - INFO - Retrying request to /chat/completions in 0.385396 seconds +2026-01-26 10:20:41,346 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:20:43,646 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:20:43,738 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:20:43,752 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:20:44,201 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:20:48,133 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:20:49,531 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 
10:20:53,768 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:20:57,170 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:21:01,116 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:21:11,644 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:21:29,714 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:22:19,469 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:22:24,392 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:23:53,532 - INFO - Retrying request to /chat/completions in 0.431884 seconds +2026-01-26 10:24:16,778 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:24:17,281 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:24:20,169 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:24:22,116 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:24:25,715 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:24:26,553 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:24:28,568 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:24:33,226 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:24:40,874 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:24:41,777 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions 
"HTTP/1.1 200 OK" +[2026-01-26T10:24:42.210] error: *** JOB 15649074 ON gpua050 CANCELLED AT 2026-01-26T10:24:42 DUE TO TIME LIMIT *** diff --git a/collaborativeagents/slurm/fullscale/test_50parallel.sh b/collaborativeagents/slurm/fullscale/test_50parallel.sh new file mode 100644 index 0000000..0a48e2d --- /dev/null +++ b/collaborativeagents/slurm/fullscale/test_50parallel.sh @@ -0,0 +1,66 @@ +#!/bin/bash +#SBATCH --job-name=test_50parallel +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA100x4 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=16 +#SBATCH --gres=gpu:nvidia_a100:2 +#SBATCH --mem=128G +#SBATCH --time=1:00:00 +#SBATCH --output=test_50parallel_%j.out +#SBATCH --error=test_50parallel_%j.err + +# Test: 50 profiles × 2 sessions = 100 sessions with 50 parallel profiles +# Compare throughput vs 25 parallel + +cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model +source /u/yurenh2/miniforge3/etc/profile.d/conda.sh +conda activate eval + +export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface +export PYTHONPATH="${PWD}/src:${PWD}/collaborativeagents:${PYTHONPATH}" +export NCCL_P2P_DISABLE=1 + +set -a +source .env +set +a + +pip install --quiet openai python-dotenv json-repair + +MODEL_8B="/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/models/llama-3.1-8b-instruct" + +pkill -f "vllm.entrypoints" 2>/dev/null || true +sleep 2 + +# Increase GPU utilization to 60% for higher throughput +CUDA_VISIBLE_DEVICES=0 python -m vllm.entrypoints.openai.api_server \ + --model $MODEL_8B --port 8003 --tensor-parallel-size 1 \ + --gpu-memory-utilization 0.6 --max-model-len 8192 \ + --dtype bfloat16 --disable-log-requests & + +for i in $(seq 1 90); do + curl -s http://localhost:8003/health > /dev/null 2>&1 && break + sleep 2 +done +echo "vLLM ready." 
+ +cd collaborativeagents/scripts + +python run_experiments.py \ + --methods vanilla \ + --datasets math-hard \ + --n-profiles 50 \ + --n-sessions 2 \ + --max-turns 8 \ + --use-vllm \ + --use-openai-user \ + --openai-user-model gpt-5-mini \ + --reward-mode llm \ + --vllm-agent-url http://localhost:8003/v1 \ + --parallel-profiles 50 \ + --profile-path ../data/complex_profiles_v2/profiles_200.jsonl \ + --output-dir ../results/test_50parallel_$(date +%Y%m%d_%H%M%S) + +echo "Test complete!" +pkill -f "vllm.entrypoints" 2>/dev/null || true diff --git a/collaborativeagents/slurm/fullscale/test_50parallel_15649149.err b/collaborativeagents/slurm/fullscale/test_50parallel_15649149.err new file mode 100644 index 0000000..358fd24 --- /dev/null +++ b/collaborativeagents/slurm/fullscale/test_50parallel_15649149.err @@ -0,0 +1,504 @@ +/u/yurenh2/miniforge3/envs/eval/lib/python3.11/site-packages/transformers/utils/hub.py:110: FutureWarning: Using `TRANSFORMERS_CACHE` is deprecated and will be removed in v5 of Transformers. Use `HF_HOME` instead. + warnings.warn( +/u/yurenh2/miniforge3/envs/eval/lib/python3.11/site-packages/transformers/utils/hub.py:110: FutureWarning: Using `TRANSFORMERS_CACHE` is deprecated and will be removed in v5 of Transformers. Use `HF_HOME` instead. + warnings.warn( +[0;36m(EngineCore_DP0 pid=2003864)[0;0m
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s] +[0;36m(EngineCore_DP0 pid=2003864)[0;0m
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:05<00:17, 5.94s/it] +[0;36m(EngineCore_DP0 pid=2003864)[0;0m
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:07<00:06, 3.30s/it] +[0;36m(EngineCore_DP0 pid=2003864)[0;0m
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:13<00:04, 4.70s/it] +[0;36m(EngineCore_DP0 pid=2003864)[0;0m
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:18<00:00, 4.88s/it] +[0;36m(EngineCore_DP0 pid=2003864)[0;0m
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:18<00:00, 4.73s/it] +[0;36m(EngineCore_DP0 pid=2003864)[0;0m +[0;36m(EngineCore_DP0 pid=2003864)[0;0m
Capturing CUDA graphs (mixed prefill-decode, PIECEWISE): 0%| | 0/51 [00:00<?, ?it/s]
Capturing CUDA graphs (mixed prefill-decode, PIECEWISE): 4%|▍ | 2/51 [00:00<00:03, 14.62it/s]
Capturing CUDA graphs (mixed prefill-decode, PIECEWISE): 8%|▊ | 4/51 [00:00<00:03, 14.95it/s]
Capturing CUDA graphs (mixed prefill-decode, PIECEWISE): 12%|█▏ | 6/51 [00:00<00:02, 16.03it/s]
Capturing CUDA graphs (mixed prefill-decode, PIECEWISE): 16%|█▌ | 8/51 [00:00<00:02, 16.65it/s]
Capturing CUDA graphs (mixed prefill-decode, PIECEWISE): 22%|██▏ | 11/51 [00:00<00:02, 17.98it/s]
Capturing CUDA graphs (mixed prefill-decode, PIECEWISE): 27%|██▋ | 14/51 [00:00<00:01, 18.83it/s]
Capturing CUDA graphs (mixed prefill-decode, PIECEWISE): 33%|███▎ | 17/51 [00:00<00:01, 19.62it/s]
Capturing CUDA graphs (mixed prefill-decode, PIECEWISE): 39%|███▉ | 20/51 [00:01<00:01, 20.64it/s]
Capturing CUDA graphs (mixed prefill-decode, PIECEWISE): 45%|████▌ | 23/51 [00:01<00:01, 21.15it/s]
Capturing CUDA graphs (mixed prefill-decode, PIECEWISE): 51%|█████ | 26/51 [00:01<00:01, 21.74it/s]
Capturing CUDA graphs (mixed prefill-decode, PIECEWISE): 57%|█████▋ | 29/51 [00:01<00:01, 20.32it/s]
Capturing CUDA graphs (mixed prefill-decode, PIECEWISE): 63%|██████▎ | 32/51 [00:01<00:00, 21.27it/s]
Capturing CUDA graphs (mixed prefill-decode, PIECEWISE): 69%|██████▊ | 35/51 [00:01<00:00, 22.12it/s]
Capturing CUDA graphs (mixed prefill-decode, PIECEWISE): 75%|███████▍ | 38/51 [00:01<00:00, 22.79it/s]
Capturing CUDA graphs (mixed prefill-decode, PIECEWISE): 80%|████████ | 41/51 [00:01<00:00, 23.41it/s]
Capturing CUDA graphs (mixed prefill-decode, PIECEWISE): 86%|████████▋ | 44/51 [00:02<00:00, 24.07it/s]
Capturing CUDA graphs (mixed prefill-decode, PIECEWISE): 92%|█████████▏| 47/51 [00:02<00:00, 24.00it/s]
Capturing CUDA graphs (mixed prefill-decode, PIECEWISE): 98%|█████████▊| 50/51 [00:02<00:00, 24.68it/s]
Capturing CUDA graphs (mixed prefill-decode, PIECEWISE): 100%|██████████| 51/51 [00:02<00:00, 21.04it/s] +[0;36m(EngineCore_DP0 pid=2003864)[0;0m
Capturing CUDA graphs (decode, FULL): 0%| | 0/35 [00:00<?, ?it/s]
Capturing CUDA graphs (decode, FULL): 3%|▎ | 1/35 [00:00<00:04, 7.74it/s]
Capturing CUDA graphs (decode, FULL): 11%|█▏ | 4/35 [00:00<00:01, 16.62it/s]
Capturing CUDA graphs (decode, FULL): 20%|██ | 7/35 [00:00<00:01, 19.54it/s]
Capturing CUDA graphs (decode, FULL): 29%|██▊ | 10/35 [00:00<00:01, 21.02it/s]
Capturing CUDA graphs (decode, FULL): 37%|███▋ | 13/35 [00:00<00:00, 22.01it/s]
Capturing CUDA graphs (decode, FULL): 46%|████▌ | 16/35 [00:00<00:00, 22.69it/s]
Capturing CUDA graphs (decode, FULL): 54%|█████▍ | 19/35 [00:00<00:00, 23.12it/s]
Capturing CUDA graphs (decode, FULL): 63%|██████▎ | 22/35 [00:01<00:00, 23.45it/s]
Capturing CUDA graphs (decode, FULL): 71%|███████▏ | 25/35 [00:01<00:00, 23.73it/s]
Capturing CUDA graphs (decode, FULL): 80%|████████ | 28/35 [00:01<00:00, 24.03it/s]
Capturing CUDA graphs (decode, FULL): 89%|████████▊ | 31/35 [00:01<00:00, 24.21it/s]
Capturing CUDA graphs (decode, FULL): 97%|█████████▋| 34/35 [00:01<00:00, 24.31it/s]
Capturing CUDA graphs (decode, FULL): 100%|██████████| 35/35 [00:01<00:00, 22.61it/s] +[0;36m(APIServer pid=2003364)[0;0m INFO: Started server process [2003364] +[0;36m(APIServer pid=2003364)[0;0m INFO: Waiting for application startup. +[0;36m(APIServer pid=2003364)[0;0m INFO: Application startup complete. +/u/yurenh2/miniforge3/envs/eval/lib/python3.11/site-packages/transformers/utils/hub.py:110: FutureWarning: Using `TRANSFORMERS_CACHE` is deprecated and will be removed in v5 of Transformers. Use `HF_HOME` instead. + warnings.warn( +2026-01-26 09:29:43,637 - INFO - Loaded dataset: math-hard +2026-01-26 09:29:43,637 - INFO - Profile path configured: ../data/complex_profiles_v2/profiles_200.jsonl +2026-01-26 09:29:43,658 - INFO - Loaded 200 profiles from ../data/complex_profiles_v2/profiles_200.jsonl +2026-01-26 09:29:43,659 - INFO - Running method: vanilla +2026-01-26 09:29:43,660 - INFO - Running profiles 0 to 49 (50 remaining) +2026-01-26 09:29:43,660 - INFO - Using BATCH processing (async OpenAI user) for vanilla +2026-01-26 09:29:46,941 - INFO - Batch: 50 profiles, 100 sessions remaining +2026-01-26 09:30:06,454 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:30:08,319 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:30:08,451 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:30:08,842 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:30:09,445 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:30:09,759 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:30:10,606 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:30:10,842 - INFO - HTTP Request: POST 
https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:30:13,059 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:30:13,117 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:30:13,149 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:30:13,240 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:30:13,644 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:30:13,823 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:30:14,114 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:30:14,450 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:30:16,522 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:30:17,201 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:30:17,277 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:30:17,616 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:30:18,027 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:30:18,492 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:30:20,496 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:30:21,021 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:30:21,193 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" 
+2026-01-26 09:30:21,371 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:30:21,510 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:30:21,833 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:30:24,294 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:30:27,007 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:30:32,273 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:30:34,151 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:30:34,690 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:30:34,758 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:30:35,910 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:30:36,850 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:30:37,630 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:30:40,618 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:30:41,190 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:30:42,393 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:30:42,786 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:30:44,872 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:30:46,049 - INFO - HTTP Request: POST 
https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:30:46,231 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:30:46,297 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:30:47,229 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:30:48,236 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:30:48,781 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:30:52,819 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:30:57,372 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:31:13,018 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:35:34,869 - INFO - Retrying request to /chat/completions in 0.439588 seconds +2026-01-26 09:35:57,466 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:35:59,282 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:35:59,318 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:36:00,650 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:36:00,770 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:36:01,826 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:36:04,364 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:36:05,040 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 
09:36:06,043 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:36:06,251 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:36:06,340 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:36:06,460 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:36:06,777 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:36:06,784 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:36:07,939 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:36:09,820 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:36:11,725 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:36:11,939 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:36:13,530 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:36:13,664 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:36:14,413 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:36:14,652 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:36:16,100 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:36:16,352 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:36:16,839 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:36:18,001 - INFO - HTTP Request: POST 
https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:36:18,441 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:36:21,080 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:36:24,896 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:36:25,097 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:36:26,581 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:36:26,911 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:36:27,043 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:36:28,834 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:36:29,102 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:36:30,211 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:36:30,514 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:36:32,879 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:36:34,460 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:36:37,520 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:36:38,116 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:36:39,596 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:36:41,792 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" 
+2026-01-26 09:36:44,867 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:36:45,135 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:36:45,736 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:36:51,109 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:36:55,416 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:36:57,749 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:36:59,238 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:42:13,030 - INFO - Retrying request to /chat/completions in 0.400120 seconds +2026-01-26 09:42:30,585 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:42:30,892 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:42:31,450 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:42:33,206 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:42:35,779 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:42:36,014 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:42:36,083 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:42:36,327 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:42:36,398 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:42:36,620 - INFO - HTTP Request: POST 
https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:42:36,796 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:42:37,385 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:42:37,527 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:42:38,422 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:42:40,279 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:42:40,866 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:42:41,680 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:42:42,121 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:42:42,257 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:42:43,928 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:42:43,971 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:42:45,107 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:42:46,582 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:42:46,663 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:42:47,554 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:42:47,871 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:42:49,446 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" 
+2026-01-26 09:42:49,556 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:42:51,016 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:42:51,895 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:42:52,636 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:42:52,757 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:42:53,376 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:42:53,935 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:42:59,986 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:43:01,144 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:43:03,484 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:43:04,528 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:43:04,605 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:43:05,172 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:43:05,330 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:43:05,565 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:43:06,122 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:43:06,168 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:43:11,286 - INFO - HTTP Request: POST 
https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:43:14,390 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:43:18,542 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:43:20,316 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:43:30,080 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:48:28,792 - INFO - Retrying request to /chat/completions in 0.405667 seconds +2026-01-26 09:48:45,809 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:48:48,709 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:48:48,991 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:48:50,876 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:48:50,995 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:48:52,823 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:48:52,903 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:48:52,959 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:48:53,382 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:48:53,445 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:48:53,557 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:48:53,729 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 
09:48:53,902 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:48:54,511 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:48:56,151 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:48:56,347 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:48:59,690 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:49:00,321 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:49:01,044 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:49:01,755 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:49:02,910 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:49:04,012 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:49:05,573 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:49:05,928 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:49:07,146 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:49:07,622 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:49:10,257 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:49:10,921 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:49:12,097 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:49:12,780 - INFO - HTTP Request: POST 
https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:49:13,374 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:49:14,368 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:49:14,869 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:49:15,727 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:49:15,874 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:49:16,892 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:49:18,076 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:49:18,132 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:49:18,751 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:49:19,863 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:49:20,866 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:49:21,057 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:49:23,432 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:49:26,466 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:49:31,006 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:49:49,558 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:54:36,689 - INFO - Retrying request to /chat/completions in 0.430394 seconds +2026-01-26 
09:54:54,293 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:54:54,487 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:54:54,829 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:54:56,069 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:54:57,256 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:54:57,551 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:54:57,644 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:54:57,650 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:54:57,799 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:54:58,401 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:54:58,714 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:55:00,103 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:55:01,298 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:55:01,735 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:55:04,111 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:55:04,116 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:55:04,320 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:55:05,903 - INFO - HTTP Request: POST 
https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:55:05,972 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:55:07,051 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:55:07,430 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:55:07,842 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:55:08,243 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:55:09,065 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:55:11,827 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:55:11,834 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:55:12,691 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:55:14,225 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:55:16,456 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:55:16,543 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:55:18,805 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:55:20,407 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:55:20,887 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:55:22,644 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:55:23,408 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" 
+2026-01-26 09:55:23,884 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:55:23,986 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:55:24,368 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:55:25,977 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:55:26,109 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:55:26,887 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:55:27,193 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:55:28,219 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:55:29,328 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 09:55:52,261 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:00:21,589 - INFO - Retrying request to /chat/completions in 0.397408 seconds +2026-01-26 10:00:36,291 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:00:36,955 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:00:37,823 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:00:38,707 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:00:39,517 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:00:41,582 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:00:42,114 - INFO - HTTP Request: POST 
https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:00:42,551 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:00:42,721 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:00:42,969 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:00:42,988 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:00:43,058 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:00:43,342 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:00:44,557 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:00:44,657 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:00:45,109 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:00:45,269 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:00:45,509 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:00:45,799 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:00:45,801 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:00:46,105 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:00:47,159 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:00:47,427 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:00:47,816 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" 
+2026-01-26 10:00:48,604 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:00:49,689 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:00:49,739 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:00:51,089 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:00:52,928 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:00:54,907 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:00:55,184 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:00:56,598 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:00:59,118 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:00:59,531 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:01:02,545 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:01:03,044 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:01:03,338 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:01:06,460 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:01:08,416 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:01:10,499 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:01:12,694 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:01:15,341 - INFO - HTTP Request: POST 
https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:01:43,942 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:06:17,314 - INFO - Retrying request to /chat/completions in 0.385894 seconds +2026-01-26 10:06:32,587 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:06:33,300 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:06:37,112 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:06:38,036 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:06:38,408 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:06:38,484 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:06:38,507 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:06:40,072 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:06:40,536 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:06:41,018 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:06:41,092 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:06:41,686 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:06:41,761 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:06:41,945 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:06:42,008 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 
10:06:43,444 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:06:44,347 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:06:44,820 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:06:44,906 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:06:45,643 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:06:45,780 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:06:45,909 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:06:47,560 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:06:47,605 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:06:48,560 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:06:50,804 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:06:54,243 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:06:54,294 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:06:57,079 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:06:57,277 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:06:58,041 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:06:58,423 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:07:01,704 - INFO - HTTP Request: POST 
https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:07:06,037 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:07:07,673 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:07:12,023 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:07:16,183 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:07:20,226 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:07:22,800 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:07:26,791 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:07:54,018 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:12:22,041 - INFO - Retrying request to /chat/completions in 0.454918 seconds +2026-01-26 10:12:36,789 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:12:40,297 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:12:41,629 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:12:42,714 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:12:42,757 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:12:44,028 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:12:45,344 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:12:46,438 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 
10:12:46,878 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:12:47,563 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:12:47,805 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:12:48,866 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:12:49,145 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:12:49,361 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:12:49,564 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:12:49,825 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:12:50,085 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:12:51,229 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:12:51,874 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:12:51,975 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:12:53,724 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:12:53,785 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:12:54,109 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:12:54,266 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:12:54,808 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:12:55,152 - INFO - HTTP Request: POST 
https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:12:56,596 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:12:57,495 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:13:00,949 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:13:02,102 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:13:04,013 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:13:05,406 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:13:07,431 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:13:08,551 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:13:09,641 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:13:13,580 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:13:14,384 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:13:15,505 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:13:28,509 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:18:02,217 - INFO - Session round 1/2: 50 total, 62 sessions/hr +2026-01-26 10:18:02,221 - INFO - Retrying request to /chat/completions in 0.390137 seconds +2026-01-26 10:18:23,953 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:18:25,452 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:18:25,533 - INFO - HTTP Request: POST 
https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:18:26,172 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:18:26,808 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:18:26,971 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:18:27,348 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:18:27,652 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:18:28,804 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:18:28,808 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:18:29,551 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:18:30,377 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:18:31,132 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:18:31,405 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:18:31,644 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:18:31,698 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:18:31,953 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:18:32,283 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:18:32,636 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:18:32,842 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" 
+2026-01-26 10:18:32,988 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:18:33,279 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:18:35,981 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:18:36,043 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:18:36,225 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:18:40,684 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:18:41,084 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:18:42,000 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:18:42,697 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:18:48,873 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:18:49,383 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:18:50,056 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:18:51,900 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:18:52,030 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:18:52,995 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:18:54,433 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:18:54,917 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:18:56,763 - INFO - HTTP Request: POST 
https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:18:58,988 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:19:00,456 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:19:01,784 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:19:02,251 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:19:03,579 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:19:03,788 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:19:08,994 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:19:10,374 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:19:10,942 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:19:18,374 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:19:23,243 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:19:25,144 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:20:57,966 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:21:27,035 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:25:12,495 - INFO - Retrying request to /chat/completions in 0.494117 seconds +2026-01-26 10:25:29,226 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:25:32,090 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 
10:25:36,992 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:25:37,517 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:25:37,586 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:25:37,687 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:25:38,600 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:25:39,260 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:25:39,436 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:25:41,983 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:25:42,064 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:25:42,731 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:25:43,529 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:25:44,137 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:25:44,812 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:25:44,870 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:25:45,136 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:25:45,455 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:25:46,477 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:25:48,792 - INFO - HTTP Request: POST 
https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:25:49,168 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:25:51,257 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:25:51,762 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:25:53,145 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:25:58,678 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:25:59,306 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:26:01,408 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:26:03,426 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:26:04,074 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:26:04,416 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:26:05,245 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:26:07,119 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:26:07,177 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:26:07,274 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:26:08,778 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:26:08,985 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:26:09,589 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" 
+2026-01-26 10:26:09,620 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:26:10,369 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:26:13,493 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:26:14,494 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:26:18,082 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:26:19,637 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:26:19,726 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:26:27,344 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:26:27,496 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:26:30,236 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:26:35,051 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:26:35,315 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:26:37,814 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:26:41,709 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:26:42,540 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +[2026-01-26T10:26:43.011] error: *** JOB 15649149 ON gpua019 CANCELLED AT 2026-01-26T10:26:43 DUE TO TIME LIMIT *** diff --git a/collaborativeagents/slurm/fullscale/test_batch_fix.sh b/collaborativeagents/slurm/fullscale/test_batch_fix.sh new file mode 100644 index 0000000..ae87d99 --- 
/dev/null +++ b/collaborativeagents/slurm/fullscale/test_batch_fix.sh @@ -0,0 +1,69 @@ +#!/bin/bash +#SBATCH --job-name=test_batch_fix +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA100x4 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=16 +#SBATCH --gres=gpu:nvidia_a100:2 +#SBATCH --mem=128G +#SBATCH --time=0:30:00 +#SBATCH --output=test_batch_fix_%j.out +#SBATCH --error=test_batch_fix_%j.err + +# Quick test: Verify batched agent calls fix +# 10 profiles × 2 sessions = 20 sessions with 10 parallel +# Should see ~500+ sessions/hr with batching (vs ~60 without) + +cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model +source /u/yurenh2/miniforge3/etc/profile.d/conda.sh +conda activate eval + +export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface +export PYTHONPATH="${PWD}/src:${PWD}/collaborativeagents:${PYTHONPATH}" +export NCCL_P2P_DISABLE=1 + +set -a +source .env +set +a + +pip install --quiet openai python-dotenv json-repair + +MODEL_8B="/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/models/llama-3.1-8b-instruct" + +pkill -f "vllm.entrypoints" 2>/dev/null || true +sleep 2 + +# Start vLLM server +CUDA_VISIBLE_DEVICES=0 python -m vllm.entrypoints.openai.api_server \ + --model $MODEL_8B --port 8003 --tensor-parallel-size 1 \ + --gpu-memory-utilization 0.5 --max-model-len 8192 \ + --dtype bfloat16 --disable-log-requests & + +for i in $(seq 1 90); do + curl -s http://localhost:8003/health > /dev/null 2>&1 && break + sleep 2 +done +echo "vLLM ready." 
+ +cd collaborativeagents/scripts + +# Test with vanilla (simplest method) +echo "=== Testing batched agent calls ===" +python run_experiments.py \ + --methods vanilla \ + --datasets math-hard \ + --n-profiles 10 \ + --n-sessions 2 \ + --max-turns 8 \ + --use-vllm \ + --use-openai-user \ + --openai-user-model gpt-5-mini \ + --reward-mode llm \ + --vllm-agent-url http://localhost:8003/v1 \ + --parallel-profiles 10 \ + --profile-path ../data/complex_profiles_v2/profiles_200.jsonl \ + --output-dir ../results/test_batch_fix_$(date +%Y%m%d_%H%M%S) + +echo "Test complete!" +pkill -f "vllm.entrypoints" 2>/dev/null || true diff --git a/collaborativeagents/slurm/fullscale/test_batch_fix_15651956.err b/collaborativeagents/slurm/fullscale/test_batch_fix_15651956.err new file mode 100644 index 0000000..a7574bf --- /dev/null +++ b/collaborativeagents/slurm/fullscale/test_batch_fix_15651956.err @@ -0,0 +1,165 @@ +/u/yurenh2/miniforge3/envs/eval/lib/python3.11/site-packages/transformers/utils/hub.py:110: FutureWarning: Using `TRANSFORMERS_CACHE` is deprecated and will be removed in v5 of Transformers. Use `HF_HOME` instead. + warnings.warn( +/u/yurenh2/miniforge3/envs/eval/lib/python3.11/site-packages/transformers/utils/hub.py:110: FutureWarning: Using `TRANSFORMERS_CACHE` is deprecated and will be removed in v5 of Transformers. Use `HF_HOME` instead. + warnings.warn( +[0;36m(EngineCore_DP0 pid=2779888)[0;0m
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s] +[0;36m(EngineCore_DP0 pid=2779888)[0;0m
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:06<00:18, 6.31s/it] +[0;36m(EngineCore_DP0 pid=2779888)[0;0m
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:08<00:07, 3.60s/it] +[0;36m(EngineCore_DP0 pid=2779888)[0;0m
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:15<00:05, 5.44s/it] +[0;36m(EngineCore_DP0 pid=2779888)[0;0m
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:21<00:00, 5.61s/it] +[0;36m(EngineCore_DP0 pid=2779888)[0;0m
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:21<00:00, 5.37s/it] +[0;36m(EngineCore_DP0 pid=2779888)[0;0m +[0;36m(EngineCore_DP0 pid=2779888)[0;0m
Capturing CUDA graphs (mixed prefill-decode, PIECEWISE): 0%| | 0/51 [00:00<?, ?it/s]
Capturing CUDA graphs (mixed prefill-decode, PIECEWISE): 4%|▍ | 2/51 [00:00<00:03, 14.78it/s]
Capturing CUDA graphs (mixed prefill-decode, PIECEWISE): 8%|▊ | 4/51 [00:00<00:03, 15.51it/s]
Capturing CUDA graphs (mixed prefill-decode, PIECEWISE): 12%|█▏ | 6/51 [00:00<00:02, 16.09it/s]
Capturing CUDA graphs (mixed prefill-decode, PIECEWISE): 16%|█▌ | 8/51 [00:00<00:02, 16.68it/s]
Capturing CUDA graphs (mixed prefill-decode, PIECEWISE): 22%|██▏ | 11/51 [00:00<00:02, 18.13it/s]
Capturing CUDA graphs (mixed prefill-decode, PIECEWISE): 27%|██▋ | 14/51 [00:00<00:01, 18.95it/s]
Capturing CUDA graphs (mixed prefill-decode, PIECEWISE): 33%|███▎ | 17/51 [00:00<00:01, 19.71it/s]
Capturing CUDA graphs (mixed prefill-decode, PIECEWISE): 39%|███▉ | 20/51 [00:01<00:01, 20.71it/s]
Capturing CUDA graphs (mixed prefill-decode, PIECEWISE): 45%|████▌ | 23/51 [00:01<00:01, 21.34it/s]
Capturing CUDA graphs (mixed prefill-decode, PIECEWISE): 51%|█████ | 26/51 [00:01<00:01, 21.87it/s]
Capturing CUDA graphs (mixed prefill-decode, PIECEWISE): 57%|█████▋ | 29/51 [00:01<00:00, 22.02it/s]
Capturing CUDA graphs (mixed prefill-decode, PIECEWISE): 63%|██████▎ | 32/51 [00:01<00:00, 22.48it/s]
Capturing CUDA graphs (mixed prefill-decode, PIECEWISE): 69%|██████▊ | 35/51 [00:01<00:00, 22.97it/s]
Capturing CUDA graphs (mixed prefill-decode, PIECEWISE): 75%|███████▍ | 38/51 [00:01<00:00, 23.36it/s]
Capturing CUDA graphs (mixed prefill-decode, PIECEWISE): 80%|████████ | 41/51 [00:01<00:00, 23.76it/s]
Capturing CUDA graphs (mixed prefill-decode, PIECEWISE): 86%|████████▋ | 44/51 [00:02<00:00, 24.28it/s]
Capturing CUDA graphs (mixed prefill-decode, PIECEWISE): 92%|█████████▏| 47/51 [00:02<00:00, 24.65it/s]
Capturing CUDA graphs (mixed prefill-decode, PIECEWISE): 98%|█████████▊| 50/51 [00:02<00:00, 25.04it/s]
Capturing CUDA graphs (mixed prefill-decode, PIECEWISE): 100%|██████████| 51/51 [00:02<00:00, 21.70it/s] +[0;36m(EngineCore_DP0 pid=2779888)[0;0m
Capturing CUDA graphs (decode, FULL): 0%| | 0/35 [00:00<?, ?it/s]
Capturing CUDA graphs (decode, FULL): 6%|▌ | 2/35 [00:00<00:01, 19.85it/s]
Capturing CUDA graphs (decode, FULL): 14%|█▍ | 5/35 [00:00<00:01, 21.79it/s]
Capturing CUDA graphs (decode, FULL): 23%|██▎ | 8/35 [00:00<00:01, 22.38it/s]
Capturing CUDA graphs (decode, FULL): 31%|███▏ | 11/35 [00:00<00:01, 22.76it/s]
Capturing CUDA graphs (decode, FULL): 40%|████ | 14/35 [00:00<00:00, 23.18it/s]
Capturing CUDA graphs (decode, FULL): 49%|████▊ | 17/35 [00:00<00:00, 23.43it/s]
Capturing CUDA graphs (decode, FULL): 57%|█████▋ | 20/35 [00:00<00:00, 23.60it/s]
Capturing CUDA graphs (decode, FULL): 66%|██████▌ | 23/35 [00:00<00:00, 23.69it/s]
Capturing CUDA graphs (decode, FULL): 74%|███████▍ | 26/35 [00:01<00:00, 23.90it/s]
Capturing CUDA graphs (decode, FULL): 83%|████████▎ | 29/35 [00:01<00:00, 24.13it/s]
Capturing CUDA graphs (decode, FULL): 91%|█████████▏| 32/35 [00:01<00:00, 24.33it/s]
Capturing CUDA graphs (decode, FULL): 100%|██████████| 35/35 [00:01<00:00, 24.53it/s]
Capturing CUDA graphs (decode, FULL): 100%|██████████| 35/35 [00:01<00:00, 23.67it/s] +[0;36m(APIServer pid=2779820)[0;0m INFO: Started server process [2779820] +[0;36m(APIServer pid=2779820)[0;0m INFO: Waiting for application startup. +[0;36m(APIServer pid=2779820)[0;0m INFO: Application startup complete. +/u/yurenh2/miniforge3/envs/eval/lib/python3.11/site-packages/transformers/utils/hub.py:110: FutureWarning: Using `TRANSFORMERS_CACHE` is deprecated and will be removed in v5 of Transformers. Use `HF_HOME` instead. + warnings.warn( +2026-01-26 10:49:25,310 - INFO - Loaded dataset: math-hard +2026-01-26 10:49:25,310 - INFO - Profile path configured: ../data/complex_profiles_v2/profiles_200.jsonl +2026-01-26 10:49:25,331 - INFO - Loaded 200 profiles from ../data/complex_profiles_v2/profiles_200.jsonl +2026-01-26 10:49:25,332 - INFO - Running method: vanilla +2026-01-26 10:49:25,333 - INFO - Running profiles 0 to 9 (10 remaining) +2026-01-26 10:49:25,333 - INFO - Using BATCH processing (async OpenAI user) for vanilla +2026-01-26 10:49:27,629 - INFO - Batch: 10 profiles, 20 sessions remaining +2026-01-26 10:49:51,327 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:49:54,771 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:49:55,968 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:49:55,971 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:49:56,111 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:49:58,494 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:50:00,365 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:50:00,481 - INFO - HTTP Request: POST 
https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:50:01,374 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:50:02,557 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:50:31,911 - INFO - Retrying request to /chat/completions in 0.472652 seconds +2026-01-26 10:50:55,766 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:51:00,321 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:51:01,778 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:51:02,348 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:51:03,035 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:51:07,706 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:51:10,831 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:51:17,046 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:51:20,228 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:51:29,972 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:52:18,950 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:52:48,948 - INFO - Retrying request to /chat/completions in 0.401385 seconds +2026-01-26 10:53:08,512 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:53:17,427 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:53:17,958 - INFO - HTTP 
Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:53:20,633 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:53:23,267 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:53:24,807 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:53:25,462 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:53:39,454 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:53:45,026 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:54:42,712 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:55:14,191 - INFO - Retrying request to /chat/completions in 0.465460 seconds +2026-01-26 10:55:30,981 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:55:36,802 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:55:36,998 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:55:37,952 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:55:42,469 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:55:46,764 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:55:48,435 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:55:55,025 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:55:56,037 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" 
+2026-01-26 10:56:16,243 - INFO - Retrying request to /chat/completions in 0.475638 seconds +2026-01-26 10:56:33,092 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:56:38,487 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:56:43,531 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:56:44,094 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:56:44,184 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:56:48,210 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:56:52,409 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:56:58,024 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:57:03,714 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:57:17,816 - INFO - Retrying request to /chat/completions in 0.475195 seconds +2026-01-26 10:57:38,360 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:57:43,248 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:57:44,051 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:57:53,040 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:58:04,492 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:58:09,427 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:58:22,280 - INFO - Retrying request to /chat/completions in 0.427496 seconds +2026-01-26 
10:58:48,028 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:58:50,995 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:59:05,351 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:59:19,341 - INFO - Retrying request to /chat/completions in 0.401335 seconds +2026-01-26 10:59:37,858 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:59:43,064 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 10:59:52,406 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 11:00:03,266 - INFO - Session round 1/2: 10 total, 56 sessions/hr +2026-01-26 11:00:03,268 - INFO - Retrying request to /chat/completions in 0.441368 seconds +2026-01-26 11:00:23,708 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 11:00:26,029 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 11:00:26,087 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 11:00:27,850 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 11:00:28,241 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 11:00:30,520 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 11:00:31,723 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 11:00:32,826 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 11:00:37,576 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 11:00:39,479 - INFO - HTTP 
Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 11:00:47,699 - INFO - Retrying request to /chat/completions in 0.433057 seconds +2026-01-26 11:01:08,417 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 11:01:11,013 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 11:01:12,133 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 11:01:12,170 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 11:01:23,969 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 11:01:27,863 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 11:01:29,426 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 11:01:48,623 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 11:01:49,104 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 11:01:54,091 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 11:02:31,426 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 11:02:46,245 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 11:02:57,910 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 11:03:06,851 - INFO - Retrying request to /chat/completions in 0.429348 seconds +2026-01-26 11:03:38,900 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 11:03:40,960 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 11:03:42,031 - 
INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 11:04:11,946 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 11:04:12,383 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 11:04:15,523 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 11:04:16,260 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 11:04:18,787 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 11:04:19,674 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 11:05:49,055 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 11:06:00,150 - INFO - Retrying request to /chat/completions in 0.448308 seconds +2026-01-26 11:06:35,563 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 11:06:44,567 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 11:06:44,848 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 11:06:52,764 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 11:06:54,443 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 11:06:54,793 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 11:06:56,054 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 11:08:54,799 - INFO - Retrying request to /chat/completions in 0.489046 seconds +2026-01-26 11:09:48,962 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 
11:09:59,678 - INFO - Retrying request to /chat/completions in 0.414219 seconds +2026-01-26 11:10:23,531 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 11:10:23,684 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 11:10:34,012 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 11:10:38,223 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 11:10:41,674 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 11:10:59,720 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 11:12:50,321 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 11:14:34,418 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 11:14:45,079 - INFO - Retrying request to /chat/completions in 0.408414 seconds +2026-01-26 11:15:06,196 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 11:15:16,148 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 11:15:37,062 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 11:15:37,073 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 11:15:48,411 - INFO - Retrying request to /chat/completions in 0.397816 seconds +2026-01-26 11:16:22,670 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 11:16:32,537 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 11:16:34,381 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 11:16:54,692 
- INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 11:17:06,402 - INFO - Retrying request to /chat/completions in 0.418484 seconds +2026-01-26 11:17:25,760 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 11:17:50,338 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 11:17:57,400 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +2026-01-26 11:18:01,918 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" +[2026-01-26T11:18:21.428] error: *** JOB 15651956 ON gpua050 CANCELLED AT 2026-01-26T11:18:21 DUE TO TIME LIMIT *** diff --git a/collaborativeagents/slurm/fullscale/test_local_user.sh b/collaborativeagents/slurm/fullscale/test_local_user.sh new file mode 100644 index 0000000..8374a93 --- /dev/null +++ b/collaborativeagents/slurm/fullscale/test_local_user.sh @@ -0,0 +1,94 @@ +#!/bin/bash +#SBATCH --job-name=test_local_user +#SBATCH --account=bfqt-delta-gpu +#SBATCH --partition=gpuA100x4 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=32 +#SBATCH --gres=gpu:nvidia_a100:4 +#SBATCH --mem=200G +#SBATCH --time=1:00:00 +#SBATCH --output=test_local_user_%j.out +#SBATCH --error=test_local_user_%j.err + +# Test with LOCAL vLLM user simulator (70B AWQ) instead of OpenAI +# Expected: ~2000+ sessions/hr (vs ~60 with OpenAI API latency) +# +# GPU Layout: +# GPU 0-1: 70B user simulator (AWQ INT4, TP=2) +# GPU 2: 8B agent (~24GB) +# GPU 3: Available for embedding/reranker if needed + +cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model +source /u/yurenh2/miniforge3/etc/profile.d/conda.sh +conda activate eval + +export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface +export PYTHONPATH="${PWD}/src:${PWD}/collaborativeagents:${PYTHONPATH}" +export NCCL_P2P_DISABLE=1 + +set -a +source .env +set +a + +# Models 
+MODEL_70B="hugging-quants/Meta-Llama-3.1-70B-Instruct-AWQ-INT4" +MODEL_8B="/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/models/llama-3.1-8b-instruct" + +# Kill any existing vLLM servers +pkill -f "vllm.entrypoints" 2>/dev/null || true +sleep 3 + +echo "=== Starting 70B User Simulator (GPU 0-1, TP=2) ===" +CUDA_VISIBLE_DEVICES=0,1 python -m vllm.entrypoints.openai.api_server \ + --model $MODEL_70B \ + --port 8004 \ + --tensor-parallel-size 2 \ + --gpu-memory-utilization 0.90 \ + --max-model-len 4096 \ + --quantization awq \ + --dtype float16 \ + --disable-log-requests \ + --guided-decoding-backend outlines & + +echo "=== Starting 8B Agent (GPU 2) ===" +CUDA_VISIBLE_DEVICES=2 python -m vllm.entrypoints.openai.api_server \ + --model $MODEL_8B \ + --port 8003 \ + --tensor-parallel-size 1 \ + --gpu-memory-utilization 0.50 \ + --max-model-len 8192 \ + --dtype bfloat16 \ + --disable-log-requests & + +# Wait for both servers +echo "Waiting for vLLM servers..." +for port in 8004 8003; do + for i in $(seq 1 120); do + curl -s http://localhost:$port/health > /dev/null 2>&1 && break + sleep 2 + done + echo " Port $port ready." 
+done + +cd collaborativeagents/scripts + +echo "" +echo "=== Running Test: 10 profiles × 2 sessions with LOCAL user simulator ===" +python run_experiments.py \ + --methods vanilla \ + --datasets math-hard \ + --n-profiles 10 \ + --n-sessions 2 \ + --max-turns 8 \ + --use-vllm \ + --vllm-user-url http://localhost:8004/v1 \ + --vllm-agent-url http://localhost:8003/v1 \ + --reward-mode llm \ + --parallel-profiles 10 \ + --profile-path ../data/complex_profiles_v2/profiles_200.jsonl \ + --output-dir ../results/test_local_user_$(date +%Y%m%d_%H%M%S) + +echo "" +echo "=== Test Complete ===" +pkill -f "vllm.entrypoints" 2>/dev/null || true diff --git a/collaborativeagents/slurm/fullscale/test_local_user_15652698.err b/collaborativeagents/slurm/fullscale/test_local_user_15652698.err new file mode 100644 index 0000000..4acc458 --- /dev/null +++ b/collaborativeagents/slurm/fullscale/test_local_user_15652698.err @@ -0,0 +1,215 @@ +/u/yurenh2/miniforge3/envs/eval/lib/python3.11/site-packages/transformers/utils/hub.py:110: FutureWarning: Using `TRANSFORMERS_CACHE` is deprecated and will be removed in v5 of Transformers. Use `HF_HOME` instead. + warnings.warn( +/u/yurenh2/miniforge3/envs/eval/lib/python3.11/site-packages/transformers/utils/hub.py:110: FutureWarning: Using `TRANSFORMERS_CACHE` is deprecated and will be removed in v5 of Transformers. Use `HF_HOME` instead. 
+ warnings.warn( +usage: api_server.py [-h] [--headless] [--api-server-count API_SERVER_COUNT] + [--config CONFIG] [--host HOST] [--port PORT] [--uds UDS] + [--uvicorn-log-level {critical,debug,error,info,trace,warning}] + [--disable-uvicorn-access-log | --no-disable-uvicorn-access-log] + [--allow-credentials | --no-allow-credentials] + [--allowed-origins ALLOWED_ORIGINS] + [--allowed-methods ALLOWED_METHODS] + [--allowed-headers ALLOWED_HEADERS] + [--api-key API_KEY [API_KEY ...]] + [--lora-modules LORA_MODULES [LORA_MODULES ...]] + [--chat-template CHAT_TEMPLATE] + [--chat-template-content-format {auto,openai,string}] + [--trust-request-chat-template | --no-trust-request-chat-template] + [--response-role RESPONSE_ROLE] + [--ssl-keyfile SSL_KEYFILE] [--ssl-certfile SSL_CERTFILE] + [--ssl-ca-certs SSL_CA_CERTS] + [--enable-ssl-refresh | --no-enable-ssl-refresh] + [--ssl-cert-reqs SSL_CERT_REQS] [--root-path ROOT_PATH] + [--middleware MIDDLEWARE] + [--return-tokens-as-token-ids | --no-return-tokens-as-token-ids] + [--disable-frontend-multiprocessing | --no-disable-frontend-multiprocessing] + [--enable-request-id-headers | --no-enable-request-id-headers] + [--enable-auto-tool-choice | --no-enable-auto-tool-choice] + [--exclude-tools-when-tool-choice-none | --no-exclude-tools-when-tool-choice-none] + [--tool-call-parser {deepseek_v3,deepseek_v31,deepseek_v32,ernie45,gigachat3,glm45,granite,granite-20b-fc,hermes,hunyuan_a13b,internlm,jamba,kimi_k2,llama3_json,llama4_json,llama4_pythonic,longcat,minimax,minimax_m2,mistral,olmo3,openai,phi4_mini_json,pythonic,qwen3_coder,qwen3_xml,seed_oss,step3,xlam} or name registered in --tool-parser-plugin] + [--tool-parser-plugin TOOL_PARSER_PLUGIN] + [--tool-server TOOL_SERVER] + [--log-config-file LOG_CONFIG_FILE] + [--max-log-len MAX_LOG_LEN] + [--disable-fastapi-docs | --no-disable-fastapi-docs] + [--enable-prompt-tokens-details | --no-enable-prompt-tokens-details] + [--enable-server-load-tracking | 
--no-enable-server-load-tracking] + [--enable-force-include-usage | --no-enable-force-include-usage] + [--enable-tokenizer-info-endpoint | --no-enable-tokenizer-info-endpoint] + [--enable-log-outputs | --no-enable-log-outputs] + [--h11-max-incomplete-event-size H11_MAX_INCOMPLETE_EVENT_SIZE] + [--h11-max-header-count H11_MAX_HEADER_COUNT] + [--log-error-stack | --no-log-error-stack] + [--tokens-only | --no-tokens-only] [--model MODEL] + [--runner {auto,draft,generate,pooling}] + [--convert {auto,classify,embed,none,reward}] + [--tokenizer TOKENIZER] + [--tokenizer-mode ['auto', 'deepseek_v32', 'hf', 'mistral', 'slow']] + [--trust-remote-code | --no-trust-remote-code] + [--dtype {auto,bfloat16,float,float16,float32,half}] + [--seed SEED] [--hf-config-path HF_CONFIG_PATH] + [--allowed-local-media-path ALLOWED_LOCAL_MEDIA_PATH] + [--allowed-media-domains ALLOWED_MEDIA_DOMAINS [ALLOWED_MEDIA_DOMAINS ...]] + [--revision REVISION] [--code-revision CODE_REVISION] + [--tokenizer-revision TOKENIZER_REVISION] + [--max-model-len MAX_MODEL_LEN] + [--quantization QUANTIZATION] + [--enforce-eager | --no-enforce-eager] + [--max-logprobs MAX_LOGPROBS] + [--logprobs-mode {processed_logits,processed_logprobs,raw_logits,raw_logprobs}] + [--disable-sliding-window | --no-disable-sliding-window] + [--disable-cascade-attn | --no-disable-cascade-attn] + [--skip-tokenizer-init | --no-skip-tokenizer-init] + [--enable-prompt-embeds | --no-enable-prompt-embeds] + [--served-model-name SERVED_MODEL_NAME [SERVED_MODEL_NAME ...]] + [--config-format ['auto', 'hf', 'mistral']] + [--hf-token [HF_TOKEN]] [--hf-overrides HF_OVERRIDES] + [--pooler-config POOLER_CONFIG] + [--logits-processor-pattern LOGITS_PROCESSOR_PATTERN] + [--generation-config GENERATION_CONFIG] + [--override-generation-config OVERRIDE_GENERATION_CONFIG] + [--enable-sleep-mode | --no-enable-sleep-mode] + [--model-impl ['auto', 'terratorch', 'transformers', 'vllm']] + [--override-attention-dtype OVERRIDE_ATTENTION_DTYPE] + 
[--logits-processors LOGITS_PROCESSORS [LOGITS_PROCESSORS ...]] + [--io-processor-plugin IO_PROCESSOR_PLUGIN] + [--load-format LOAD_FORMAT] [--download-dir DOWNLOAD_DIR] + [--safetensors-load-strategy SAFETENSORS_LOAD_STRATEGY] + [--model-loader-extra-config MODEL_LOADER_EXTRA_CONFIG] + [--ignore-patterns IGNORE_PATTERNS [IGNORE_PATTERNS ...]] + [--use-tqdm-on-load | --no-use-tqdm-on-load] + [--pt-load-map-location PT_LOAD_MAP_LOCATION] + [--attention-backend ATTENTION_BACKEND] + [--reasoning-parser REASONING_PARSER] + [--reasoning-parser-plugin REASONING_PARSER_PLUGIN] + [--distributed-executor-backend ['external_launcher', 'mp', 'ray', 'uni']] + [--pipeline-parallel-size PIPELINE_PARALLEL_SIZE] + [--master-addr MASTER_ADDR] [--master-port MASTER_PORT] + [--nnodes NNODES] [--node-rank NODE_RANK] + [--tensor-parallel-size TENSOR_PARALLEL_SIZE] + [--decode-context-parallel-size DECODE_CONTEXT_PARALLEL_SIZE] + [--dcp-kv-cache-interleave-size DCP_KV_CACHE_INTERLEAVE_SIZE] + [--cp-kv-cache-interleave-size CP_KV_CACHE_INTERLEAVE_SIZE] + [--prefill-context-parallel-size PREFILL_CONTEXT_PARALLEL_SIZE] + [--data-parallel-size DATA_PARALLEL_SIZE] + [--data-parallel-rank DATA_PARALLEL_RANK] + [--data-parallel-start-rank DATA_PARALLEL_START_RANK] + [--data-parallel-size-local DATA_PARALLEL_SIZE_LOCAL] + [--data-parallel-address DATA_PARALLEL_ADDRESS] + [--data-parallel-rpc-port DATA_PARALLEL_RPC_PORT] + [--data-parallel-backend DATA_PARALLEL_BACKEND] + [--data-parallel-hybrid-lb | --no-data-parallel-hybrid-lb | -dph] + [--data-parallel-external-lb | --no-data-parallel-external-lb | -dpe] + [--enable-expert-parallel | --no-enable-expert-parallel] + [--all2all-backend {allgather_reducescatter,deepep_high_throughput,deepep_low_latency,flashinfer_all2allv,naive,pplx,None}] + [--enable-dbo | --no-enable-dbo] + [--dbo-decode-token-threshold DBO_DECODE_TOKEN_THRESHOLD] + [--dbo-prefill-token-threshold DBO_PREFILL_TOKEN_THRESHOLD] + [--disable-nccl-for-dp-synchronization | 
--no-disable-nccl-for-dp-synchronization] + [--enable-eplb | --no-enable-eplb] + [--eplb-config EPLB_CONFIG] + [--expert-placement-strategy {linear,round_robin}] + [--max-parallel-loading-workers MAX_PARALLEL_LOADING_WORKERS] + [--ray-workers-use-nsight | --no-ray-workers-use-nsight] + [--disable-custom-all-reduce | --no-disable-custom-all-reduce] + [--worker-cls WORKER_CLS] + [--worker-extension-cls WORKER_EXTENSION_CLS] + [--block-size {1,8,16,32,64,128,256}] + [--gpu-memory-utilization GPU_MEMORY_UTILIZATION] + [--kv-cache-memory-bytes KV_CACHE_MEMORY_BYTES] + [--swap-space SWAP_SPACE] + [--kv-cache-dtype {auto,bfloat16,fp8,fp8_ds_mla,fp8_e4m3,fp8_e5m2,fp8_inc}] + [--num-gpu-blocks-override NUM_GPU_BLOCKS_OVERRIDE] + [--enable-prefix-caching | --no-enable-prefix-caching] + [--prefix-caching-hash-algo {sha256,sha256_cbor,xxhash,xxhash_cbor}] + [--cpu-offload-gb CPU_OFFLOAD_GB] + [--calculate-kv-scales | --no-calculate-kv-scales] + [--kv-sharing-fast-prefill | --no-kv-sharing-fast-prefill] + [--mamba-cache-dtype {auto,float16,float32}] + [--mamba-ssm-cache-dtype {auto,float16,float32}] + [--mamba-block-size MAMBA_BLOCK_SIZE] + [--kv-offloading-size KV_OFFLOADING_SIZE] + [--kv-offloading-backend {lmcache,native,None}] + [--limit-mm-per-prompt LIMIT_MM_PER_PROMPT] + [--enable-mm-embeds | --no-enable-mm-embeds] + [--media-io-kwargs MEDIA_IO_KWARGS] + [--mm-processor-kwargs MM_PROCESSOR_KWARGS] + [--mm-processor-cache-gb MM_PROCESSOR_CACHE_GB] + [--mm-processor-cache-type {lru,shm}] + [--mm-shm-cache-max-object-size-mb MM_SHM_CACHE_MAX_OBJECT_SIZE_MB] + [--mm-encoder-tp-mode {data,weights}] + [--mm-encoder-attn-backend MM_ENCODER_ATTN_BACKEND] + [--interleave-mm-strings | --no-interleave-mm-strings] + [--skip-mm-profiling | --no-skip-mm-profiling] + [--video-pruning-rate VIDEO_PRUNING_RATE] + [--enable-lora | --no-enable-lora] + [--max-loras MAX_LORAS] + [--max-lora-rank {1,8,16,32,64,128,256,320,512}] + [--lora-dtype {auto,bfloat16,float16}] + [--max-cpu-loras 
MAX_CPU_LORAS] + [--fully-sharded-loras | --no-fully-sharded-loras] + [--default-mm-loras DEFAULT_MM_LORAS] + [--show-hidden-metrics-for-version SHOW_HIDDEN_METRICS_FOR_VERSION] + [--otlp-traces-endpoint OTLP_TRACES_ENDPOINT] + [--collect-detailed-traces {all,model,worker,None} [{all,model,worker,None} ...]] + [--kv-cache-metrics | --no-kv-cache-metrics] + [--kv-cache-metrics-sample KV_CACHE_METRICS_SAMPLE] + [--cudagraph-metrics | --no-cudagraph-metrics] + [--enable-layerwise-nvtx-tracing | --no-enable-layerwise-nvtx-tracing] + [--max-num-batched-tokens MAX_NUM_BATCHED_TOKENS] + [--max-num-seqs MAX_NUM_SEQS] + [--max-num-partial-prefills MAX_NUM_PARTIAL_PREFILLS] + [--max-long-partial-prefills MAX_LONG_PARTIAL_PREFILLS] + [--long-prefill-token-threshold LONG_PREFILL_TOKEN_THRESHOLD] + [--scheduling-policy {fcfs,priority}] + [--enable-chunked-prefill | --no-enable-chunked-prefill] + [--disable-chunked-mm-input | --no-disable-chunked-mm-input] + [--scheduler-cls SCHEDULER_CLS] + [--disable-hybrid-kv-cache-manager | --no-disable-hybrid-kv-cache-manager] + [--async-scheduling | --no-async-scheduling] + [--stream-interval STREAM_INTERVAL] + [--cudagraph-capture-sizes CUDAGRAPH_CAPTURE_SIZES [CUDAGRAPH_CAPTURE_SIZES ...]] + [--max-cudagraph-capture-size MAX_CUDAGRAPH_CAPTURE_SIZE] + [--speculative-config SPECULATIVE_CONFIG] + [--kv-transfer-config KV_TRANSFER_CONFIG] + [--kv-events-config KV_EVENTS_CONFIG] + [--ec-transfer-config EC_TRANSFER_CONFIG] + [--compilation-config COMPILATION_CONFIG] + [--attention-config ATTENTION_CONFIG] + [--additional-config ADDITIONAL_CONFIG] + [--structured-outputs-config STRUCTURED_OUTPUTS_CONFIG] + [--profiler-config PROFILER_CONFIG] + [--optimization-level OPTIMIZATION_LEVEL] + [--disable-log-stats] [--aggregate-engine-logging] + [--enable-log-requests | --no-enable-log-requests] + [--disable-log-requests | --no-disable-log-requests] + [model_tag] +api_server.py: error: unrecognized arguments: --guided-decoding-backend 
+/u/yurenh2/miniforge3/envs/eval/lib/python3.11/site-packages/transformers/utils/hub.py:110: FutureWarning: Using `TRANSFORMERS_CACHE` is deprecated and will be removed in v5 of Transformers. Use `HF_HOME` instead. + warnings.warn( +[0;36m(EngineCore_DP0 pid=3603174)[0;0m
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s] +[0;36m(EngineCore_DP0 pid=3603174)[0;0m
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:06<00:19, 6.52s/it] +[0;36m(EngineCore_DP0 pid=3603174)[0;0m
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:08<00:07, 3.70s/it] +[0;36m(EngineCore_DP0 pid=3603174)[0;0m
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:15<00:05, 5.28s/it] +[0;36m(EngineCore_DP0 pid=3603174)[0;0m
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:21<00:00, 5.57s/it] +[0;36m(EngineCore_DP0 pid=3603174)[0;0m
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:21<00:00, 5.35s/it] +[0;36m(EngineCore_DP0 pid=3603174)[0;0m +[0;36m(EngineCore_DP0 pid=3603174)[0;0m
Capturing CUDA graphs (mixed prefill-decode, PIECEWISE): 0%| | 0/51 [00:00<?, ?it/s]
Capturing CUDA graphs (mixed prefill-decode, PIECEWISE): 4%|▍ | 2/51 [00:00<00:03, 14.83it/s]
Capturing CUDA graphs (mixed prefill-decode, PIECEWISE): 8%|▊ | 4/51 [00:00<00:03, 15.62it/s]
Capturing CUDA graphs (mixed prefill-decode, PIECEWISE): 12%|█▏ | 6/51 [00:00<00:02, 16.45it/s]
Capturing CUDA graphs (mixed prefill-decode, PIECEWISE): 16%|█▌ | 8/51 [00:00<00:02, 16.87it/s]
Capturing CUDA graphs (mixed prefill-decode, PIECEWISE): 22%|██▏ | 11/51 [00:00<00:02, 18.23it/s]
Capturing CUDA graphs (mixed prefill-decode, PIECEWISE): 27%|██▋ | 14/51 [00:00<00:01, 19.00it/s]
Capturing CUDA graphs (mixed prefill-decode, PIECEWISE): 33%|███▎ | 17/51 [00:00<00:01, 19.74it/s]
Capturing CUDA graphs (mixed prefill-decode, PIECEWISE): 39%|███▉ | 20/51 [00:01<00:01, 20.74it/s]
Capturing CUDA graphs (mixed prefill-decode, PIECEWISE): 45%|████▌ | 23/51 [00:01<00:01, 21.38it/s]
Capturing CUDA graphs (mixed prefill-decode, PIECEWISE): 51%|█████ | 26/51 [00:01<00:01, 21.90it/s]
Capturing CUDA graphs (mixed prefill-decode, PIECEWISE): 57%|█████▋ | 29/51 [00:01<00:01, 20.09it/s]
Capturing CUDA graphs (mixed prefill-decode, PIECEWISE): 63%|██████▎ | 32/51 [00:01<00:00, 21.04it/s]
Capturing CUDA graphs (mixed prefill-decode, PIECEWISE): 69%|██████▊ | 35/51 [00:01<00:00, 21.92it/s]
Capturing CUDA graphs (mixed prefill-decode, PIECEWISE): 75%|███████▍ | 38/51 [00:01<00:00, 22.58it/s]
Capturing CUDA graphs (mixed prefill-decode, PIECEWISE): 80%|████████ | 41/51 [00:01<00:00, 23.20it/s]
Capturing CUDA graphs (mixed prefill-decode, PIECEWISE): 86%|████████▋ | 44/51 [00:02<00:00, 23.84it/s]
Capturing CUDA graphs (mixed prefill-decode, PIECEWISE): 92%|█████████▏| 47/51 [00:02<00:00, 23.47it/s]
Capturing CUDA graphs (mixed prefill-decode, PIECEWISE): 98%|█████████▊| 50/51 [00:02<00:00, 24.19it/s]
Capturing CUDA graphs (mixed prefill-decode, PIECEWISE): 100%|██████████| 51/51 [00:02<00:00, 20.91it/s] +[0;36m(EngineCore_DP0 pid=3603174)[0;0m
Capturing CUDA graphs (decode, FULL): 0%| | 0/35 [00:00<?, ?it/s]
Capturing CUDA graphs (decode, FULL): 3%|▎ | 1/35 [00:00<00:04, 7.51it/s]
Capturing CUDA graphs (decode, FULL): 11%|█▏ | 4/35 [00:00<00:01, 16.43it/s]
Capturing CUDA graphs (decode, FULL): 20%|██ | 7/35 [00:00<00:01, 19.40it/s]
Capturing CUDA graphs (decode, FULL): 29%|██▊ | 10/35 [00:00<00:01, 20.92it/s]
Capturing CUDA graphs (decode, FULL): 37%|███▋ | 13/35 [00:00<00:01, 21.93it/s]
Capturing CUDA graphs (decode, FULL): 46%|████▌ | 16/35 [00:00<00:00, 22.62it/s]
Capturing CUDA graphs (decode, FULL): 54%|█████▍ | 19/35 [00:00<00:00, 23.03it/s]
Capturing CUDA graphs (decode, FULL): 63%|██████▎ | 22/35 [00:01<00:00, 23.33it/s]
Capturing CUDA graphs (decode, FULL): 71%|███████▏ | 25/35 [00:01<00:00, 23.31it/s]
Capturing CUDA graphs (decode, FULL): 80%|████████ | 28/35 [00:01<00:00, 23.67it/s]
Capturing CUDA graphs (decode, FULL): 89%|████████▊ | 31/35 [00:01<00:00, 24.00it/s]
Capturing CUDA graphs (decode, FULL): 97%|█████████▋| 34/35 [00:01<00:00, 24.21it/s]
#!/bin/bash
#SBATCH --job-name=test_fullscale
#SBATCH --account=bfqt-delta-gpu
#SBATCH --partition=gpuA100x4
#SBATCH --nodes=1
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=16
#SBATCH --gres=gpu:nvidia_a100:2
#SBATCH --mem=128G
#SBATCH --time=1:00:00
#SBATCH --output=test_fullscale_%j.out
#SBATCH --error=test_fullscale_%j.err

# collaborativeagents/slurm/fullscale/test_run.sh
#
# Test run: 2 profiles × 2 methods × 2 sessions = 8 sessions
# Should complete in ~10-15 minutes
#
# Steps:
#   1. enter the project, activate the `eval` conda env, load .env
#   2. start a vLLM server for the 8B agent on GPU 0 (port 8003)
#   3. wait until the server answers /health (abort if it never does)
#   4. run run_experiments.py for vanilla + rag_vector with an OpenAI user model
#   5. stop the vLLM server (also on failure, via an EXIT trap)

PROJECT_ROOT="/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model"
MODEL_8B="${PROJECT_ROOT}/models/llama-3.1-8b-instruct"
AGENT_PORT=8003
HEALTH_ATTEMPTS=90    # health polls before giving up
HEALTH_INTERVAL=2     # seconds between polls

# Print an error to stderr and abort the job.
die() {
  echo "ERROR: $*" >&2
  exit 1
}

# Stop every vLLM API server started by this user on the node (best effort:
# pkill returns non-zero when nothing matched, which is not an error here).
stop_vllm() {
  pkill -f "vllm.entrypoints" 2>/dev/null || true
}

# wait_for_server PORT PID
# Poll http://localhost:PORT/health until it succeeds.
# Returns 0 once healthy; 1 if process PID exits first or the polls run out.
wait_for_server() {
  local port=$1 pid=$2 attempt
  for ((attempt = 1; attempt <= HEALTH_ATTEMPTS; attempt++)); do
    if curl -sf "http://localhost:${port}/health" > /dev/null 2>&1; then
      return 0
    fi
    # A server that already exited will never become healthy.
    if ! kill -0 "$pid" 2>/dev/null; then
      echo "ERROR: vLLM server (pid ${pid}) exited during startup." >&2
      return 1
    fi
    sleep "$HEALTH_INTERVAL"
  done
  echo "ERROR: vLLM server on port ${port} not healthy after $((HEALTH_ATTEMPTS * HEALTH_INTERVAL))s." >&2
  return 1
}

cd "$PROJECT_ROOT" || die "cannot cd to ${PROJECT_ROOT}"
source /u/yurenh2/miniforge3/etc/profile.d/conda.sh
conda activate eval

export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface
export PYTHONPATH="${PWD}/src:${PWD}/collaborativeagents:${PYTHONPATH}"
export NCCL_P2P_DISABLE=1

# Load OpenAI API key (set -a exports every variable assigned while sourcing)
set -a
source .env
set +a

pip install --quiet openai python-dotenv json-repair

stop_vllm
sleep 2

# From here on, always tear the server down, whatever the exit path.
trap stop_vllm EXIT

# GPU 0: vLLM 8B agent, GPU 1: adapter models
CUDA_VISIBLE_DEVICES=0 python -m vllm.entrypoints.openai.api_server \
  --model "$MODEL_8B" --port "$AGENT_PORT" --tensor-parallel-size 1 \
  --gpu-memory-utilization 0.5 --max-model-len 8192 \
  --dtype bfloat16 --disable-log-requests &
agent_pid=$!

# Wait for server; do not start the experiment against a server that is down.
wait_for_server "$AGENT_PORT" "$agent_pid" || exit 1
echo "vLLM 8B agent server ready."

cd collaborativeagents/scripts || die "cannot cd to collaborativeagents/scripts"

# Test run: vanilla + rag_vector (light + heavy methods)
python run_experiments.py \
  --methods vanilla,rag_vector \
  --datasets math-hard \
  --n-profiles 2 \
  --n-sessions 2 \
  --max-turns 8 \
  --use-vllm \
  --use-openai-user \
  --openai-user-model gpt-5-mini \
  --reward-mode llm \
  --vllm-agent-url "http://localhost:${AGENT_PORT}/v1" \
  --parallel-profiles 2 \
  --profile-path ../data/complex_profiles_v2/profiles_200.jsonl \
  --output-dir "../results/fullscale_test_$(date +%Y%m%d_%H%M%S)"
run_status=$?

if ((run_status == 0)); then
  echo "Test run complete!"
else
  echo "ERROR: run_experiments.py failed with exit status ${run_status}" >&2
fi

# The EXIT trap stops the vLLM server; propagate the experiment's status so
# the scheduler records a failed run as failed.
exit "$run_status"
