#!/bin/bash
#SBATCH --job-name=full_exp_v2
#SBATCH --account=bfqt-delta-gpu
#SBATCH --partition=gpuA100x4
#SBATCH --nodes=1
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=16
#SBATCH --gres=gpu:nvidia_a100:4
#SBATCH --mem=200G
#SBATCH --time=48:00:00
#SBATCH --output=logs/full_exp_v2_%j.out
#SBATCH --error=logs/full_exp_v2_%j.err

# Full scale experiment v2 - with fixes:
# 1. Use stable datasets (math-500, humaneval) - avoid problematic ones
# 2. Reduced scale first (20 profiles, 15 sessions) to verify stability
# 3. Clear CUDA cache between methods

set -e

cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model

mkdir -p collaborativeagents/slurm/logs
mkdir -p collaborativeagents/results

echo "Starting FULL SCALE v2 experiment at $(date)"
echo "Job ID: $SLURM_JOB_ID"
echo "Node: $SLURMD_NODENAME"
echo "GPUs: $CUDA_VISIBLE_DEVICES"

source /u/yurenh2/miniforge3/etc/profile.d/conda.sh
conda activate eval

nvidia-smi

export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface
mkdir -p "$HF_HOME"

export PYTHONPATH="${PWD}/src:${PWD}/collaborativeagents:${PYTHONPATH}"

# Disable peer-to-peer memory access to avoid NVLink errors
export CUDA_VISIBLE_DEVICES=0,1,2,3
export NCCL_P2P_DISABLE=1

cd collaborativeagents/scripts

# FULL SCALE v2:
# - 20 profiles (reduced from 30 for stability)
# - 15 sessions per profile (enough to show learning)
# - 2 stable datasets: math-500, humaneval
# - All 7 methods
echo "Running FULL SCALE v2 experiments..."

python run_experiments.py \
    --methods vanilla,all_memory,rag,rag_vector,contextual,reflection,reflection_grpo \
    --datasets math-500,humaneval \
    --n-profiles 20 \
    --n-sessions 15 \
    --profile-path ../data/complex_profiles_v2/profiles_100.jsonl \
    --output-dir ../results/full_experiment_v2_$(date +%Y%m%d_%H%M%S)

echo "Full experiment v2 completed at $(date)"