#!/bin/bash
#SBATCH --job-name=reflection_grpo
#SBATCH --account=bfqt-delta-gpu
#SBATCH --partition=gpuA100x4
#SBATCH --nodes=1
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=16
#SBATCH --gres=gpu:nvidia_a100:4
#SBATCH --mem=200G
#SBATCH --time=48:00:00
#SBATCH --output=logs/reflection_grpo_%j.out
#SBATCH --error=logs/reflection_grpo_%j.err
#
# SLURM batch job: runs run_experiments.py with the `reflection_grpo` method
# over the mmlu, aime, math-hard and humaneval datasets.
#
# NOTE(review): the --output/--error paths above are relative and must already
# exist when the job starts; the `mkdir -p` below runs too late to create the
# log directory for this job's own log files. Presumably the job is submitted
# from collaborativeagents/slurm -- confirm.

# Abort on the first failing command. `-u` is deliberately not enabled:
# conda activation hooks may reference unset variables.
set -e

# Work from the project root so the relative paths below resolve.
cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model
mkdir -p collaborativeagents/slurm/logs collaborativeagents/results

# Load conda's shell functions, then activate the `eval` environment.
source /u/yurenh2/miniforge3/etc/profile.d/conda.sh
conda activate eval

export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface
# Append the previous PYTHONPATH only when it is non-empty, so an unset value
# does not leave a trailing ':' (an empty entry, i.e. the current directory).
export PYTHONPATH="${PWD}/src:${PWD}/collaborativeagents${PYTHONPATH:+:${PYTHONPATH}}"
# Turns off NCCL's peer-to-peer GPU transport.
# NOTE(review): presumably a workaround for this cluster -- confirm it is still needed.
export NCCL_P2P_DISABLE=1

cd collaborativeagents/scripts

# Timestamped output directory so repeated runs do not overwrite each other.
run_stamp="$(date +%Y%m%d_%H%M%S)"

python run_experiments.py \
  --methods reflection_grpo \
  --datasets mmlu,aime,math-hard,humaneval \
  --n-profiles 30 \
  --n-sessions 20 \
  --profile-path ../data/complex_profiles_v2/profiles_100.jsonl \
  --output-dir "../results/reflection_grpo_${run_stamp}"