#!/bin/bash
# run_training.sh
#
# Launch one RLVR training run through train_rlvr.py and mirror everything the
# trainer prints (stdout and stderr) into <output dir>/training.log.
#
# Usage:
#   run_training.sh [PRECISION_MODE] [SEED]
#
# Positional arguments:
#   PRECISION_MODE  forwarded as --precision_mode (default: bf16)
#   SEED            forwarded as --seed           (default: 1)
#
# Environment overrides (honoured when set and non-empty):
#   CUDA_VISIBLE_DEVICES  default: 0,1
#   TRAIN_DATA            default: ./data/dm_train.json
#   OUTPUT_BASE           default: ./results/train_logs
#   MODEL_NAME            default: Qwen/Qwen2.5-Math-7B
#   NUM_STEPS             default: 300
#
# HF_HOME and HF_HUB_CACHE are always set by this script (not overridable).

# Stop at the first failing command. pipefail makes the `trainer | tee`
# pipeline report the trainer's failure instead of tee's success.
set -eo pipefail

# --- Configuration -----------------------------------------------------------

# Keep a caller-supplied GPU selection; otherwise fall back to GPUs 0 and 1.
: "${CUDA_VISIBLE_DEVICES:=0,1}"
export CUDA_VISIBLE_DEVICES

# HuggingFace cache lives on shared HDD storage to avoid quota issues.
export HF_HOME="/work/hdd/bfqt/yurenh2/huggingface_cache"
export HF_HUB_CACHE="${HF_HOME}/hub"
mkdir -p "$HF_HOME" "$HF_HUB_CACHE"

# Run parameters: the two positionals first, then environment-driven settings.
PRECISION_MODE="${1:-bf16}"
SEED="${2:-1}"
: "${TRAIN_DATA:=./data/dm_train.json}"
: "${OUTPUT_BASE:=./results/train_logs}"
: "${MODEL_NAME:=Qwen/Qwen2.5-Math-7B}"
: "${NUM_STEPS:=300}"

# Each (precision mode, seed) pair gets its own directory under OUTPUT_BASE.
OUTPUT_DIR="${OUTPUT_BASE}/${PRECISION_MODE}_seed${SEED}"
mkdir -p "$OUTPUT_DIR"

# --- Banner ------------------------------------------------------------------

banner_rule="=============================================="
printf '%s\n' \
  "$banner_rule" \
  "RLVR Training" \
  "$banner_rule" \
  "Precision Mode: $PRECISION_MODE" \
  "Seed: $SEED" \
  "Model: $MODEL_NAME" \
  "Training Data: $TRAIN_DATA" \
  "Output: $OUTPUT_DIR" \
  "Num Steps: $NUM_STEPS" \
  "$banner_rule"

# --- Training ----------------------------------------------------------------

# Assemble the trainer's argv as an array so every value stays a single word.
train_cmd=(
  python train_rlvr.py
  --precision_mode "$PRECISION_MODE"
  --seed "$SEED"
  --output_dir "$OUTPUT_DIR"
  --train_dataset_path "$TRAIN_DATA"
  --model_name "$MODEL_NAME"
  --num_steps "$NUM_STEPS"
)

# Merge stderr into stdout so the log file captures both streams.
"${train_cmd[@]}" 2>&1 | tee "${OUTPUT_DIR}/training.log"

# Only reached when the pipeline above succeeded (set -e + pipefail).
echo "Training complete. Output saved to: $OUTPUT_DIR"