author    YurenHao0426 <blackhao0426@gmail.com>  2026-02-04 18:59:35 -0600
committer YurenHao0426 <blackhao0426@gmail.com>  2026-02-04 18:59:35 -0600
commit    f1c2cc22d46a6976df3555391e667c7e61592fad (patch)
tree      0b37b52c8ff91042a742d3b3ec54542cb6d6e2f6 /scripts
Initial commit: RL floating-point noise project (HEAD, main)
Diffstat (limited to 'scripts')
-rwxr-xr-x  scripts/prepare_data.py          258
-rwxr-xr-x  scripts/run_evaluation.sh         58
-rwxr-xr-x  scripts/run_full_experiment.sh   106
-rwxr-xr-x  scripts/run_training.sh           50
-rwxr-xr-x  scripts/setup_env.sh              79
-rwxr-xr-x  scripts/slurm_train.sh           145
-rwxr-xr-x  scripts/submit_all_jobs.sh        66
-rwxr-xr-x  scripts/submit_single_job.sh      32
-rw-r--r--  scripts/test_quick.sh             78
9 files changed, 872 insertions, 0 deletions
diff --git a/scripts/prepare_data.py b/scripts/prepare_data.py
new file mode 100755
index 0000000..5ef3c29
--- /dev/null
+++ b/scripts/prepare_data.py
@@ -0,0 +1,258 @@
+#!/usr/bin/env python3
+"""
+Prepare REAL datasets for RLVR floating-point precision experiments.
+
+Downloads from HuggingFace:
+- Training: GSM8K train (7473 samples)
+- Evaluation: GSM8K test, MATH-500, AIME, AMC, MMLU-STEM, HumanEval
+
+Usage:
+    python scripts/prepare_data.py
+"""
+
+import json
+import os
+import random
+from pathlib import Path
+from datasets import load_dataset
+from tqdm import tqdm
+
+DATA_DIR = Path("data")
+DATA_DIR.mkdir(exist_ok=True)
+
+
+def save_json(data: list, path: Path):
+    """Save data as JSON file."""
+    with open(path, "w") as f:
+        json.dump(data, f, indent=2)
+    print(f" Saved {len(data)} samples to {path}")
+
+
+def prepare_gsm8k_train():
+    """Prepare GSM8K training data."""
+    print("\n=== Downloading GSM8K Train ===")
+    ds = load_dataset("openai/gsm8k", "main", split="train")
+
+    data = []
+    for i, sample in enumerate(tqdm(ds, desc="Processing")):
+        # Extract answer from "#### N" format
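+        # e.g. an answer field ending in "#### 72" yields "72"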
+        answer = sample["answer"].split("####")[-1].strip()
+        data.append({
+            "id": f"gsm8k_train_{i}",
+            "prompt": sample["question"],
+            "answer": answer,
+            "solution": sample["answer"],
+            "source": "gsm8k_train"
+        })
+
+    save_json(data, DATA_DIR / "dm_train.json")
+    return data
+
+
+def prepare_gsm8k_test():
+    """Prepare GSM8K test data for evaluation."""
+    print("\n=== Downloading GSM8K Test ===")
+    ds = load_dataset("openai/gsm8k", "main", split="test")
+
+    data = []
+    for i, sample in enumerate(tqdm(ds, desc="Processing")):
+        answer = sample["answer"].split("####")[-1].strip()
+        data.append({
+            "id": f"gsm8k_test_{i}",
+            "prompt": sample["question"],
+            "answer": answer,
+            "solution": sample["answer"],
+            "source": "gsm8k"
+        })
+
+    save_json(data, DATA_DIR / "gsm8k.json")
+
+    # Also create dm_val as a subset (first 500 for on-task eval)
+    save_json(data[:500], DATA_DIR / "dm_val.json")
+    return data
+
+
+def prepare_math500():
+    """Prepare MATH-500 dataset."""
+    print("\n=== Downloading MATH-500 ===")
+    ds = load_dataset("HuggingFaceH4/MATH-500", split="test")
+
+    data = []
+    for i, sample in enumerate(tqdm(ds, desc="Processing")):
+        data.append({
+            "id": f"math500_{i}",
+            "prompt": sample["problem"],
+            "answer": sample["answer"],
+            "solution": sample["solution"],
+            "subject": sample.get("subject", ""),
+            "level": sample.get("level", ""),
+            "source": "math500"
+        })
+
+    save_json(data, DATA_DIR / "math500.json")
+    return data
+
+
+def prepare_aime():
+    """Prepare AIME dataset from AI-MO."""
+    print("\n=== Downloading AIME ===")
+    ds = load_dataset("AI-MO/aimo-validation-aime", split="train")
+
+    data = []
+    for i, sample in enumerate(tqdm(ds, desc="Processing")):
+        data.append({
+            "id": f"aime_{i}",
+            "prompt": sample["problem"],
+            "answer": str(sample["answer"]),
+            "solution": sample.get("solution", ""),
+            "url": sample.get("url", ""),
+            "source": "aime"
+        })
+
+    # Split into aime24 and aime25
+    # Real AIME has 15 problems per contest, 2 contests per year = 30/year
+    save_json(data[:30], DATA_DIR / "aime24.json")
+    save_json(data[30:60], DATA_DIR / "aime25.json")
+    save_json(data, DATA_DIR / "aime_all.json")
+    return data
+
+
+def prepare_amc():
+    """Prepare AMC dataset from AI-MO."""
+    print("\n=== Downloading AMC ===")
+    ds = load_dataset("AI-MO/aimo-validation-amc", split="train")
+
+    data = []
+    for i, sample in enumerate(tqdm(ds, desc="Processing")):
+        data.append({
+            "id": f"amc_{i}",
+            "prompt": sample["problem"],
+            "answer": str(sample["answer"]),
+            "solution": sample.get("solution", ""),
+            "source": "amc"
+        })
+
+    save_json(data, DATA_DIR / "amc23.json")
+    return data
+
+
+def prepare_mmlu_stem():
+    """Prepare MMLU-STEM subset."""
+    print("\n=== Downloading MMLU-STEM ===")
+
+    stem_subjects = [
+        "abstract_algebra", "astronomy", "college_biology", "college_chemistry",
+        "college_computer_science", "college_mathematics", "college_physics",
+        "computer_security", "conceptual_physics", "electrical_engineering",
+        "elementary_mathematics", "high_school_biology", "high_school_chemistry",
+        "high_school_computer_science", "high_school_mathematics", "high_school_physics",
+        "high_school_statistics", "machine_learning"
+    ]
+
+    data = []
+    for subject in tqdm(stem_subjects, desc="Loading subjects"):
+        try:
+            ds = load_dataset("cais/mmlu", subject, split="test")
+            for i, sample in enumerate(ds):
+                choices = sample["choices"]
+                correct_idx = sample["answer"]
+                # Format as multiple choice
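+                # e.g. choices ["3", "4", "5", "6"] with correct_idx 1 become
+                # "(A) 3\n(B) 4\n(C) 5\n(D) 6" under the question, answer "B"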
+                prompt = f"{sample['question']}\n"
+                for j, choice in enumerate(choices):
+                    prompt += f"({chr(65+j)}) {choice}\n"
+
+                data.append({
+                    "id": f"mmlu_{subject}_{i}",
+                    "prompt": prompt,
+                    "answer": chr(65 + correct_idx),
+                    "subject": subject,
+                    "source": "mmlu_stem"
+                })
+        except Exception as e:
+            print(f" Warning: Skipping {subject}: {e}")
+
+    # Take a random subset of 500
+    random.seed(42)
+    if len(data) > 500:
+        data = random.sample(data, 500)
+
+    save_json(data, DATA_DIR / "mmlu_stem.json")
+    return data
+
+
+def prepare_humaneval():
+    """Prepare HumanEval code dataset."""
+    print("\n=== Downloading HumanEval ===")
+    ds = load_dataset("openai/openai_humaneval", split="test")
+
+    data = []
+    for i, sample in enumerate(tqdm(ds, desc="Processing")):
+        data.append({
+            "id": f"humaneval_{i}",
+            "prompt": sample["prompt"],
+            "answer": sample["canonical_solution"],
+            "entry_point": sample["entry_point"],
+            "test": sample["test"],
+            "source": "humaneval"
+        })
+
+    save_json(data, DATA_DIR / "humaneval.json")
+    return data
+
+
+def verify_data():
+    """Verify downloaded data quality."""
+    print("\n" + "=" * 60)
+    print("Verifying Data Quality")
+    print("=" * 60)
+
+    for f in sorted(DATA_DIR.glob("*.json")):
+        with open(f) as fp:
+            data = json.load(fp)
+
+        # Check for unique prompts
+        prompts = [d["prompt"] for d in data]
+        unique = len(set(prompts))
+
+        status = "OK" if unique == len(prompts) else f"WARN: {len(prompts)-unique} duplicates"
+        print(f" {f.name}: {len(data)} samples, {unique} unique [{status}]")
+
+        # Show first example
+        if data:
+            print(f" Example: {data[0]['prompt'][:60]}...")
+
+
+def main():
+    print("=" * 60)
+    print("RLVR Real Data Preparation")
+    print("=" * 60)
+
+    # Backup old data
+    backup_dir = DATA_DIR / "backup_synthetic"
+    if not backup_dir.exists() and any(DATA_DIR.glob("*.json")):
+        backup_dir.mkdir(exist_ok=True)
+        for f in DATA_DIR.glob("*.json"):
+            f.rename(backup_dir / f.name)
+        print(f"Backed up synthetic data to {backup_dir}")
+
+    # Training data
+    prepare_gsm8k_train()
+
+    # Evaluation data
+    prepare_gsm8k_test()
+    prepare_math500()
+    prepare_aime()
+    prepare_amc()
+    prepare_mmlu_stem()
+    prepare_humaneval()
+
+    # Verify
+    verify_data()
+
+    print("\n" + "=" * 60)
+    print("Data preparation complete!")
+    print("=" * 60)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/run_evaluation.sh b/scripts/run_evaluation.sh
new file mode 100755
index 0000000..b39c230
--- /dev/null
+++ b/scripts/run_evaluation.sh
@@ -0,0 +1,58 @@
+#!/bin/bash
+# run_evaluation.sh
+# Script to run evaluation on trained models
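+# Usage: ./scripts/run_evaluation.sh [precision_mode] [seed]
+# Example: ./scripts/run_evaluation.sh bf16 1 (these are also the defaults)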
+
+set -e
+set -o pipefail # Properly capture exit codes through pipes
+
+# Configuration
+export CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES:-"0"}
+
+# HuggingFace cache - use shared HDD storage to avoid quota issues
+export HF_HOME="/work/hdd/bfqt/yurenh2/huggingface_cache"
+export HF_HUB_CACHE="/work/hdd/bfqt/yurenh2/huggingface_cache/hub"
+mkdir -p "$HF_HOME" "$HF_HUB_CACHE"
+
+# Default values
+PRECISION_MODE=${1:-"bf16"}
+SEED=${2:-1}
+BASE_MODEL=${BASE_MODEL:-"Qwen/Qwen2.5-Math-7B"}
+TRAIN_LOGS_DIR=${TRAIN_LOGS_DIR:-"./results/train_logs"}
+EVAL_METRICS_DIR=${EVAL_METRICS_DIR:-"./results/eval_metrics"}
+EVAL_CONFIG=${EVAL_CONFIG:-"./configs/eval_tasks_config.json"}
+
+# Paths
+FT_CKPT="${TRAIN_LOGS_DIR}/${PRECISION_MODE}_seed${SEED}/final_model"
+OUTPUT_PATH="${EVAL_METRICS_DIR}/${PRECISION_MODE}_seed${SEED}.json"
+
+# Create output directory
+mkdir -p "$EVAL_METRICS_DIR"
+
+echo "=============================================="
+echo "Model Evaluation"
+echo "=============================================="
+echo "Precision Mode: $PRECISION_MODE"
+echo "Seed: $SEED"
+echo "Base Model: $BASE_MODEL"
+echo "Finetuned Model: $FT_CKPT"
+echo "Output: $OUTPUT_PATH"
+echo "=============================================="
+
+# Check if checkpoint exists
+if [ ! -d "$FT_CKPT" ]; then
+    echo "Error: Checkpoint not found at $FT_CKPT"
+    exit 1
+fi
+
+# Run evaluation
+python eval_policy.py \
+    --base_ckpt "$BASE_MODEL" \
+    --ft_ckpt "$FT_CKPT" \
+    --eval_tasks_config "$EVAL_CONFIG" \
+    --output_path "$OUTPUT_PATH" \
+    --eval_base \
+    --use_amp \
+    2>&1 | tee "${EVAL_METRICS_DIR}/${PRECISION_MODE}_seed${SEED}_eval.log"
+
+echo "Evaluation complete. Results saved to: $OUTPUT_PATH"
+
diff --git a/scripts/run_full_experiment.sh b/scripts/run_full_experiment.sh
new file mode 100755
index 0000000..43e9dd5
--- /dev/null
+++ b/scripts/run_full_experiment.sh
@@ -0,0 +1,106 @@
+#!/bin/bash
+# run_full_experiment.sh
+# Master script to run the complete RLVR floating-point precision experiment
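+# Usage: bash scripts/run_full_experiment.sh
+# (TRAIN_DATA and OUTPUT_BASE can be overridden via environment variables)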
+
+set -e
+
+# Configuration
+SEEDS=(1 2 3 4 5)
+PRECISION_MODES=("fp32" "bf16")
+TRAIN_DATA=${TRAIN_DATA:-"./data/dm_train.json"}
+OUTPUT_BASE=${OUTPUT_BASE:-"./results"}
+
+echo "=============================================="
+echo "RLVR Floating-Point Precision Experiment"
+echo "=============================================="
+echo "Seeds: ${SEEDS[*]}"
+echo "Precision Modes: ${PRECISION_MODES[*]}"
+echo "Output: $OUTPUT_BASE"
+echo "=============================================="
+
+# Create directories
+mkdir -p "$OUTPUT_BASE/train_logs"
+mkdir -p "$OUTPUT_BASE/eval_metrics"
+mkdir -p "$OUTPUT_BASE/analysis"
+
+# Phase 1: Training
+echo ""
+echo "=============================================="
+echo "PHASE 1: TRAINING"
+echo "=============================================="
+
+for precision in "${PRECISION_MODES[@]}"; do
+    for seed in "${SEEDS[@]}"; do
+        echo "Training: precision=$precision, seed=$seed"
+
+        OUTPUT_DIR="$OUTPUT_BASE/train_logs/${precision}_seed${seed}"
+
+        # Skip if already completed
+        if [ -d "$OUTPUT_DIR/final_model" ]; then
+            echo " -> Skipping (already completed)"
+            continue
+        fi
+
+        # Run training
+        bash scripts/run_training.sh "$precision" "$seed"
+    done
+done
+
+# Phase 2: Evaluation
+echo ""
+echo "=============================================="
+echo "PHASE 2: EVALUATION"
+echo "=============================================="
+
+for precision in "${PRECISION_MODES[@]}"; do
+    for seed in "${SEEDS[@]}"; do
+        echo "Evaluating: precision=$precision, seed=$seed"
+
+        OUTPUT_PATH="$OUTPUT_BASE/eval_metrics/${precision}_seed${seed}.json"
+
+        # Skip if already completed
+        if [ -f "$OUTPUT_PATH" ]; then
+            echo " -> Skipping (already completed)"
+            continue
+        fi
+
+        # Run evaluation
+        bash scripts/run_evaluation.sh "$precision" "$seed"
+    done
+done
+
+# Phase 3: bf16 Sparsity Analysis
+echo ""
+echo "=============================================="
+echo "PHASE 3: BF16 SPARSITY ANALYSIS"
+echo "=============================================="
+
+python run_experiments.py --mode sparsity \
+    --base_output_dir "$OUTPUT_BASE" \
+    --seeds "${SEEDS[@]}"
+
+# Phase 4: Results Analysis
+echo ""
+echo "=============================================="
+echo "PHASE 4: RESULTS ANALYSIS"
+echo "=============================================="
+
+python analyze_results.py \
+    --results_dir "$OUTPUT_BASE/eval_metrics" \
+    --output_dir "$OUTPUT_BASE/analysis" \
+    --on_task dm_val \
+    --off_task aime24 aime25 amc23 math500 mmlu_stem humaneval
+
+echo ""
+echo "=============================================="
+echo "EXPERIMENT COMPLETE"
+echo "=============================================="
+echo "Results saved to: $OUTPUT_BASE"
+echo ""
+echo "Key output files:"
+echo " - Training logs: $OUTPUT_BASE/train_logs/"
+echo " - Evaluation metrics: $OUTPUT_BASE/eval_metrics/"
+echo " - Analysis: $OUTPUT_BASE/analysis/full_analysis.json"
+echo " - Plots: $OUTPUT_BASE/analysis/*.png"
+echo "=============================================="
+
diff --git a/scripts/run_training.sh b/scripts/run_training.sh
new file mode 100755
index 0000000..38b2fc8
--- /dev/null
+++ b/scripts/run_training.sh
@@ -0,0 +1,50 @@
+#!/bin/bash
+# run_training.sh
+# Script to run RLVR training experiments with different precision modes
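+# Usage: ./scripts/run_training.sh [precision_mode] [seed]
+# Env overrides: TRAIN_DATA, OUTPUT_BASE, MODEL_NAME, NUM_STEPS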
+
+set -e
+set -o pipefail # Properly capture exit codes through pipes
+
+# Configuration
+export CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES:-"0,1"}
+
+# HuggingFace cache - use shared HDD storage to avoid quota issues
+export HF_HOME="/work/hdd/bfqt/yurenh2/huggingface_cache"
+export HF_HUB_CACHE="/work/hdd/bfqt/yurenh2/huggingface_cache/hub"
+mkdir -p "$HF_HOME" "$HF_HUB_CACHE"
+
+# Default values
+PRECISION_MODE=${1:-"bf16"}
+SEED=${2:-1}
+TRAIN_DATA=${TRAIN_DATA:-"./data/dm_train.json"}
+OUTPUT_BASE=${OUTPUT_BASE:-"./results/train_logs"}
+MODEL_NAME=${MODEL_NAME:-"Qwen/Qwen2.5-Math-7B"}
+NUM_STEPS=${NUM_STEPS:-300}
+
+# Create output directory
+OUTPUT_DIR="${OUTPUT_BASE}/${PRECISION_MODE}_seed${SEED}"
+mkdir -p "$OUTPUT_DIR"
+
+echo "=============================================="
+echo "RLVR Training"
+echo "=============================================="
+echo "Precision Mode: $PRECISION_MODE"
+echo "Seed: $SEED"
+echo "Model: $MODEL_NAME"
+echo "Training Data: $TRAIN_DATA"
+echo "Output: $OUTPUT_DIR"
+echo "Num Steps: $NUM_STEPS"
+echo "=============================================="
+
+# Run training
+python train_rlvr.py \
+    --precision_mode "$PRECISION_MODE" \
+    --seed "$SEED" \
+    --output_dir "$OUTPUT_DIR" \
+    --train_dataset_path "$TRAIN_DATA" \
+    --model_name "$MODEL_NAME" \
+    --num_steps "$NUM_STEPS" \
+    2>&1 | tee "${OUTPUT_DIR}/training.log"
+
+echo "Training complete. Output saved to: $OUTPUT_DIR"
+
diff --git a/scripts/setup_env.sh b/scripts/setup_env.sh
new file mode 100755
index 0000000..743f99a
--- /dev/null
+++ b/scripts/setup_env.sh
@@ -0,0 +1,79 @@
+#!/bin/bash
+# setup_env.sh
+# One-time setup script for the RLVR floating-point precision experiment
+# Run this BEFORE submitting any jobs
+
+set -e
+
+CONDA_ENV="rlvr-fp"
+PROJECT_DIR="/projects/bfqt/users/yurenh2/ml-projects/rl-floating-noise"
+
+echo "============================================"
+echo "RLVR Environment Setup"
+echo "============================================"
+
+# Setup HuggingFace cache directories
+echo ""
+echo "Setting up HuggingFace cache..."
+HF_CACHE_DIR="/work/hdd/bfqt/yurenh2/huggingface_cache"
+mkdir -p "$HF_CACHE_DIR/hub" "$HF_CACHE_DIR/transformers"
+echo " Cache directory: $HF_CACHE_DIR"
+
+# Add to shell profile if not already present
+PROFILE_FILE="$HOME/.bashrc"
+if ! grep -q "HF_HOME.*huggingface_cache" "$PROFILE_FILE" 2>/dev/null; then
+    echo ""
+    echo "Adding HuggingFace cache settings to $PROFILE_FILE..."
+    cat >> "$PROFILE_FILE" << 'EOF'
+
+# HuggingFace cache - shared across all projects (added by RLVR setup)
+export HF_HOME="/work/hdd/bfqt/yurenh2/huggingface_cache"
+export HF_HUB_CACHE="/work/hdd/bfqt/yurenh2/huggingface_cache/hub"
+export TRANSFORMERS_CACHE="/work/hdd/bfqt/yurenh2/huggingface_cache/transformers"
+EOF
+    echo " Added to $PROFILE_FILE"
+else
+    echo " HuggingFace settings already in $PROFILE_FILE"
+fi
+
+# Source to apply changes
+source "$PROFILE_FILE"
+
+# Check if conda environment exists (conda comes from the profile sourced above)
+echo ""
+echo "Checking conda environment..."
+
+if conda env list | grep -q "^${CONDA_ENV} "; then
+    echo " Environment '$CONDA_ENV' already exists"
+    echo " To recreate, run: conda env remove -n $CONDA_ENV && $0"
+else
+    echo " Creating conda environment: $CONDA_ENV"
+    conda create -n "$CONDA_ENV" python=3.10 -y
+
+    echo ""
+    echo "Installing dependencies..."
+    conda activate "$CONDA_ENV"
+    cd "$PROJECT_DIR"
+    pip install -r requirements.txt
+
+    echo ""
+    echo "Verifying installation..."
+    python -c "import torch; print(f'PyTorch: {torch.__version__}')"
+    python -c "import transformers; print(f'Transformers: {transformers.__version__}')"
+fi
+
+echo ""
+echo "============================================"
+echo "Setup complete!"
+echo "============================================"
+echo ""
+echo "To activate the environment:"
+echo " conda activate $CONDA_ENV"
+echo ""
+echo "To run experiments:"
+echo " ./scripts/submit_all_jobs.sh"
+echo ""
+echo "HuggingFace cache location: $HF_CACHE_DIR"
+echo " (1TB quota, shared across all projects)"
+echo "============================================"
diff --git a/scripts/slurm_train.sh b/scripts/slurm_train.sh
new file mode 100755
index 0000000..36bd5b1
--- /dev/null
+++ b/scripts/slurm_train.sh
@@ -0,0 +1,145 @@
+#!/bin/bash
+#SBATCH --job-name=rlvr_fp_exp
+#SBATCH --account=bfqt-delta-gpu
+#SBATCH --partition=gpuH200x8
+#SBATCH --nodes=1
+#SBATCH --ntasks-per-node=1
+#SBATCH --cpus-per-task=16
+#SBATCH --gres=gpu:h200:4
+#SBATCH --mem=200G
+#SBATCH --time=2-00:00:00
+#SBATCH --output=results/slurm_logs/%x_%j.out
+#SBATCH --error=results/slurm_logs/%x_%j.err
+#SBATCH --mail-type=BEGIN,END,FAIL
+# NOTE: $USER is not expanded inside #SBATCH directives; set a literal address
+#SBATCH --mail-user=your_email@example.com
+
+# Propagate exit codes through pipes (deliberately no `set -e`: the training
+# exit code is captured and checked explicitly below)
+set -o pipefail
+
+# ============================================
+# RLVR Floating-Point Precision Experiment
+# H200x8 SLURM Job Script
+# ============================================
+
+# Configuration - modify these as needed
+PRECISION_MODE=${PRECISION_MODE:-"bf16"}
+SEED=${SEED:-1}
+NUM_STEPS=${NUM_STEPS:-150} # ~45 hours on H200 with sequential generation
+
+# Paths
+PROJECT_DIR="/projects/bfqt/users/yurenh2/ml-projects/rl-floating-noise"
+CONDA_ENV="rlvr-fp" # Change to your conda env name
+MODEL_NAME="Qwen/Qwen2.5-Math-7B"
+TRAIN_DATA="${PROJECT_DIR}/data/dm_train.json"
+
+# ============================================
+# HuggingFace cache configuration
+# Use shared HDD storage to avoid home directory quota issues
+# This cache is shared across all projects
+# ============================================
+export HF_HOME="/work/hdd/bfqt/yurenh2/huggingface_cache"
+export HF_HUB_CACHE="/work/hdd/bfqt/yurenh2/huggingface_cache/hub"
+mkdir -p "$HF_HOME" "$HF_HUB_CACHE"
+
+# Print job info
+echo "============================================"
+echo "SLURM Job ID: $SLURM_JOB_ID"
+echo "Running on: $(hostname)"
+echo "Start time: $(date)"
+echo "============================================"
+echo "Precision Mode: $PRECISION_MODE"
+echo "Seed: $SEED"
+echo "Num Steps: $NUM_STEPS"
+echo "GPUs: $CUDA_VISIBLE_DEVICES"
+echo "HF Cache: $HF_HOME"
+echo "============================================"
+
+# Setup environment
+cd "$PROJECT_DIR"
+mkdir -p results/slurm_logs
+
+# Activate conda environment
+source ~/.bashrc
+
+# Check if conda environment exists
+if conda env list | grep -q "^${CONDA_ENV} "; then
+    echo "Activating existing conda environment: $CONDA_ENV"
+    conda activate "$CONDA_ENV"
+else
+    echo "ERROR: Conda environment '$CONDA_ENV' does not exist!"
+    echo "Please create it first by running:"
+    echo " conda create -n $CONDA_ENV python=3.10 -y"
+    echo " conda activate $CONDA_ENV"
+    echo " pip install -r requirements.txt"
+    exit 1
+fi
+
+# Verify activation succeeded
+if [[ "$CONDA_DEFAULT_ENV" != "$CONDA_ENV" ]]; then
+ echo "ERROR: Failed to activate conda environment '$CONDA_ENV'"
+ exit 1
+fi
+echo "Conda environment activated: $CONDA_DEFAULT_ENV"
+
+# Check GPU availability
+nvidia-smi
+echo "CUDA devices: $(python -c 'import torch; print(torch.cuda.device_count())')"
+
+# Output directory (use /work for large checkpoints - /projects is limited)
+OUTPUT_DIR="/work/hdd/bfqt/yurenh2/rlvr_results/${PRECISION_MODE}_seed${SEED}"
+mkdir -p "$OUTPUT_DIR"
+
+# DeepSpeed config (ZeRO-3 for full sharding of model/optimizer/gradients)
+DEEPSPEED_CONFIG="${PROJECT_DIR}/configs/deepspeed_zero3.json"
+
+# Number of GPUs for DeepSpeed (all GPUs, ref model on same GPU as training per rank)
+NUM_GPUS=$(python -c 'import torch; print(torch.cuda.device_count())')
+echo "Using $NUM_GPUS GPUs for DeepSpeed training (ref model on each rank's GPU)"
+
+# Use random port to avoid conflicts with other jobs
+MASTER_PORT=$((29500 + RANDOM % 1000))
+echo "Using master port: $MASTER_PORT"
+
+# Run training with DeepSpeed
+echo ""
+echo "Starting training with DeepSpeed ZeRO-3..."
+echo "============================================"
+
+deepspeed --num_gpus=$NUM_GPUS --master_port=$MASTER_PORT train_rlvr.py \
+    --precision_mode "$PRECISION_MODE" \
+    --seed "$SEED" \
+    --output_dir "$OUTPUT_DIR" \
+    --train_dataset_path "$TRAIN_DATA" \
+    --model_name "$MODEL_NAME" \
+    --num_steps "$NUM_STEPS" \
+    --deepspeed "$DEEPSPEED_CONFIG" \
+    2>&1 | tee "${OUTPUT_DIR}/training_slurm.log"
+
+# IMPORTANT: With set -o pipefail, $? now captures deepspeed's exit code, not tee's
+TRAIN_EXIT_CODE=$?
+
+echo ""
+echo "============================================"
+echo "Training completed with exit code: $TRAIN_EXIT_CODE"
+echo "End time: $(date)"
+echo "============================================"
+
+# If training succeeded, run evaluation
+if [ $TRAIN_EXIT_CODE -eq 0 ]; then
+    echo ""
+    echo "Starting evaluation..."
+    echo "============================================"
+
+    python eval_policy.py \
+        --base_ckpt "$MODEL_NAME" \
+        --ft_ckpt "${OUTPUT_DIR}/final_model" \
+        --eval_tasks_config configs/eval_tasks_config.json \
+        --output_path "results/eval_metrics/${PRECISION_MODE}_seed${SEED}.json" \
+        --eval_base \
+        --use_amp \
+        2>&1 | tee "${OUTPUT_DIR}/eval_slurm.log"
+fi
+
+echo ""
+echo "Job completed at: $(date)"
+
diff --git a/scripts/submit_all_jobs.sh b/scripts/submit_all_jobs.sh
new file mode 100755
index 0000000..86c0f5d
--- /dev/null
+++ b/scripts/submit_all_jobs.sh
@@ -0,0 +1,66 @@
+#!/bin/bash
+# submit_all_jobs.sh
+# Submit all experiment jobs to SLURM queue
+# Jobs will run automatically when resources become available
+
+set -e
+
+PROJECT_DIR="/projects/bfqt/users/yurenh2/ml-projects/rl-floating-noise"
+cd "$PROJECT_DIR"
+
+# Create log directory
+mkdir -p results/slurm_logs
+
+# Configuration
+SEEDS=(1 2 3 4 5)
+PRECISION_MODES=("fp32" "bf16")
+
+echo "============================================"
+echo "Submitting RLVR Experiment Jobs"
+echo "============================================"
+echo "Seeds: ${SEEDS[*]}"
+echo "Precision Modes: ${PRECISION_MODES[*]}"
+echo "Total jobs: $((${#SEEDS[@]} * ${#PRECISION_MODES[@]}))"
+echo "============================================"
+
+# Track submitted job IDs
+declare -a JOB_IDS
+
+for precision in "${PRECISION_MODES[@]}"; do
+    for seed in "${SEEDS[@]}"; do
+        JOB_NAME="rlvr_${precision}_s${seed}"
+
+        echo "Submitting: $JOB_NAME"
+
+        # Submit job with environment variables
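+        # (--export=ALL forwards the caller's environment plus PRECISION_MODE
+        # and SEED, which slurm_train.sh reads with ${VAR:-default} fallbacks)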
+        JOB_ID=$(sbatch \
+            --job-name="$JOB_NAME" \
+            --export=ALL,PRECISION_MODE="$precision",SEED="$seed" \
+            scripts/slurm_train.sh | awk '{print $4}')
+
+        JOB_IDS+=("$JOB_ID")
+        echo " -> Job ID: $JOB_ID"
+    done
+done
+
+echo ""
+echo "============================================"
+echo "All jobs submitted!"
+echo "Job IDs: ${JOB_IDS[*]}"
+echo "============================================"
+echo ""
+echo "Monitor with:"
+echo " squeue -u $USER"
+echo " squeue -j $(IFS=,; echo "${JOB_IDS[*]}")"
+echo ""
+echo "View logs:"
+echo " tail -f results/slurm_logs/rlvr_*.out"
+echo ""
+echo "Cancel all:"
+echo " scancel ${JOB_IDS[*]}"
+echo "============================================"
+
+# Save job IDs for reference
+echo "${JOB_IDS[*]}" > results/slurm_logs/submitted_jobs.txt
+echo "Job IDs saved to: results/slurm_logs/submitted_jobs.txt"
+
diff --git a/scripts/submit_single_job.sh b/scripts/submit_single_job.sh
new file mode 100755
index 0000000..7fe7492
--- /dev/null
+++ b/scripts/submit_single_job.sh
@@ -0,0 +1,32 @@
+#!/bin/bash
+# submit_single_job.sh
+# Submit a single training job
+# Usage: ./submit_single_job.sh <precision_mode> <seed>
+# Example: ./submit_single_job.sh bf16 1
+
+PRECISION_MODE=${1:-"bf16"}
+SEED=${2:-1}
+
+PROJECT_DIR="/projects/bfqt/users/yurenh2/ml-projects/rl-floating-noise"
+cd "$PROJECT_DIR"
+
+mkdir -p results/slurm_logs
+
+JOB_NAME="rlvr_${PRECISION_MODE}_s${SEED}"
+
+echo "Submitting job: $JOB_NAME"
+echo " Precision: $PRECISION_MODE"
+echo " Seed: $SEED"
+
+JOB_ID=$(sbatch \
+    --job-name="$JOB_NAME" \
+    --export=ALL,PRECISION_MODE="$PRECISION_MODE",SEED="$SEED" \
+    scripts/slurm_train.sh | awk '{print $4}')
+
+echo ""
+echo "Submitted! Job ID: $JOB_ID"
+echo ""
+echo "Monitor with: squeue -j $JOB_ID"
+echo "View output: tail -f results/slurm_logs/${JOB_NAME}_${JOB_ID}.out"
+echo "Cancel: scancel $JOB_ID"
+
diff --git a/scripts/test_quick.sh b/scripts/test_quick.sh
new file mode 100644
index 0000000..f66e73b
--- /dev/null
+++ b/scripts/test_quick.sh
@@ -0,0 +1,78 @@
+#!/bin/bash
+#SBATCH --job-name=rlvr_test
+#SBATCH --account=bfqt-delta-gpu
+#SBATCH --partition=gpuH200x8
+#SBATCH --nodes=1
+#SBATCH --ntasks-per-node=1
+#SBATCH --cpus-per-task=16
+#SBATCH --gres=gpu:h200:4
+#SBATCH --mem=200G
+#SBATCH --time=04:00:00
+#SBATCH --output=results/slurm_logs/test_%j.out
+#SBATCH --error=results/slurm_logs/test_%j.err
+
+set -o pipefail
+
+PROJECT_DIR="/projects/bfqt/users/yurenh2/ml-projects/rl-floating-noise"
+cd "$PROJECT_DIR"
+
+source ~/.bashrc
+conda activate rlvr-fp
+
+export HF_HOME="/work/hdd/bfqt/yurenh2/huggingface_cache"
+export HF_HUB_CACHE="/work/hdd/bfqt/yurenh2/huggingface_cache/hub"
+
+echo "============================================"
+echo "Quick test on $(hostname)"
+echo "SLURM Job ID: $SLURM_JOB_ID"
+nvidia-smi
+echo "============================================"
+
+NUM_GPUS=$(python -c 'import torch; print(torch.cuda.device_count())')
+echo "Using $NUM_GPUS GPUs for DeepSpeed"
+
+# Use random port to avoid conflicts
+MASTER_PORT=$((29500 + RANDOM % 1000))
+echo "Using master port: $MASTER_PORT"
+
+# Test fp32 with just 3 steps
+echo "Testing fp32..."
+deepspeed --num_gpus=$NUM_GPUS --master_port=$MASTER_PORT train_rlvr.py \
+    --precision_mode fp32 \
+    --seed 1 \
+    --output_dir /work/hdd/bfqt/yurenh2/rlvr_results/test_fp32 \
+    --train_dataset_path data/dm_train.json \
+    --model_name Qwen/Qwen2.5-Math-7B \
+    --num_steps 3 \
+    --deepspeed configs/deepspeed_zero3.json
+
+FP32_EXIT=$?
+echo "fp32 test exit code: $FP32_EXIT"
+
+if [ $FP32_EXIT -eq 0 ]; then
+    echo "fp32 test PASSED"
+
+    # Also test bf16
+    echo "Testing bf16..."
+    deepspeed --num_gpus=$NUM_GPUS --master_port=$MASTER_PORT train_rlvr.py \
+        --precision_mode bf16 \
+        --seed 1 \
+        --output_dir /work/hdd/bfqt/yurenh2/rlvr_results/test_bf16 \
+        --train_dataset_path data/dm_train.json \
+        --model_name Qwen/Qwen2.5-Math-7B \
+        --num_steps 3 \
+        --deepspeed configs/deepspeed_zero3.json
+
+    BF16_EXIT=$?
+    echo "bf16 test exit code: $BF16_EXIT"
+
+    if [ $BF16_EXIT -eq 0 ]; then
+        echo "============================================"
+        echo "ALL TESTS PASSED!"
+        echo "============================================"
+    else
+        echo "bf16 test FAILED"
+    fi
+else
+    echo "fp32 test FAILED"
+fi