blob: 743f99ab5ba7656d499fa0650faec4c9863c2923 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
|
#!/bin/bash
# setup_env.sh
#
# One-time bootstrap for the RLVR floating-point precision experiment.
# Run it once, BEFORE any jobs are submitted.

# Abort as soon as any command fails.
set -e

# Name of the conda environment and location of the project checkout;
# both are consumed by the later steps of this script.
CONDA_ENV=rlvr-fp
PROJECT_DIR=/projects/bfqt/users/yurenh2/ml-projects/rl-floating-noise

# Opening banner.
printf '%s\n' \
  "============================================" \
  "RLVR Environment Setup" \
  "============================================"
# Create the HuggingFace cache tree: the cache root plus its hub/ and
# transformers/ subdirectories (existing directories are left alone).
printf '\n'
printf '%s\n' "Setting up HuggingFace cache..."
HF_CACHE_DIR=/work/hdd/bfqt/yurenh2/huggingface_cache
mkdir -p "$HF_CACHE_DIR"/{hub,transformers}
printf ' Cache directory: %s\n' "$HF_CACHE_DIR"
# Persist the HuggingFace cache settings in the user's shell profile and
# apply them to this process.
#
# Reads:   HF_CACHE_DIR (cache root assigned earlier in this script), HOME
# Writes:  PROFILE_FILE; appends an export block to that file unless it
#          already has an HF_HOME line mentioning "huggingface_cache";
#          exports HF_HOME, HF_HUB_CACHE and TRANSFORMERS_CACHE.
: "${HF_CACHE_DIR:?HF_CACHE_DIR must be set before updating the shell profile}"
PROFILE_FILE="$HOME/.bashrc"

# A missing profile makes grep fail (its error output is silenced on
# purpose); that takes the "append" branch, which creates the file.
if ! grep -q "HF_HOME.*huggingface_cache" "$PROFILE_FILE" 2>/dev/null; then
  echo ""
  echo "Adding HuggingFace cache settings to $PROFILE_FILE..."
  # Unquoted delimiter: ${HF_CACHE_DIR} is expanded while writing, so the
  # profile always points at the directory tree created by this script.
  # The leading blank line keeps the block off the profile's last line when
  # that file does not end with a newline.
  cat >> "$PROFILE_FILE" << EOF

# HuggingFace cache - shared across all projects (added by RLVR setup)
export HF_HOME="${HF_CACHE_DIR}"
export HF_HUB_CACHE="${HF_CACHE_DIR}/hub"
export TRANSFORMERS_CACHE="${HF_CACHE_DIR}/transformers"
EOF
  echo " Added to $PROFILE_FILE"
else
  echo " HuggingFace settings already in $PROFILE_FILE"
fi

# Apply the settings to this process directly rather than sourcing the
# profile: rc files commonly return early in non-interactive shells (so the
# exports appended at the end would never run), and under `set -e` any
# failing command inside the rc file would abort the whole setup.
export HF_HOME="$HF_CACHE_DIR"
export HF_HUB_CACHE="$HF_CACHE_DIR/hub"
export TRANSFORMERS_CACHE="$HF_CACHE_DIR/transformers"
# Create the conda environment and install the project's Python
# dependencies, unless an environment with that name already exists.
#
# Reads:   CONDA_ENV, PROJECT_DIR (assigned at the top of this script)
# Effect:  when the environment is missing, creates it with Python 3.10,
#          activates it, changes into PROJECT_DIR, installs requirements.txt
#          and prints the installed torch / transformers versions.
# Exits:   non-zero when conda cannot be found or any setup command fails.
echo ""
echo "Checking conda environment..."

# ~/.bashrc may be what puts conda on PATH, so it is still sourced. errexit
# is switched off around it so that a failing command inside the user's rc
# file cannot abort the setup.
if [[ -f ~/.bashrc ]]; then
  set +e
  source ~/.bashrc
  set -e
fi

# `conda activate` only works through conda's shell function. An rc file
# that returns early in non-interactive shells never defines it, so load it
# explicitly from the conda installation when it is missing.
if ! declare -F conda > /dev/null; then
  if ! command -v conda > /dev/null 2>&1; then
    echo "ERROR: conda not found; load or install conda, then re-run $0" >&2
    exit 1
  fi
  conda_base="$(conda info --base)"
  source "${conda_base}/etc/profile.d/conda.sh"
fi

# Compare the first column of `conda env list` with the name exactly. awk
# consumes the whole listing and treats the name as a literal string.
if conda env list \
  | awk -v name="$CONDA_ENV" '$1 == name { found = 1 } END { exit !found }'; then
  # NOTE(review): an existing environment is not checked for installed
  # dependencies; after a failed first run, recreate it as printed below.
  echo " Environment '$CONDA_ENV' already exists"
  echo " To recreate, run: conda env remove -n $CONDA_ENV && $0"
else
  echo " Creating conda environment: $CONDA_ENV"
  conda create -n "$CONDA_ENV" python=3.10 -y
  echo ""
  echo "Installing dependencies..."
  conda activate "$CONDA_ENV"
  # requirements.txt is looked up relative to the project checkout.
  cd "$PROJECT_DIR" || {
    echo "ERROR: cannot change into $PROJECT_DIR" >&2
    exit 1
  }
  # `python -m pip` guarantees the packages land in the interpreter that was
  # just activated, whatever `pip` happens to be first on PATH.
  python -m pip install -r requirements.txt
  echo ""
  echo "Verifying installation..."
  python -c "import torch; print(f'PyTorch: {torch.__version__}')"
  python -c "import transformers; print(f'Transformers: {transformers.__version__}')"
fi
# Closing summary: how to activate the environment, how to launch the
# experiments, and where the HuggingFace cache lives.
printf '%s\n' \
  "" \
  "============================================" \
  "Setup complete!" \
  "============================================" \
  "" \
  "To activate the environment:" \
  " conda activate $CONDA_ENV" \
  "" \
  "To run experiments:" \
  " ./scripts/submit_all_jobs.sh" \
  "" \
  "HuggingFace cache location: $HF_CACHE_DIR" \
  " (1TB quota, shared across all projects)" \
  "============================================"
|