#!/usr/bin/env bash
#
# Unified environment setup script for the rrm workspace.
# Used to reproduce HRM (sapientinc/HRM) + TRM
# (SamsungSAILMontreal/TinyRecursiveModels).
#
# Usage: bash env/setup.sh
#
# Optional environment variables:
#   ENV_NAME    name of the conda environment to create/activate (default: rrm)
#   PY_VER      Python version used when the environment is created (default: 3.10)
#   WANDB_MODE  exported for the remainder of this script (default: offline)
#
# Provenance: added as env/setup.sh (mode 100755) by commit
# 66e0d8b9fd4d0f7a2231d689c055e26fdf1cf04a, YurenHao0426,
# Sat, 13 Jun 2026 12:35:36 -0500:
#   rrm workspace: TRM/HRM/SRM code, Maze dataset, dynamical-analysis pipeline
#   Curated export for clone-and-run Maze training (2x A6000) + diagnostics.
#   trm/hrm pretrain.py carry trajectory-augmentation code (backward-compatible).
#   Heavy artifacts (checkpoints/wandb/npz) gitignored; see PROVENANCE.md.
#   Co-Authored-By: Claude Fable 5
set -euo pipefail

ENV_NAME=${ENV_NAME:-rrm}
PY_VER=${PY_VER:-3.10}
readonly FLASH_ATTN_VER=2.8.3

# Print an error message to stderr and abort the script.
die() {
  printf 'setup.sh: %s\n' "$*" >&2
  exit 1
}

# Run a conda subcommand with nounset temporarily disabled.
# conda's activate/reactivate hooks (and packages' activate.d scripts) are
# evaluated in this shell and can read unset variables, which would abort
# the script under `set -u`.
conda_relaxed() {
  local rc=0
  set +u
  conda "$@" || rc=$?
  set -u
  return "$rc"
}

# Succeed iff a conda environment named exactly $1 is listed.
# The listing is captured first instead of piped into `grep -q`: with
# pipefail, grep exiting early can make the pipeline report failure
# (SIGPIPE upstream) even when the name matched. The comparison is a
# literal string match on the first whitespace-separated field.
env_exists() {
  local env_list first _rest
  env_list=$(conda env list) || die "'conda env list' failed"
  while read -r first _rest; do
    if [[ "$first" == "$1" ]]; then
      return 0
    fi
  done <<<"$env_list"
  return 1
}

command -v conda >/dev/null || die "conda not found on PATH"
conda_base=$(conda info --base) || die "'conda info --base' failed"

# Load the conda shell function so `conda activate` works in this
# non-interactive shell (nounset relaxed, see conda_relaxed).
set +u
# shellcheck disable=SC1091
source "${conda_base}/etc/profile.d/conda.sh"
set -u

# Create the environment only when it does not exist yet.
if ! env_exists "$ENV_NAME"; then
  conda create -n "$ENV_NAME" python="$PY_VER" -y
fi
conda_relaxed activate "$ENV_NAME"

# Use `python -m pip` throughout so packages land in the interpreter that
# is verified at the end of this script.
python -m pip install --upgrade pip wheel setuptools packaging ninja setuptools-scm

# Torch 2.7.0 cu126 (TRM specific_requirements pinned)
python -m pip install torch==2.7.0+cu126 torchvision==0.22.0+cu126 torchaudio==2.7.0+cu126 \
  --index-url https://download.pytorch.org/whl/cu126

# Merged requirements (HRM + TRM), located next to this script.
script_dir=$(dirname -- "${BASH_SOURCE[0]}")
python -m pip install -r "${script_dir}/requirements.txt"

# FlashAttention 2 — A6000/Ampere prebuilt wheel (cxx11abi=TRUE matches
# torch 2.7 cu126). The CPython tag is derived from the active interpreter
# so a non-default PY_VER does not request the cp310 wheel; with Python 3.10
# this resolves to the same URL as the previously hard-coded one.
py_tag=$(python -c 'import sys; print("cp%d%d" % sys.version_info[:2])')
flash_attn_whl="flash_attn-${FLASH_ATTN_VER}+cu12torch2.7cxx11abiTRUE-${py_tag}-${py_tag}-linux_x86_64.whl"
python -m pip install \
  "https://github.com/Dao-AILab/flash-attention/releases/download/v${FLASH_ATTN_VER}/${flash_attn_whl}"

# adam-atan2 must be compiled with nvcc; the conda cuda-toolkit package
# provides nvcc + headers. `conda install` re-runs activation hooks, hence
# the relaxed wrapper.
conda_relaxed install -y -c nvidia cuda-toolkit=12.6
CUDA_HOME="${CONDA_PREFIX:?conda environment is not active}" \
  python -m pip install --no-cache-dir --no-build-isolation adam-atan2==0.0.3

# wandb offline (avoid uploads from smoke tests).
# NOTE: when invoked as `bash env/setup.sh` this export only affects this
# script's own process, not the caller's shell.
export WANDB_MODE=${WANDB_MODE:-offline}

# Sanity check: import the compiled packages and report versions / GPUs.
python - <<'PY'
import torch, flash_attn, adam_atan2
print("torch:", torch.__version__, "CUDA:", torch.version.cuda)
print("GPUs:", torch.cuda.device_count(), "->", [torch.cuda.get_device_name(i) for i in range(torch.cuda.device_count())])
print("flash_attn:", flash_attn.__version__)
print("adam_atan2 ok")
PY

echo
echo "==> rrm env ready. conda activate $ENV_NAME"