#!/usr/bin/env bash
# Run diagnose_hrm.py over a fixed list of checkpoints, then merge the results.
#
# Checkpoints are processed sequentially. Each checkpoint is split into
# NUM_SHARDS shards that run in parallel, shard i pinned to
# CUDA_VISIBLE_DEVICES=i. Each shard writes
#   ckpt_evolution/<ckpt>_shard<i>.npz  (+ a matching .log)
# and a final Python step concatenates the shards of every checkpoint into
#   ckpt_evolution/<ckpt>.npz
#
# The script is resumable: existing shard outputs and already-merged
# checkpoints are not recomputed.
set -e # no -u to avoid conda conflict

REPO=/home/yurenh2/rrm/research/flossing
CKPT_ROOT="/home/yurenh2/rrm/hrm/checkpoints/Sudoku-extreme-1k-aug-1000 ACT-torch/HierarchicalReasoningModel_ACTV1 righteous-python"
OUT_DIR=ckpt_evolution # relative to $REPO
N=1024
K=8
NUM_SHARDS=3

# Checkpoints evaluated by this script.
CKPTS=(step_2604 step_7812 step_13020 step_18228 step_20832)
# Checkpoints merged at the end. step_26040 is not evaluated above;
# NOTE(review): presumably its shards come from a separate run -- confirm.
MERGE_CKPTS=("${CKPTS[@]}" step_26040)

source "$(conda info --base)/etc/profile.d/conda.sh"
conda activate rrm
cd "$REPO"
mkdir -p "$OUT_DIR"

for ckpt in "${CKPTS[@]}"; do
  if [[ -f "$OUT_DIR/${ckpt}.npz" ]]; then
    echo "skip $ckpt (merged exists)"
    continue
  fi
  echo "==> $ckpt"

  pids=()
  for ((shard = 0; shard < NUM_SHARDS; shard++)); do
    log="$OUT_DIR/${ckpt}_shard${shard}.log"
    out="$OUT_DIR/${ckpt}_shard${shard}.npz"
    if [[ -f "$out" ]]; then
      echo " skip $out"
      continue
    fi
    nohup env CUDA_VISIBLE_DEVICES="$shard" python diagnose_hrm.py \
      --ckpt-root "$CKPT_ROOT" --ckpt-name "$ckpt" \
      --n-samples "$N" --num-shards "$NUM_SHARDS" --shard-id "$shard" \
      --batch-size 64 --k-lyap "$K" \
      --out "$out" > "$log" 2>&1 &
    pids+=("$!")
  done

  # Reap every shard before deciding what to do. A bare `wait` under `set -e`
  # would abort on the first failure while the sibling shards keep running.
  failed=0
  for pid in "${pids[@]}"; do
    wait "$pid" || failed=1
  done
  if ((failed)); then
    echo "error: a shard of $ckpt failed; see $OUT_DIR/${ckpt}_shard*.log" >&2
    exit 1
  fi
  echo "<== $ckpt done"
done

# Final merge per checkpoint. The output directory and shard count are passed
# through the environment and the checkpoint names as arguments, so the Python
# side never duplicates the constants defined above.
MERGE_OUT_DIR="$REPO/$OUT_DIR" MERGE_NUM_SHARDS="$NUM_SHARDS" \
  python - "${MERGE_CKPTS[@]}" <<'PY'
import glob
import os
import re
import sys

import numpy as np

out_dir = os.environ["MERGE_OUT_DIR"]
num_shards = int(os.environ["MERGE_NUM_SHARDS"])
ckpts = sys.argv[1:]

SHARD_RE = re.compile(r"_shard(\d+)\.npz$")


def shard_index(path):
    """Numeric shard id parsed from a shard file name."""
    return int(SHARD_RE.search(path).group(1))


def mean_or_nan(a):
    """Mean of `a`, or nan for an empty array (avoids a NumPy RuntimeWarning)."""
    return float(a.mean()) if a.size else float("nan")


for c in ckpts:
    files = [f for f in glob.glob(f"{out_dir}/{c}_shard*.npz") if SHARD_RE.search(f)]
    # Sort by numeric shard id so concatenation order stays stable even with
    # ten or more shards (a plain string sort puts shard10 before shard2).
    files.sort(key=shard_index)
    if not files:
        print(f"missing {c}")
        continue
    if len(files) < num_shards:
        # A partial merge would create <ckpt>.npz, and the run loop above
        # would then skip this checkpoint forever ("merged exists").
        print(f"incomplete {c}: found {len(files)}/{num_shards} shards, not merging",
              file=sys.stderr)
        continue

    m = {}
    for f in files:
        # Context manager closes the underlying zip file handle.
        with np.load(f) as d:
            for k in d.files:
                m.setdefault(k, []).append(d[k])
    for k in list(m.keys()):
        m[k] = np.concatenate(m[k], 0)

    out = f"{out_dir}/{c}.npz"
    np.savez_compressed(out, **m)

    # Split samples into solved (s) and failed (~s) by the exact_correct flag.
    s = m['exact_correct'] > 0.5
    print(f"{c}: N={len(m['exact_correct'])} acc={m['exact_correct'].mean():.4f}  "
          f"λ_1(s)={mean_or_nan(m['lyap_spec'][s,0]):+.3f}  "
          f"λ_1(f)={mean_or_nan(m['lyap_spec'][~s,0]):+.3f}")
PY