# notebooks/upload_to_hf.py

"""Bundle model code + checkpoints + configs + a small Sudoku test set and push to a HuggingFace
model repo so the notebook is fully self-contained. Run AFTER `huggingface-cli login`.
  python upload_to_hf.py            # creates/updates HF_REPO (public)
"""
import shutil, json
from pathlib import Path
import numpy as np
from huggingface_hub import HfApi, create_repo

# Destination model repo on the HuggingFace Hub.
HF_REPO = "YurenHao0426/recursive-reasoning-chaos"

# Root of the local working tree; every source path below is resolved against it.
RRM = Path("/home/yurenh2/rrm")

# Run directories holding the trained checkpoints and their `all_config.yaml`.
# NOTE: the HRM directory names contain spaces, while the TRM ones use hyphens.
TRM_CK = RRM.joinpath(
    "trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro"
)
HRM_CK = RRM.joinpath(
    "hrm/checkpoints/Sudoku-extreme-1k-aug-1000 ACT-torch/HierarchicalReasoningModel_ACTV1 righteous-python"
)

# Dataset directory; its `test/` and `train/` subfolders are read during staging.
DATA = RRM.joinpath("data/sudoku-extreme-1k-aug-1000")

# Staging area for the upload. It is wiped and recreated on every run so nothing
# left over from an earlier invocation ends up in the pushed folder.
stage = Path("/tmp/hf_chaos_upload")
if stage.exists():
    shutil.rmtree(stage)
stage.mkdir(parents=True)

# 1. model code (separate packages)
# Each model family keeps its own `models` package, staged under its own
# `code_*` root. Bytecode caches are left out of the copy.
skip_bytecode = shutil.ignore_patterns("__pycache__")
for src_rel, dst_rel in (
    ("trm/models", "code_trm/models"),
    ("hrm/models", "code_hrm/models"),
):
    shutil.copytree(RRM / src_rel, stage / dst_rel, ignore=skip_bytecode)

# 2. checkpoints + configs
# One row per model: (run directory, checkpoint file to export, staged folder).
# Each staged folder receives the checkpoint renamed to `weights.pt` plus the
# run's `all_config.yaml`.
ckpt_exports = (
    (TRM_CK, "step_58590", stage / "trm_sudoku"),
    (HRM_CK, "step_26040", stage / "hrm_sudoku"),
)
# Create both target folders first, then copy, model by model.
for _, _, out_dir in ckpt_exports:
    out_dir.mkdir()
for run_dir, step_file, out_dir in ckpt_exports:
    shutil.copy(run_dir / step_file, out_dir / "weights.pt")
    shutil.copy(run_dir / "all_config.yaml", out_dir / "all_config.yaml")

# 3. small test set (2000 puzzles) + meta
data_out = stage / "data"
data_out.mkdir()

# Fixed seed, so the same subset is drawn on every run.
rng = np.random.default_rng(0)
inp = np.load(DATA / "test/all__inputs.npy")
lab = np.load(DATA / "test/all__labels.npy")
pid = np.load(DATA / "test/all__puzzle_identifiers.npy")

# 2000 distinct row indices (sampling without replacement). The same indices
# are applied to all three arrays so that their rows stay aligned.
sel = rng.choice(len(inp), 2000, replace=False)
for out_name, full_array in (
    ("sudoku_test_inputs.npy", inp),
    ("sudoku_test_labels.npy", lab),
    ("sudoku_test_pid.npy", pid),
):
    np.save(data_out / out_name, full_array[sel])

# The metadata is read from the *train* split's dataset.json and re-serialised
# (parsed, then dumped) rather than copied byte-for-byte.
meta = json.loads((DATA / "train/dataset.json").read_text())
(data_out / "sudoku_meta.json").write_text(json.dumps(meta))

# Model-card README placed at the root of the staged folder. The text is kept
# as adjacent fragments so each source line stays short; they are concatenated
# as-is, with no separator.
readme_fragments = (
    "# recursive-reasoning-chaos\n\n",
    "Trained TRM/HRM checkpoints (Sudoku-Extreme) + model code + a 2000-puzzle test set, for the\n",
    "companion notebook (github.com/YurenHao0426/recursive-reasoning-dynamics, ",
    "notebooks/recursive_reasoning_chaos.ipynb). Reproduces 'recursive-reasoning failures are\n",
    "(transient) chaos': TRM failures escape with more inference compute; HRM failures stay trapped.\n\n",
    "Layout: `code_trm/`, `code_hrm/` (model packages), `trm_sudoku/`, `hrm_sudoku/` ",
    "(weights.pt + all_config.yaml), `data/` (sudoku test subset + meta).\n\n",
    "Upstream model code: TRM (SamsungSAILMontreal/TinyRecursiveModels), HRM (sapientinc/HRM).\n",
)
readme_path = stage / "README.md"
readme_path.write_text("".join(readme_fragments))

# Report where the bundle was staged and how large it is before uploading.
# The size is computed in-process rather than by shelling out to `du`, so it
# needs no external tool and cannot fail silently with an empty line.
# It is the sum of the staged files' byte sizes (apparent size), which can
# differ slightly from the on-disk usage that `du` reports.
print("staged at", stage, "-> sizes:")
staged_bytes = sum(f.stat().st_size for f in stage.rglob("*") if f.is_file())
print(f"{staged_bytes / 2**20:.1f}M\t{stage}")

# Publish: make sure the public model repo exists (a no-op if it already does,
# thanks to exist_ok=True), then push the whole staged folder to it.
repo_kind = "model"
create_repo(HF_REPO, repo_type=repo_kind, private=False, exist_ok=True)

api = HfApi()
api.upload_folder(
    folder_path=str(stage),
    repo_id=HF_REPO,
    repo_type=repo_kind,
)
print(f"uploaded to https://huggingface.co/{HF_REPO}")