#!/usr/bin/env python3
"""One-click restore of the git-ignored large assets (TinyStories-BPE data + key
checkpoints) from the private HF dataset repo, into the correct paths.

So: `git clone` + `python pull_assets.py` = a full working tree
(ep_run/data/ + ep_run/runs/ reconstructed in place).

Prereqs:
    pip install -U huggingface_hub
    huggingface-cli login   # must have access to the private repo below (ask Yuren)

What it restores:
    ep_run/data/tinystories_bpe/    (train.bin / val.bin / tokenizer.json / meta.pkl)
    ep_run/runs/redx_traj/s2000.pt  (the warm-start operator, §5 of ONBOARDING)
    ep_run/runs/{ep_rr_ajr, ep_resreg_scratch, ep_fast_adaptive, bptt_clean}.pt
                                    (key result checkpoints)

Exit status is 0 when every directory in EXPECTED_DIRS exists after the download,
and 1 when at least one of them is missing.
"""
import os
import subprocess
import sys

REPO = "blackhao0426/ept-assets"  # private HF dataset repo (mirrors ep_run/ layout)
HERE = os.path.dirname(os.path.abspath(__file__))
DEST = os.path.join(HERE, "ep_run")

# Directories (relative to DEST) that must exist once the restore has finished.
EXPECTED_DIRS = ("data/tinystories_bpe", "runs")


def restored_item_counts(dest, expected=EXPECTED_DIRS):
    """Count the entries of each expected directory under *dest*.

    Args:
        dest: Root directory the assets were downloaded into.
        expected: Iterable of paths relative to *dest* to inspect.

    Returns:
        A dict mapping each relative path to the number of entries directly
        inside it, or to ``None`` when that path is not an existing directory.
    """
    counts = {}
    for rel in expected:
        full = os.path.join(dest, rel)
        counts[rel] = len(os.listdir(full)) if os.path.isdir(full) else None
    return counts


def main():
    """Download the asset snapshot into DEST and verify the expected layout.

    Imports ``huggingface_hub`` lazily; when the import fails, the package is
    installed with pip for the running interpreter and the import is retried.

    Returns:
        0 when every directory in EXPECTED_DIRS is present after the download,
        1 when at least one is missing (the missing ones are listed on stderr).

    Raises:
        subprocess.CalledProcessError: if the fallback ``pip install`` fails.
    """
    try:
        from huggingface_hub import snapshot_download
    except ImportError:
        import importlib

        subprocess.run(
            [sys.executable, "-m", "pip", "install", "-q", "huggingface_hub"],
            check=True,
        )
        # The import system caches directory listings; without this the package
        # installed a moment ago may still be reported as missing.
        importlib.invalidate_caches()
        from huggingface_hub import snapshot_download

    print(f"restoring {REPO} -> {DEST}/{{data,runs}} ...", flush=True)
    snapshot_download(repo_id=REPO, repo_type="dataset", local_dir=DEST)

    missing = []
    for rel, count in restored_item_counts(DEST).items():
        if count is None:
            missing.append(rel)
            print(f" MISSING ep_run/{rel}/", file=sys.stderr, flush=True)
        else:
            print(f" ok ep_run/{rel}/ ({count} items)", flush=True)

    if missing:
        # Do not claim success when part of the expected tree never arrived.
        print(
            f"restore incomplete — {len(missing)} expected path(s) missing under {DEST}",
            file=sys.stderr,
            flush=True,
        )
        return 1

    print("done — working tree restored.", flush=True)
    return 0


if __name__ == "__main__":
    sys.exit(main())