summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorWill DePue <williamd@openai.com>2026-03-18 11:20:27 -0700
committerWill DePue <williamd@openai.com>2026-03-18 11:20:52 -0700
commit0c0ea98e6ad92bab5fd2aaab226b6a6f0e68f4d2 (patch)
treec8261adf355d3dbe11ceb3a96e2b490204f206fa
parentfe16a5a6c17ee88288b02f04b3d4e7caf02bf4af (diff)
Remove scripts
-rwxr-xr-xscripts/replace_hf_dataset_with_export.py116
-rwxr-xr-xscripts/upload_when_ready.sh23
2 files changed, 0 insertions, 139 deletions
diff --git a/scripts/replace_hf_dataset_with_export.py b/scripts/replace_hf_dataset_with_export.py
deleted file mode 100755
index 4934755..0000000
--- a/scripts/replace_hf_dataset_with_export.py
+++ /dev/null
@@ -1,116 +0,0 @@
-#!/usr/bin/env python3
-"""Replace challenge dataset artifacts in a Hugging Face dataset repo with a local export."""
-
-from __future__ import annotations
-
-import argparse
-from pathlib import Path
-
-from huggingface_hub import HfApi
-
-
-DEFAULT_REPO_ID = "willdepueoai/parameter-golf"
-DEFAULT_PATH_IN_REPO = "datasets"
-DATA_ARTIFACT_NAMES = {
- "datasets",
- "tokenizers",
- "manifest.json",
- "docs_selected.jsonl",
- "docs_selected.source_manifest.json",
- "tokenizer_config.export.json",
- "snapshot_meta.json",
-}
-
-
-def repo_path(prefix: str, name: str) -> str:
- return f"{prefix}/{name}" if prefix else name
-
-
-def build_parser() -> argparse.ArgumentParser:
- parser = argparse.ArgumentParser(description="Replace old dataset artifacts in a HF dataset repo with a local export")
- parser.add_argument("--repo-id", default=DEFAULT_REPO_ID)
- parser.add_argument("--local-export-root", required=True)
- parser.add_argument("--path-in-repo", default=DEFAULT_PATH_IN_REPO, help="Subdirectory inside the dataset repo")
- parser.add_argument("--repo-type", default="dataset")
- parser.add_argument("--revision", default=None)
- parser.add_argument("--commit-message", default="Replace dataset export")
- parser.add_argument("--dry-run", action="store_true")
- return parser
-
-
-def main() -> None:
- args = build_parser().parse_args()
- api = HfApi()
- local_export_root = Path(args.local_export_root).expanduser().resolve()
- if not local_export_root.is_dir():
- raise FileNotFoundError(local_export_root)
-
- prefix = args.path_in_repo.strip("/")
- top_level_local = {path.name for path in local_export_root.iterdir()}
- delete_names = sorted(DATA_ARTIFACT_NAMES | top_level_local)
- root_entries = {
- entry.path: entry
- for entry in api.list_repo_tree(
- repo_id=args.repo_id,
- recursive=False,
- repo_type=args.repo_type,
- revision=args.revision,
- )
- }
-
- if prefix:
- if prefix in root_entries:
- print(f"delete {prefix}")
- if not args.dry_run:
- api.delete_folder(
- prefix,
- repo_id=args.repo_id,
- repo_type=args.repo_type,
- revision=args.revision,
- commit_message=f"Delete {prefix}",
- )
-
- remote_entries = root_entries if not prefix else {}
-
- for name in delete_names:
- if prefix:
- break
- remote_path = repo_path(prefix, name)
- entry = remote_entries.get(remote_path)
- if entry is None:
- continue
- print(f"delete {remote_path}")
- if args.dry_run:
- continue
- if entry.__class__.__name__ == "RepoFolder":
- api.delete_folder(
- remote_path,
- repo_id=args.repo_id,
- repo_type=args.repo_type,
- revision=args.revision,
- commit_message=f"Delete {remote_path}",
- )
- else:
- api.delete_file(
- remote_path,
- repo_id=args.repo_id,
- repo_type=args.repo_type,
- revision=args.revision,
- commit_message=f"Delete {remote_path}",
- )
-
- print(f"upload {local_export_root} -> {prefix or '/'}")
- if args.dry_run:
- return
- api.upload_folder(
- repo_id=args.repo_id,
- repo_type=args.repo_type,
- revision=args.revision,
- folder_path=local_export_root,
- path_in_repo=prefix or None,
- commit_message=args.commit_message,
- )
-
-
-if __name__ == "__main__":
- main()
diff --git a/scripts/upload_when_ready.sh b/scripts/upload_when_ready.sh
deleted file mode 100755
index 570ad0a..0000000
--- a/scripts/upload_when_ready.sh
+++ /dev/null
@@ -1,23 +0,0 @@
-#!/usr/bin/env bash
-set -euo pipefail
-
-if [[ $# -lt 2 || $# -gt 3 ]]; then
- echo "usage: $0 <src_root> <dest_root> [poll_seconds]" >&2
- exit 1
-fi
-
-src_root=$1
-dest_root=$2
-poll_seconds=${3:-120}
-manifest_path="${src_root%/}/manifest.json"
-
-if [[ "$poll_seconds" -le 0 ]]; then
- echo "poll_seconds must be positive" >&2
- exit 1
-fi
-
-while [[ ! -f "$manifest_path" ]]; do
- sleep "$poll_seconds"
-done
-
-bbb cptree "$src_root" "$dest_root"