diff options
| author | Will DePue <williamd@openai.com> | 2026-03-18 11:20:27 -0700 |
|---|---|---|
| committer | Will DePue <williamd@openai.com> | 2026-03-18 11:20:52 -0700 |
| commit | 0c0ea98e6ad92bab5fd2aaab226b6a6f0e68f4d2 (patch) | |
| tree | c8261adf355d3dbe11ceb3a96e2b490204f206fa | |
| parent | fe16a5a6c17ee88288b02f04b3d4e7caf02bf4af (diff) | |
Remove scripts
| -rwxr-xr-x | scripts/replace_hf_dataset_with_export.py | 116 | ||||
| -rwxr-xr-x | scripts/upload_when_ready.sh | 23 |
2 files changed, 0 insertions, 139 deletions
diff --git a/scripts/replace_hf_dataset_with_export.py b/scripts/replace_hf_dataset_with_export.py deleted file mode 100755 index 4934755..0000000 --- a/scripts/replace_hf_dataset_with_export.py +++ /dev/null @@ -1,116 +0,0 @@ -#!/usr/bin/env python3 -"""Replace challenge dataset artifacts in a Hugging Face dataset repo with a local export.""" - -from __future__ import annotations - -import argparse -from pathlib import Path - -from huggingface_hub import HfApi - - -DEFAULT_REPO_ID = "willdepueoai/parameter-golf" -DEFAULT_PATH_IN_REPO = "datasets" -DATA_ARTIFACT_NAMES = { - "datasets", - "tokenizers", - "manifest.json", - "docs_selected.jsonl", - "docs_selected.source_manifest.json", - "tokenizer_config.export.json", - "snapshot_meta.json", -} - - -def repo_path(prefix: str, name: str) -> str: - return f"{prefix}/{name}" if prefix else name - - -def build_parser() -> argparse.ArgumentParser: - parser = argparse.ArgumentParser(description="Replace old dataset artifacts in a HF dataset repo with a local export") - parser.add_argument("--repo-id", default=DEFAULT_REPO_ID) - parser.add_argument("--local-export-root", required=True) - parser.add_argument("--path-in-repo", default=DEFAULT_PATH_IN_REPO, help="Subdirectory inside the dataset repo") - parser.add_argument("--repo-type", default="dataset") - parser.add_argument("--revision", default=None) - parser.add_argument("--commit-message", default="Replace dataset export") - parser.add_argument("--dry-run", action="store_true") - return parser - - -def main() -> None: - args = build_parser().parse_args() - api = HfApi() - local_export_root = Path(args.local_export_root).expanduser().resolve() - if not local_export_root.is_dir(): - raise FileNotFoundError(local_export_root) - - prefix = args.path_in_repo.strip("/") - top_level_local = {path.name for path in local_export_root.iterdir()} - delete_names = sorted(DATA_ARTIFACT_NAMES | top_level_local) - root_entries = { - entry.path: entry - for entry in api.list_repo_tree( - repo_id=args.repo_id, - recursive=False, - repo_type=args.repo_type, - revision=args.revision, - ) - } - - if prefix: - if prefix in root_entries: - print(f"delete {prefix}") - if not args.dry_run: - api.delete_folder( - prefix, - repo_id=args.repo_id, - repo_type=args.repo_type, - revision=args.revision, - commit_message=f"Delete {prefix}", - ) - - remote_entries = root_entries if not prefix else {} - - for name in delete_names: - if prefix: - break - remote_path = repo_path(prefix, name) - entry = remote_entries.get(remote_path) - if entry is None: - continue - print(f"delete {remote_path}") - if args.dry_run: - continue - if entry.__class__.__name__ == "RepoFolder": - api.delete_folder( - remote_path, - repo_id=args.repo_id, - repo_type=args.repo_type, - revision=args.revision, - commit_message=f"Delete {remote_path}", - ) - else: - api.delete_file( - remote_path, - repo_id=args.repo_id, - repo_type=args.repo_type, - revision=args.revision, - commit_message=f"Delete {remote_path}", - ) - - print(f"upload {local_export_root} -> {prefix or '/'}") - if args.dry_run: - return - api.upload_folder( - repo_id=args.repo_id, - repo_type=args.repo_type, - revision=args.revision, - folder_path=local_export_root, - path_in_repo=prefix or None, - commit_message=args.commit_message, - ) - - -if __name__ == "__main__": - main() diff --git a/scripts/upload_when_ready.sh b/scripts/upload_when_ready.sh deleted file mode 100755 index 570ad0a..0000000 --- a/scripts/upload_when_ready.sh +++ /dev/null @@ -1,23 +0,0 @@ -#!/usr/bin/env bash -set -euo pipefail - -if [[ $# -lt 2 || $# -gt 3 ]]; then - echo "usage: $0 <src_root> <dest_root> [poll_seconds]" >&2 - exit 1 -fi - -src_root=$1 -dest_root=$2 -poll_seconds=${3:-120} -manifest_path="${src_root%/}/manifest.json" - -if [[ "$poll_seconds" -le 0 ]]; then - echo "poll_seconds must be positive" >&2 - exit 1 -fi - -while [[ ! -f "$manifest_path" ]]; do - sleep "$poll_seconds" -done - -bbb cptree "$src_root" "$dest_root" |
