#!/usr/bin/env python3
"""Replace challenge dataset artifacts in a Hugging Face dataset repo with a local export."""

# Provenance: reconstructed from the patch that deleted
# scripts/replace_hf_dataset_with_export.py (commit 0c0ea98e6ad9, "Remove scripts").

from __future__ import annotations

import argparse
from collections.abc import Iterable, Sequence
from pathlib import Path

DEFAULT_REPO_ID = "willdepueoai/parameter-golf"
DEFAULT_PATH_IN_REPO = "datasets"
# Names removed from the repo root (when present) before an upload that targets
# the root, in addition to whatever the local export contains at its top level.
DATA_ARTIFACT_NAMES = {
    "datasets",
    "tokenizers",
    "manifest.json",
    "docs_selected.jsonl",
    "docs_selected.source_manifest.json",
    "tokenizer_config.export.json",
    "snapshot_meta.json",
}


def repo_path(prefix: str, name: str) -> str:
    """Join *name* onto *prefix* with ``/``; return *name* alone if *prefix* is empty."""
    return f"{prefix}/{name}" if prefix else name


def build_parser() -> argparse.ArgumentParser:
    """Build the command-line parser for this script."""
    parser = argparse.ArgumentParser(
        description="Replace old dataset artifacts in a HF dataset repo with a local export"
    )
    parser.add_argument("--repo-id", default=DEFAULT_REPO_ID)
    parser.add_argument("--local-export-root", required=True)
    parser.add_argument(
        "--path-in-repo",
        default=DEFAULT_PATH_IN_REPO,
        help="Subdirectory inside the dataset repo",
    )
    parser.add_argument("--repo-type", default="dataset")
    parser.add_argument("--revision", default=None)
    parser.add_argument("--commit-message", default="Replace dataset export")
    parser.add_argument("--dry-run", action="store_true")
    return parser


def _deletion_candidates(prefix: str, local_names: Iterable[str]) -> list[str]:
    """Return the repo paths to clear before uploading.

    With a non-empty *prefix* the whole target path is replaced, so it is the
    only candidate. With an empty prefix (upload to the repo root) the
    candidates are the known artifact names plus the export's top-level names.
    """
    if prefix:
        return [prefix]
    return sorted(DATA_ARTIFACT_NAMES | set(local_names))


def main(argv: Sequence[str] | None = None) -> None:
    """Delete stale remote artifacts, then upload the local export.

    Args:
        argv: Command-line arguments to parse; ``None`` makes argparse read
            ``sys.argv``.

    Raises:
        FileNotFoundError: ``--local-export-root`` is not an existing directory.
    """
    args = build_parser().parse_args(argv)
    local_export_root = Path(args.local_export_root).expanduser().resolve()
    if not local_export_root.is_dir():
        raise FileNotFoundError(local_export_root)

    # Imported here so that argument parsing and local-path validation above
    # do not require the third-party package.
    from huggingface_hub import HfApi
    from huggingface_hub.hf_api import RepoFolder

    api = HfApi()
    target = {
        "repo_id": args.repo_id,
        "repo_type": args.repo_type,
        "revision": args.revision,
    }
    prefix = args.path_in_repo.strip("/")
    candidates = _deletion_candidates(
        prefix, (child.name for child in local_export_root.iterdir())
    )

    # Ask the Hub about the exact candidate paths instead of listing the repo
    # root: a root listing never contains a nested prefix such as "a/b", which
    # would leave the old folder in place and merge the upload into stale data.
    stale_entries = api.get_paths_info(paths=candidates, **target)
    for entry in sorted(stale_entries, key=lambda item: item.path):
        print(f"delete {entry.path}")
        if args.dry_run:
            continue
        remove = api.delete_folder if isinstance(entry, RepoFolder) else api.delete_file
        remove(entry.path, commit_message=f"Delete {entry.path}", **target)

    print(f"upload {local_export_root} -> {prefix or '/'}")
    if args.dry_run:
        return
    api.upload_folder(
        folder_path=local_export_root,
        path_in_repo=prefix or None,
        commit_message=args.commit_message,
        **target,
    )


if __name__ == "__main__":
    main()