# path: root/scripts/download_personamem.py
# blob: 31b4e0ea3bd93560a5b591932e7cf47182289916
from huggingface_hub import hf_hub_download
import os

# Hub repository to fetch from and the local directory the files are placed in.
repo_id = "bowen-upenn/PersonaMem"
local_dir = "data/raw_datasets/personamem"

# Names of the individual files requested from the repository.
files_to_download = ["questions_32k.csv", "shared_contexts_32k.jsonl"]

# Keyword arguments that are identical for every download call below.
_shared_download_kwargs = dict(
    repo_id=repo_id,
    repo_type="dataset",
    local_dir=local_dir,
)

# Create the destination directory first; an already-existing one is accepted.
os.makedirs(local_dir, exist_ok=True)

print(f"Downloading files from {repo_id} to {local_dir}...")

# Fetch the files one at a time, announcing each before it starts.
for filename in files_to_download:
    print(f"Downloading {filename}...")
    hf_hub_download(filename=filename, **_shared_download_kwargs)

print("Download complete.")