summary | refs | log | tree | commit | diff
path: root/scripts/download_personamem.py
diff options
context:
space:
mode:
author: YurenHao0426 <blackhao0426@gmail.com> 2025-12-17 04:29:37 -0600
committer: YurenHao0426 <blackhao0426@gmail.com> 2025-12-17 04:29:37 -0600
commit: e43b3f8aa36c198b95c1e46bea2eaf3893b13dc3 (patch)
tree: 6ce8a00d2f8b9ebd83c894a27ea01ac50cfb2ff5 /scripts/download_personamem.py
Initial commit (clean history) [HEAD, main]
Diffstat (limited to 'scripts/download_personamem.py')
-rw-r--r-- scripts/download_personamem.py 25
1 files changed, 25 insertions, 0 deletions
diff --git a/scripts/download_personamem.py b/scripts/download_personamem.py
new file mode 100644
index 0000000..31b4e0e
--- /dev/null
+++ b/scripts/download_personamem.py
@@ -0,0 +1,25 @@
+"""Fetch the PersonaMem 32k question and context files from the Hugging Face Hub."""
+import os
+
+from huggingface_hub import hf_hub_download
+
+# Dataset repository to read from and the local directory that receives the files.
+repo_id = "bowen-upenn/PersonaMem"
+local_dir = "data/raw_datasets/personamem"
+
+# Names of the repository files to fetch, downloaded in this order.
+files_to_download = ["questions_32k.csv", "shared_contexts_32k.jsonl"]
+
+# Create the target directory up front; an already-existing directory is not an error.
+os.makedirs(local_dir, exist_ok=True)
+print(f"Downloading files from {repo_id} to {local_dir}...")
+
+# Arguments shared by every download call; only the file name varies per call.
+common_kwargs = {"repo_id": repo_id, "repo_type": "dataset", "local_dir": local_dir}
+
+for filename in files_to_download:
+    print(f"Downloading {filename}...")
+    hf_hub_download(filename=filename, **common_kwargs)
+
+print("Download complete.")
+