summary refs log tree commit diff
path: root/scripts/download_personamem.py
diff options
context:
space:
mode:
Diffstat (limited to 'scripts/download_personamem.py')
-rw-r--r-- scripts/download_personamem.py 25
1 files changed, 25 insertions, 0 deletions
diff --git a/scripts/download_personamem.py b/scripts/download_personamem.py
new file mode 100644
index 0000000..31b4e0e
--- /dev/null
+++ b/scripts/download_personamem.py
@@ -0,0 +1,25 @@
+from huggingface_hub import hf_hub_download
+import os
+
+# Download the PersonaMem files listed below from the Hugging Face Hub into
+# local_dir, printing progress to stdout as it goes.
+repo_id = "bowen-upenn/PersonaMem"
+local_dir = "data/raw_datasets/personamem"
+# Names of the files to request from the repository, in download order.
+files_to_download = ["questions_32k.csv", "shared_contexts_32k.jsonl"]
+
+# exist_ok=True keeps a rerun from failing when the directory is already there.
+os.makedirs(local_dir, exist_ok=True)
+
+print(f"Downloading files from {repo_id} to {local_dir}...")
+
+# One hf_hub_download call per file, each announced before it starts.
+for filename in files_to_download:
+    print(f"Downloading {filename}...")
+    # repo_type="dataset" targets the dataset repo rather than a model repo.
+    hf_hub_download(
+        repo_id=repo_id, filename=filename, repo_type="dataset", local_dir=local_dir
+    )
+
+print("Download complete.")
+