#!/usr/bin/env python3
"""
Script to build Item Space (PCA Projection) from Memory Embeddings.

Inputs:
- data/corpora/memory_embeddings.npy (M x 4096)
Outputs:
- data/corpora/item_projection.npz (P, mean, V)
"""

# Provenance: scripts/build_item_space.py as added in commit
# e43b3f8aa36c198b95c1e46bea2eaf3893b13dc3 ("Initial commit (clean history)").

import os
import sys

import numpy as np

# Add src to sys.path so the project package resolves when this file is run
# directly as a script. This must happen before the project import below.
sys.path.append(os.path.join(os.path.dirname(__file__), "../src"))

from personalization.user_model.features import ItemProjection  # noqa: E402

# Defaults are relative paths, so they resolve against the current working
# directory, not against this script's location.
DEFAULT_EMB_PATH = "data/corpora/memory_embeddings.npy"
DEFAULT_OUT_PATH = "data/corpora/item_projection.npz"
DEFAULT_K = 256


def main(emb_path=DEFAULT_EMB_PATH, out_path=DEFAULT_OUT_PATH, k=DEFAULT_K):
    """Fit a PCA item projection on stored embeddings and save it.

    Loads the embedding matrix from ``emb_path``, fits an ``ItemProjection``
    via ``ItemProjection.from_pca``, transforms the same embeddings with it,
    and writes the projection's ``P`` and ``mean`` attributes together with
    the transformed vectors ``V`` into a single ``.npz`` archive.

    Args:
        emb_path: Path of the ``.npy`` file holding the embeddings.
        out_path: Path of the ``.npz`` archive to write. ``numpy.savez``
            appends ``.npz`` when the name does not already end with it.
        k: Target dimension handed to ``ItemProjection.from_pca``.

    Raises:
        SystemExit: With exit status 1 (message on stderr) when ``emb_path``
            does not exist.
    """
    if not os.path.exists(emb_path):
        # sys.exit with a string prints it to stderr and exits with status 1,
        # keeping the diagnostic out of the normal progress output.
        sys.exit(f"Error: {emb_path} not found. Run migrate_preferences.py first.")

    print(f"Loading embeddings from {emb_path}...")
    embeddings = np.load(emb_path)
    print(f"Loaded shape: {embeddings.shape}")

    print(f"Fitting PCA with k={k}...")
    # NOTE(review): from_pca is defined outside this file; presumably it needs
    # k to be no larger than either dimension of the matrix - confirm there.
    proj = ItemProjection.from_pca(embeddings, k=k)

    print("Transforming all embeddings to item space...")
    item_vectors = proj.transform_embeddings(embeddings)
    print(f"Item vectors shape: {item_vectors.shape}")

    print(f"Saving projection to {out_path}...")
    # A caller-supplied out_path may point into a directory that does not
    # exist yet; the default shares its directory with the input file.
    out_dir = os.path.dirname(out_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    np.savez(
        out_path,
        P=proj.P,
        mean=proj.mean,
        V=item_vectors,
    )
    print("Done.")


if __name__ == "__main__":
    main()