#!/usr/bin/env python3
"""
Script to build Item Space (PCA Projection) from Memory Embeddings.

Inputs:
- data/corpora/memory_embeddings.npy (M x 4096)

Outputs:
- data/corpora/item_projection.npz (P, mean, V)
"""

import sys
import os
import numpy as np

# Add src to sys.path so the project package below can be imported when this
# file is executed directly as a script.
sys.path.append(os.path.join(os.path.dirname(__file__), "../src"))

from personalization.user_model.features import ItemProjection  # noqa: E402

# Defaults used when main() is called without arguments. The paths are
# relative, so they resolve against the current working directory.
DEFAULT_EMB_PATH = "data/corpora/memory_embeddings.npy"
DEFAULT_OUT_PATH = "data/corpora/item_projection.npz"
DEFAULT_K = 256


def main(
    emb_path: str = DEFAULT_EMB_PATH,
    out_path: str = DEFAULT_OUT_PATH,
    k: int = DEFAULT_K,
) -> None:
    """Fit a PCA item projection on stored embeddings and save it.

    Loads the embedding matrix from ``emb_path``, fits a projection with
    ``ItemProjection.from_pca(E, k=k)``, projects every embedding with
    ``transform_embeddings`` and writes the projection's ``P`` and ``mean``
    attributes together with the projected vectors ``V`` to ``out_path``
    using ``np.savez``.

    Args:
        emb_path: Path of the ``.npy`` file holding the embedding matrix.
        out_path: Destination of the ``.npz`` archive. ``np.savez`` appends
            ``.npz`` to the file name if it is not already there.
        k: Target dimension passed to ``ItemProjection.from_pca``.

    Exits the process with status 1 (message on stderr) when ``emb_path``
    does not exist or the loaded array is not a non-empty 2-D matrix.
    """
    if not os.path.exists(emb_path):
        # Diagnostics go to stderr so they are not mixed into progress output.
        print(
            f"Error: {emb_path} not found. Run migrate_preferences.py first.",
            file=sys.stderr,
        )
        sys.exit(1)

    print(f"Loading embeddings from {emb_path}...")
    E = np.load(emb_path)
    print(f"Loaded shape: {E.shape}")

    # The input is documented as an (M x D) matrix; stop early with a clear
    # message instead of handing a malformed array to the PCA fit.
    if E.ndim != 2 or E.size == 0:
        print(
            f"Error: expected a non-empty 2-D embedding matrix, "
            f"got shape {E.shape}.",
            file=sys.stderr,
        )
        sys.exit(1)

    print(f"Fitting PCA with k={k}...")
    proj = ItemProjection.from_pca(E, k=k)

    print("Transforming all embeddings to item space...")
    V = proj.transform_embeddings(E)
    print(f"Item vectors shape: {V.shape}")

    print(f"Saving projection to {out_path}...")
    np.savez(
        out_path,
        P=proj.P,
        mean=proj.mean,
        V=V,
    )
    print("Done.")


if __name__ == "__main__":
    main()