diff options
| author | YurenHao0426 <blackhao0426@gmail.com> | 2025-12-17 04:29:37 -0600 |
|---|---|---|
| committer | YurenHao0426 <blackhao0426@gmail.com> | 2025-12-17 04:29:37 -0600 |
| commit | e43b3f8aa36c198b95c1e46bea2eaf3893b13dc3 (patch) | |
| tree | 6ce8a00d2f8b9ebd83c894a27ea01ac50cfb2ff5 /src/personalization/user_model/features.py | |
Diffstat (limited to 'src/personalization/user_model/features.py')
| -rw-r--r-- | src/personalization/user_model/features.py | 49 |
1 files changed, 49 insertions, 0 deletions
from dataclasses import dataclass

import numpy as np


@dataclass
class ItemProjection:
    """Linear projection of item embeddings onto a fixed number of PCA axes.

    An embedding ``e`` of dimension ``d`` is mapped to ``P @ (e - mean)``,
    a vector of dimension ``k``.
    """

    P: np.ndarray  # [k, d]; each row is one principal component (or zeros)
    mean: np.ndarray  # [d]; mean embedding subtracted before projecting

    @classmethod
    def from_pca(cls, embeddings: np.ndarray, k: int) -> "ItemProjection":
        """Fit a PCA projection on ``embeddings`` and return it with fixed ``k`` rows.

        Args:
            embeddings: Array of shape ``[M, d]`` holding ``M`` item embeddings.
            k: Number of output dimensions. The returned ``P`` always has
                exactly ``k`` rows so downstream shapes stay fixed.

        Returns:
            An ``ItemProjection`` whose ``P`` has shape ``[k, d]`` and whose
            ``mean`` has shape ``[d]``. Rows of ``P`` beyond the number of
            components the data can support are all zeros.

        Raises:
            ValueError: If ``embeddings`` is not 2-D, is empty, or ``k`` is
                negative.
        """
        embeddings = np.asarray(embeddings)
        if embeddings.ndim != 2:
            raise ValueError(
                f"embeddings must be 2-D [M, d], got shape {embeddings.shape}"
            )
        if embeddings.size == 0:
            raise ValueError("embeddings must be non-empty")
        if k < 0:
            raise ValueError(f"k must be non-negative, got {k}")

        n_samples, n_features = embeddings.shape
        mean = embeddings.mean(axis=0)
        centered = embeddings - mean

        # Mean-centred data has rank at most n_samples - 1. Asking PCA for
        # more components than that yields directions tied to a zero singular
        # value, which are numerically arbitrary, so cap at the rank bound and
        # let the zero padding below cover the remainder.
        actual_k = min(k, n_samples - 1, n_features)

        if actual_k > 0:
            # Imported here so that applying an existing projection (the
            # transform_* methods) does not require scikit-learn.
            from sklearn.decomposition import PCA

            pca = PCA(n_components=actual_k)
            pca.fit(centered)
            components = pca.components_  # [actual_k, d]
        else:
            components = np.zeros((0, n_features), dtype=centered.dtype)

        # The rest of the system expects a fixed k, so missing components
        # are represented by zero rows (they project everything to 0).
        P = np.zeros((k, n_features), dtype=components.dtype)
        P[:actual_k] = components

        return cls(P=P, mean=mean)

    def transform_embeddings(self, E: np.ndarray) -> np.ndarray:
        """Project a batch of embeddings: ``E`` of shape ``[N, d]`` -> ``[N, k]``."""
        return (E - self.mean) @ self.P.T

    def transform_vector(self, e: np.ndarray) -> np.ndarray:
        """Project a single embedding: ``e`` of shape ``[d]`` -> ``[k]``."""
        return self.P @ (e - self.mean)
