#!/usr/bin/env python3
"""Analyze how strongly user vectors are associated with revealed preferences.

The script locates a finished ``rag_vector*`` experiment below an experiment
directory, loads the saved per-user vectors together with the per-session
results, prints summary statistics about the vectors, and reports the Pearson
correlation between each user's ``z_long`` norm and their task success rate.
"""

import json
import sys
from pathlib import Path

import numpy as np

# Method sub-directory names probed under each run directory, in priority order.
_METHODS = ("rag_vector", "rag_vector_fast", "rag_vector_balanced")


def load_experiment(exp_dir):
    """Load the first complete experiment result found under *exp_dir*.

    The expected layout is ``<exp_dir>/<run>/<method>/`` containing both
    ``user_vectors.npz`` and ``results.json``. Methods are tried in the order
    given by ``_METHODS``; for each method the run directories are visited in
    sorted order so the selection is reproducible across platforms.

    Args:
        exp_dir: Path (``str`` or ``Path``) of the experiment root directory.

    Returns:
        A dict with keys ``"vectors"`` (the opened ``.npz`` archive),
        ``"results"`` (the parsed JSON document) and ``"method"`` (the method
        directory name that matched), or ``None`` when *exp_dir* is not a
        directory or holds no complete result.
    """
    exp_path = Path(exp_dir)
    if not exp_path.is_dir():
        # A missing or non-directory path is reported like "nothing found"
        # instead of leaking an OSError out of iterdir().
        return None

    # List the run directories once; stray files cannot contain results.
    run_dirs = sorted(p for p in exp_path.iterdir() if p.is_dir())

    for method in _METHODS:
        for run_dir in run_dirs:
            result_dir = run_dir / method
            vectors_path = result_dir / "user_vectors.npz"
            results_path = result_dir / "results.json"
            if not (vectors_path.exists() and results_path.exists()):
                continue

            with results_path.open(encoding="utf-8") as fh:
                results = json.load(fh)

            return {
                # SECURITY: allow_pickle=True can execute arbitrary code while
                # unpickling object arrays. Only point this script at
                # experiment output you trust.
                "vectors": np.load(vectors_path, allow_pickle=True),
                "results": results,
                "method": method,
            }
    return None


def analyze_vectors(data):
    """Print user-vector statistics and their correlation with success rate.

    Args:
        data: Mapping as returned by ``load_experiment``. ``data["vectors"]``
            must provide ``"user_ids"``, ``"z_long"`` and ``"z_short"``;
            ``z_long`` and ``z_short`` are indexed as 2-D arrays with one row
            per entry of ``user_ids``. ``data["results"]`` is an iterable of
            dicts that may carry ``"profile_id"`` and a ``"metrics"`` dict
            with ``"task_success"`` and ``"enforcement_count"``.

    Returns:
        A dict with ``n_users``, ``nonzero_long``, ``nonzero_short``,
        ``mean_norm_long`` and ``mean_norm_short``.
    """
    vectors = data["vectors"]
    results = data["results"]

    user_ids = vectors["user_ids"]
    z_long = vectors["z_long"]
    z_short = vectors["z_short"]
    n_users = len(user_ids)

    print("=== User Vector 分析 ===")
    print(f"用户数: {n_users}")
    print(f"Vector维度: {z_long.shape[1]}")

    # Per-user L2 norms; a zero norm means the vector is all zeros.
    z_long_norms = np.linalg.norm(z_long, axis=1)
    z_short_norms = np.linalg.norm(z_short, axis=1)

    # Coerce to built-in int so the returned summary is JSON-serialisable.
    nonzero_long = int(np.count_nonzero(z_long_norms))
    nonzero_short = int(np.count_nonzero(z_short_norms))
    mean_norm_long = float(np.mean(z_long_norms))
    mean_norm_short = float(np.mean(z_short_norms))

    print(f"\nz_long 非零用户: {nonzero_long}/{n_users}")
    print(f"z_short 非零用户: {nonzero_short}/{n_users}")
    print(f"z_long norm 均值: {mean_norm_long:.4f}")
    print(f"z_short norm 均值: {mean_norm_short:.4f}")

    # Relate each user's task performance to the norm of their vector.
    print("\n=== Vector Norm vs 性能 ===")

    user_stats = {}
    for session in results:
        uid = session.get("profile_id", "")
        stats = user_stats.setdefault(
            uid, {"success": 0, "total": 0, "enforce": 0}
        )
        metrics = session.get("metrics", {})
        stats["total"] += 1
        if metrics.get("task_success", False):
            stats["success"] += 1
        # Accumulated per user but not reported by this function yet.
        stats["enforce"] += metrics.get("enforcement_count", 0)

    # Pair success rate and norm for every user that has at least one session.
    success_rates = []
    norms = []
    for i, uid in enumerate(user_ids):
        stats = user_stats.get(uid)
        if stats is not None and stats["total"] > 0:
            success_rates.append(stats["success"] / stats["total"])
            norms.append(z_long_norms[i])

    # The correlation is only reported when more than five users are paired.
    if len(success_rates) > 5:
        # A constant series (e.g. every z_long is zero) makes the coefficient
        # undefined: NumPy returns NaN and would emit a RuntimeWarning.
        # Silence the warning; NaN is still printed.
        with np.errstate(divide="ignore", invalid="ignore"):
            corr = np.corrcoef(success_rates, norms)[0, 1]
        print(f"z_long norm vs 成功率 相关系数: {corr:.4f}")

    return {
        "n_users": n_users,
        "nonzero_long": nonzero_long,
        "nonzero_short": nonzero_short,
        "mean_norm_long": mean_norm_long,
        "mean_norm_short": mean_norm_short,
    }


def main(argv=None):
    """Run the analysis from the command line.

    Args:
        argv: Argument list including the program name; defaults to
            ``sys.argv``.

    Returns:
        Process exit code: ``0`` on success, ``1`` on a usage error or when
        no experiment result could be found.
    """
    args = sys.argv if argv is None else argv
    if len(args) < 2:
        print("Usage: python analyze_vector_preference.py <exp_dir>")
        print("Example: python analyze_vector_preference.py collaborativeagents/results/rag_vector_v3")
        return 1

    exp_dir = args[1]
    data = load_experiment(exp_dir)
    if data is None:
        print(f"未找到有效的rag_vector实验结果: {exp_dir}")
        return 1

    print(f"加载实验: {data['method']}")
    try:
        analyze_vectors(data)
    finally:
        # The .npz archive keeps a file handle open until it is closed.
        data["vectors"].close()
    return 0


if __name__ == "__main__":
    sys.exit(main())