From 5626080ca4c4219aec4888d6b9406d0d3349fb55 Mon Sep 17 00:00:00 2001
From: YurenHao0426 <blackhao0426@gmail.com>
Date: Tue, 10 Feb 2026 20:16:36 +0000
Subject: Add RAG rewrite, 60-session experiment scripts, and analysis tools

- RAG rewrite adapter and vector preference pipeline in personalized_llm
- 60-session experiment queue scripts (reflection, rag, rag_vector, rag_rewrite)
- Vector-preference correlation analysis and visualization scripts
- Local reward model batch processing improvements
- Updated CLAUDE.md with full experiment documentation and notes

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 .../scripts/analyze_vector_preference.py           | 103 +++++++++++++++++++++
 1 file changed, 103 insertions(+)
 create mode 100755 collaborativeagents/scripts/analyze_vector_preference.py

(limited to 'collaborativeagents/scripts/analyze_vector_preference.py')

diff --git a/collaborativeagents/scripts/analyze_vector_preference.py b/collaborativeagents/scripts/analyze_vector_preference.py
new file mode 100755
index 0000000..7079b26
--- /dev/null
+++ b/collaborativeagents/scripts/analyze_vector_preference.py
@@ -0,0 +1,103 @@
+#!/usr/bin/env python3
+"""
+Analyze the strength of association between user vectors and revealed preferences.
+"""
+import json
+import numpy as np
+from pathlib import Path
+import sys
+
+def load_experiment(exp_dir):
+    """Load vectors and results of the first complete run under ``exp_dir``.
+
+    Probes ``<exp_dir>/<sub>/<method>/`` (sorted sub-directories, so the pick is
+    deterministic) for ``user_vectors.npz`` and ``results.json``. Returns a dict
+    with "vectors", "results", "method", or None if the dir or files are missing.
+    """
+    root = Path(exp_dir)
+    subdirs = sorted(p for p in root.iterdir() if p.is_dir()) if root.is_dir() else []
+    for method in ("rag_vector", "rag_vector_fast", "rag_vector_balanced"):
+        for run_dir in (sub / method for sub in subdirs):
+            vectors_path, results_path = run_dir / "user_vectors.npz", run_dir / "results.json"
+            if vectors_path.is_file() and results_path.is_file():
+                with open(results_path, encoding="utf-8") as fh:  # closes the handle
+                    results = json.load(fh)
+                vectors = np.load(vectors_path, allow_pickle=True)  # NOTE: unsafe on untrusted files
+                return {"vectors": vectors, "results": results, "method": method}
+    return None
+
+def analyze_vectors(data):
+    """Print summary statistics of the user vectors and return them as a dict.
+
+    ``data["vectors"]`` must provide "user_ids", "z_long" and "z_short" (the z
+    arrays are reduced along axis 1); ``data["results"]`` is an iterable of
+    session dicts that may carry "profile_id" and "metrics" -> "task_success".
+
+    Returns "n_users", "nonzero_long", "nonzero_short", "mean_norm_long",
+    "mean_norm_short" and "corr_norm_success" (correlation of z_long norm with
+    per-user success rate; None if <= 5 users match or a series is constant).
+    """
+    results = data["results"]
+    user_ids, z_long, z_short = (data["vectors"][k] for k in ("user_ids", "z_long", "z_short"))
+
+    print("=== User Vector 分析 ===")
+    print(f"用户数: {len(user_ids)}")
+    print(f"Vector维度: {z_long.shape[1]}")
+
+    # Row-wise L2 norms; a user counts as "non-zero" when that norm is non-zero.
+    z_long_norms, z_short_norms = (np.linalg.norm(z, axis=1) for z in (z_long, z_short))
+    nonzero_long = int(np.count_nonzero(z_long_norms))
+    nonzero_short = int(np.count_nonzero(z_short_norms))
+    mean_long, mean_short = float(np.mean(z_long_norms)), float(np.mean(z_short_norms))
+
+    print(f"\nz_long 非零用户: {nonzero_long}/{len(user_ids)}")
+    print(f"z_short 非零用户: {nonzero_short}/{len(user_ids)}")
+    print(f"z_long norm 均值: {mean_long:.4f}")
+    print(f"z_short norm 均值: {mean_short:.4f}")
+    print("\n=== Vector Norm vs 性能 ===")
+    # Tally [successes, sessions] per profile id.
+    tallies = {}
+    for session in results:
+        tally = tallies.setdefault(session.get("profile_id", ""), [0, 0])
+        # "metrics" may be missing or null; both count as an unsuccessful session.
+        tally[0] += bool((session.get("metrics") or {}).get("task_success", False))
+        tally[1] += 1
+
+    # Pair each matched user's success rate with the z_long norm of the same row.
+    pairs = [(tallies[uid][0] / tallies[uid][1], norm)
+             for uid, norm in zip(user_ids, z_long_norms) if uid in tallies]
+    corr = None
+    if len(pairs) > 5:
+        success_rates, norms = zip(*pairs)
+        # np.corrcoef returns NaN (plus a RuntimeWarning) for a constant series,
+        # e.g. when every z_long is still zero, so report that case explicitly.
+        if min(success_rates) < max(success_rates) and min(norms) < max(norms):
+            corr = float(np.corrcoef(success_rates, norms)[0, 1])
+            print(f"z_long norm vs 成功率 相关系数: {corr:.4f}")
+        else:
+            print("z_long norm vs 成功率 相关系数: N/A (方差为零)")
+
+    return {
+        "n_users": len(user_ids),
+        "nonzero_long": nonzero_long,
+        "nonzero_short": nonzero_short,
+        "mean_norm_long": mean_long,
+        "mean_norm_short": mean_short,
+        "corr_norm_success": corr,
+    }
+
+if __name__ == "__main__":
+    # Script entry point: the experiment directory is the first CLI argument.
+    try:
+        exp_dir = sys.argv[1]
+    except IndexError:
+        print("Usage: python analyze_vector_preference.py <experiment_dir>")
+        print("Example: python analyze_vector_preference.py collaborativeagents/results/rag_vector_v3")
+        sys.exit(1)
+    data = load_experiment(exp_dir)
+    if data is not None:
+        print(f"加载实验: {data['method']}")
+        analyze_vectors(data)
+    else:  # load_experiment returned None
+        print(f"未找到有效的rag_vector实验结果: {exp_dir}")
+        sys.exit(1)
-- 
cgit v1.2.3