summaryrefslogtreecommitdiff
path: root/collaborativeagents/scripts/test_new_rewrite.sh
diff options
context:
space:
mode:
author YurenHao0426 <blackhao0426@gmail.com> 2026-02-10 20:16:36 +0000
committer YurenHao0426 <blackhao0426@gmail.com> 2026-02-10 20:16:36 +0000
commit 5626080ca4c4219aec4888d6b9406d0d3349fb55 (patch)
tree 86287d9fd5833e11ccd78566992540f2664fd195 /collaborativeagents/scripts/test_new_rewrite.sh
parent a2036838807428424bbbaff507a6563749a83145 (diff)
Add RAG rewrite, 60-session experiment scripts, and analysis tools
- RAG rewrite adapter and vector preference pipeline in personalized_llm
- 60-session experiment queue scripts (reflection, rag, rag_vector, rag_rewrite)
- Vector-preference correlation analysis and visualization scripts
- Local reward model batch processing improvements
- Updated CLAUDE.md with full experiment documentation and notes

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
Diffstat (limited to 'collaborativeagents/scripts/test_new_rewrite.sh')
-rwxr-xr-x collaborativeagents/scripts/test_new_rewrite.sh 50
1 files changed, 50 insertions, 0 deletions
diff --git a/collaborativeagents/scripts/test_new_rewrite.sh b/collaborativeagents/scripts/test_new_rewrite.sh
new file mode 100755
index 0000000..1ade8ea
--- /dev/null
+++ b/collaborativeagents/scripts/test_new_rewrite.sh
@@ -0,0 +1,50 @@
+#!/bin/bash
+# 小规模测试:验证新的rewrite prompt是否降低E/T
+# 10 profiles × 10 sessions = 100 sessions
+
+echo "$(date '+%H:%M:%S') 启动 rag_rewrite 小规模测试 (新prompt)..."
+
+cd /workspace/personalization-user-model
+
+python collaborativeagents/scripts/run_experiments.py \
+ --methods rag_rewrite \
+ --datasets math-hard,bigcodebench \
+ --n-profiles 10 \
+ --n-sessions 10 \
+ --max-turns 10 \
+ --use-vllm \
+ --vllm-agent-url http://localhost:8003/v1 \
+ --vllm-user-url http://localhost:8004/v1 \
+ --use-batch-processing \
+ --batch-size 4 \
+ --parallel-profiles 10 \
+ --profile-path collaborativeagents/data/complex_profiles_v2/profiles_200.jsonl \
+ --output-dir collaborativeagents/results/test_new_rewrite_10x10
+
+echo "$(date '+%H:%M:%S') 测试完成"
+
+# 自动分析结果
+python3 << 'ANALYZE'
+import json
+import numpy as np
+
+result_path = "collaborativeagents/results/test_new_rewrite_10x10"
+import glob
+results_file = glob.glob(f"{result_path}/*/rag_rewrite/results.json")
+
+if results_file:
+ with open(results_file[0]) as f:
+ data = json.load(f)
+
+ enforcements = sum(r["metrics"]["enforcement_count"] for r in data)
+ turns = sum(r["metrics"]["total_turns"] for r in data)
+ successes = sum(1 for r in data if r["metrics"]["task_success"])
+
+ print(f"\n=== 新Rewrite Prompt测试结果 ===")
+ print(f"Sessions: {len(data)}")
+ print(f"Success Rate: {100*successes/len(data):.1f}%")
+ print(f"E/T: {enforcements/turns:.4f}")
+ print(f"(对比旧rewrite E/T: 0.194)")
+else:
+ print("结果文件未找到")
+ANALYZE