#!/bin/bash
# Small-scale test: check whether the new rewrite prompt lowers E/T
# (total enforcement count divided by total turns, as computed below).
# 10 profiles x 10 sessions = 100 sessions.

# Abort on the first failing command so a failed experiment run is never
# followed by an analysis of stale or partial results.
set -euo pipefail

readonly PROJECT_DIR="/workspace/personalization-user-model"
# Relative to PROJECT_DIR; shared by the experiment run and the analysis step.
readonly OUTPUT_DIR="collaborativeagents/results/test_new_rewrite_10x10"

echo "$(date '+%H:%M:%S') 启动 rag_rewrite 小规模测试 (新prompt)..."

# Every path below is relative to the project root, so a failed cd must stop
# the script instead of letting the commands run from the wrong directory.
cd "$PROJECT_DIR" || {
  printf 'cannot cd to %s\n' "$PROJECT_DIR" >&2
  exit 1
}

python collaborativeagents/scripts/run_experiments.py \
  --methods rag_rewrite \
  --datasets math-hard,bigcodebench \
  --n-profiles 10 \
  --n-sessions 10 \
  --max-turns 10 \
  --use-vllm \
  --vllm-agent-url http://localhost:8003/v1 \
  --vllm-user-url http://localhost:8004/v1 \
  --use-batch-processing \
  --batch-size 4 \
  --parallel-profiles 10 \
  --profile-path collaborativeagents/data/complex_profiles_v2/profiles_200.jsonl \
  --output-dir "$OUTPUT_DIR"

echo "$(date '+%H:%M:%S') 测试完成"

# Automatically analyze the results. The heredoc delimiter is quoted, so the
# shell expands nothing inside it; the results directory is passed as argv[1].
python3 - "$OUTPUT_DIR" << 'ANALYZE'
import glob
import json
import os
import sys

result_path = sys.argv[1]

# The "*" level presumably holds one directory per run (TODO confirm against
# run_experiments.py); several may exist if the test was launched repeatedly.
candidates = glob.glob(f"{result_path}/*/rag_rewrite/results.json")

if candidates:
    # glob returns matches in arbitrary order, so pick the most recently
    # modified file explicitly instead of whichever happens to come first.
    latest = max(candidates, key=os.path.getmtime)
    with open(latest, encoding="utf-8") as f:
        data = json.load(f)

    enforcements = sum(r["metrics"]["enforcement_count"] for r in data)
    turns = sum(r["metrics"]["total_turns"] for r in data)
    successes = sum(1 for r in data if r["metrics"]["task_success"])

    print("\n=== 新Rewrite Prompt测试结果 ===")
    print(f"Sessions: {len(data)}")
    # Guard both ratios: an empty result list or zero recorded turns would
    # otherwise raise ZeroDivisionError and hide the rest of the report.
    if data:
        print(f"Success Rate: {100*successes/len(data):.1f}%")
    else:
        print("Success Rate: n/a")
    if turns:
        print(f"E/T: {enforcements/turns:.4f}")
    else:
        print("E/T: n/a")
    print("(对比旧rewrite E/T: 0.194)")
else:
    print("结果文件未找到")
ANALYZE