diff options
| author | YurenHao0426 <blackhao0426@gmail.com> | 2026-02-11 00:21:34 +0000 |
|---|---|---|
| committer | YurenHao0426 <blackhao0426@gmail.com> | 2026-02-11 00:21:34 +0000 |
| commit | 08a08befe8ac0202ce952b88aa144cbffe2a73f9 (patch) | |
| tree | 0924f46f398cff21c22c16affb70cbfdc770c03f | |
| parent | d5e38fb958403563bf90e884d96660731b1e0379 (diff) | |
Add dual-vector ablation experiment (z_short/z_long necessity)
- Add rag_vector_no_short (eta_short=0) and rag_vector_no_long (eta_long=0) configs
- Add ablation run script for the 60-profile × 60-session (60p60s) experiment
- Document ablation design and expected results in notes
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
| -rw-r--r-- | collaborativeagents/adapters/personalized_llm_adapter.py | 34 | ||||
| -rwxr-xr-x | collaborativeagents/scripts/queue_ablation_zshort.sh | 47 | ||||
| -rw-r--r-- | collaborativeagents/scripts/run_experiments.py | 2 | ||||
| -rw-r--r-- | notes.md | 26 |
4 files changed, 109 insertions, 0 deletions
diff --git a/collaborativeagents/adapters/personalized_llm_adapter.py b/collaborativeagents/adapters/personalized_llm_adapter.py index 488b241..4189f31 100644 --- a/collaborativeagents/adapters/personalized_llm_adapter.py +++ b/collaborativeagents/adapters/personalized_llm_adapter.py @@ -672,6 +672,40 @@ def create_baseline_adapter( "extractor": "cuda:2", }, ), + # Ablation: RAG + Vector without z_short (only z_long, no within-session adaptation) + "rag_vector_no_short": AdapterConfig( + mode="full", + enable_preference_extraction=True, + enable_rl_updates=True, + use_user_vector=True, + llm_name=llm_name, + use_shared_models=use_shared_models, + enable_query_transform=True, + enable_global_preferences=True, + eta_short=0.0, # Disable z_short learning + device_assignment={ + "embed": "cuda:2", + "reranker": "cuda:3", + "extractor": "cuda:2", + }, + ), + # Ablation: RAG + Vector without z_long (only z_short, no cross-session learning) + "rag_vector_no_long": AdapterConfig( + mode="full", + enable_preference_extraction=True, + enable_rl_updates=True, + use_user_vector=True, + llm_name=llm_name, + use_shared_models=use_shared_models, + enable_query_transform=True, + enable_global_preferences=True, + eta_long=0.0, # Disable z_long learning + device_assignment={ + "embed": "cuda:2", + "reranker": "cuda:3", + "extractor": "cuda:2", + }, + ), # Baseline 7a: RAG + Vector + Preference Rewrite (combines best of both) "rag_rewrite_vector": AdapterConfig( mode="full", diff --git a/collaborativeagents/scripts/queue_ablation_zshort.sh b/collaborativeagents/scripts/queue_ablation_zshort.sh new file mode 100755 index 0000000..b4879c7 --- /dev/null +++ b/collaborativeagents/scripts/queue_ablation_zshort.sh @@ -0,0 +1,47 @@ +#!/bin/bash +# Ablation experiment: z_short necessity +# Run rag_vector_no_short (z_long only) and rag_vector_no_long (z_short only) +# Same setup as 60s main experiment for direct comparison + +echo "Starting z_short ablation experiments..." 
+echo "$(date '+%Y-%m-%d %H:%M:%S')" + +# First: run rag_vector_no_short (only z_long, no within-session adaptation) +echo "Starting rag_vector_no_short..." +python collaborativeagents/scripts/run_experiments.py \ + --methods rag_vector_no_short \ + --datasets math-hard,math-500,bigcodebench \ + --n-profiles 60 \ + --n-sessions 60 \ + --max-turns 10 \ + --use-vllm \ + --vllm-agent-url http://localhost:8003/v1 \ + --vllm-user-url http://localhost:8004/v1 \ + --use-batch-processing \ + --batch-size 4 \ + --parallel-profiles 20 \ + --profile-path collaborativeagents/data/complex_profiles_v2/profiles_200.jsonl \ + --output-dir collaborativeagents/results/ablation_no_short_60s \ + 2>&1 | tee collaborativeagents/results/ablation_no_short_60s.log + +echo "rag_vector_no_short completed at $(date '+%Y-%m-%d %H:%M:%S')" + +# Then: run rag_vector_no_long (only z_short, no cross-session learning) +echo "Starting rag_vector_no_long..." +python collaborativeagents/scripts/run_experiments.py \ + --methods rag_vector_no_long \ + --datasets math-hard,math-500,bigcodebench \ + --n-profiles 60 \ + --n-sessions 60 \ + --max-turns 10 \ + --use-vllm \ + --vllm-agent-url http://localhost:8003/v1 \ + --vllm-user-url http://localhost:8004/v1 \ + --use-batch-processing \ + --batch-size 4 \ + --parallel-profiles 20 \ + --profile-path collaborativeagents/data/complex_profiles_v2/profiles_200.jsonl \ + --output-dir collaborativeagents/results/ablation_no_long_60s \ + 2>&1 | tee collaborativeagents/results/ablation_no_long_60s.log + +echo "Both ablation experiments completed at $(date '+%Y-%m-%d %H:%M:%S')" diff --git a/collaborativeagents/scripts/run_experiments.py b/collaborativeagents/scripts/run_experiments.py index da3549b..806b959 100644 --- a/collaborativeagents/scripts/run_experiments.py +++ b/collaborativeagents/scripts/run_experiments.py @@ -118,6 +118,8 @@ AVAILABLE_METHODS = { "rag_rewrite": "Extractor + RAG with LLM preference rewrite/merge", "rag_rewrite_vector": "Extractor + RAG 
+ user vector + LLM preference rewrite", "rag_vector": "Extractor + RAG + user vector (proposed method)", + "rag_vector_no_short": "Ablation: RAG + user vector without z_short (long-term only)", + "rag_vector_no_long": "Ablation: RAG + user vector without z_long (short-term only)", "rag_vector_fast": "Extractor + RAG + user vector with 10x learning rate", "rag_vector_consolidate": "Extractor + RAG + user vector with session-level preference consolidation", "rag_vector_balanced": "Extractor + RAG + user vector with balanced rewards (10x LR + positive signal for good turns)", @@ -97,6 +97,32 @@ z_short 不和长期 preference Jaccard 相关是**符合预期的**: **可视化**: `collaborativeagents/results/fig_main_results.png` panel (c) +### z_short Ablation Experiment (02/10 启动) + +**目的**: 通过消融实验证明dual-vector中z_short的必要性 + +**实验设计**: +| 条件 | eta_long | eta_short | beta_long | beta_short | 说明 | +|------|----------|-----------|-----------|------------|------| +| Full (baseline) | 0.01 | 0.05 | 2.0 | 5.0 | 完整dual-vector (已完成的60s实验) | +| No z_short | 0.01 | **0.0** | 2.0 | 5.0 | 禁用session内适应 | +| No z_long | **0.0** | 0.05 | 2.0 | 5.0 | 禁用跨session学习 | + +**配置**: 60 profiles × 60 sessions, max 10 turns, same as main experiment +**方法名**: `rag_vector_no_short`, `rag_vector_no_long` + +**预期结果**: +1. **No z_short** (z_long only): 跨session学习正常,但session内第1-2 turn后如果agent犯错无法快速修正偏好权重 → 预计E/T会更高(更多enforcement) +2. **No z_long** (z_short only): 每个session从零开始(z_short在session开始时reset),完全没有跨session记忆 → 预计后期session表现差,没有学习曲线 +3. **Full** (both): 最佳表现,z_long提供跨session基础,z_short提供session内微调 + +**关键对比指标**: +- **E/T per-session曲线**: No z_short应该在early turns有更高E/T +- **Success rate over sessions**: No z_long应该flat,Full应该upward +- **Late-session performance**: Full应该最好,No z_long应该和early一样 + +**状态**: 🔄 Running (02/10 21:48 启动) + --- ## RAG vs Reflection 分析 |
