summary | refs | log | tree | commit | diff
path: root/collaborativeagents/adapters/reflection_grpo_adapter.py
diff options
context:
space:
mode:
author: YurenHao0426 <blackhao0426@gmail.com> 2026-02-10 20:16:36 +0000
committer: YurenHao0426 <blackhao0426@gmail.com> 2026-02-10 20:16:36 +0000
commit: 5626080ca4c4219aec4888d6b9406d0d3349fb55 (patch)
tree: 86287d9fd5833e11ccd78566992540f2664fd195 /collaborativeagents/adapters/reflection_grpo_adapter.py
parent: a2036838807428424bbbaff507a6563749a83145 (diff)
Add RAG rewrite, 60-session experiment scripts, and analysis tools
- RAG rewrite adapter and vector preference pipeline in personalized_llm
- 60-session experiment queue scripts (reflection, rag, rag_vector, rag_rewrite)
- Vector-preference correlation analysis and visualization scripts
- Local reward model batch processing improvements
- Updated CLAUDE.md with full experiment documentation and notes

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
Diffstat (limited to 'collaborativeagents/adapters/reflection_grpo_adapter.py')
-rw-r--r-- collaborativeagents/adapters/reflection_grpo_adapter.py | 9
1 files changed, 5 insertions, 4 deletions
diff --git a/collaborativeagents/adapters/reflection_grpo_adapter.py b/collaborativeagents/adapters/reflection_grpo_adapter.py
index 09c5b26..3c10942 100644
--- a/collaborativeagents/adapters/reflection_grpo_adapter.py
+++ b/collaborativeagents/adapters/reflection_grpo_adapter.py
@@ -18,10 +18,11 @@ import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from json_repair import repair_json
-# Model paths - Use GRPO-trained model if available, fallback to base
-GRPO_MODEL_PATH = "/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/collaborativeagents/training/outputs/grpo_reflection/final"
-SFT_MODEL_PATH = "/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/collaborativeagents/training/outputs/sft_reflection"
-DEFAULT_MODEL_PATH = "/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/models/llama-3.1-8b-instruct"
+# Model paths - computed relative to project root
+_PROJECT_ROOT = Path(__file__).parent.parent.parent
+GRPO_MODEL_PATH = str(_PROJECT_ROOT / "collaborativeagents/training/outputs/grpo_reflection/final")
+SFT_MODEL_PATH = str(_PROJECT_ROOT / "collaborativeagents/training/outputs/sft_reflection")
+DEFAULT_MODEL_PATH = str(_PROJECT_ROOT / "models/llama-3.1-8b-instruct")
def get_best_available_model():
"""Get the best available model path (GRPO > SFT > base)."""