diff options
| author | YurenHao0426 <blackhao0426@gmail.com> | 2026-02-10 20:16:36 +0000 |
|---|---|---|
| committer | YurenHao0426 <blackhao0426@gmail.com> | 2026-02-10 20:16:36 +0000 |
| commit | 5626080ca4c4219aec4888d6b9406d0d3349fb55 (patch) | |
| tree | 86287d9fd5833e11ccd78566992540f2664fd195 /src/personalization/models/llm/vllm_chat.py | |
| parent | a2036838807428424bbbaff507a6563749a83145 (diff) | |
Add RAG rewrite, 60-session experiment scripts, and analysis tools
- RAG rewrite adapter and vector preference pipeline in personalized_llm
- 60-session experiment queue scripts (reflection, rag, rag_vector, rag_rewrite)
- Vector-preference correlation analysis and visualization scripts
- Local reward model batch processing improvements
- Updated CLAUDE.md with full experiment documentation and notes
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
Diffstat (limited to 'src/personalization/models/llm/vllm_chat.py')
| -rw-r--r-- | src/personalization/models/llm/vllm_chat.py | 37 |
1 file changed, 32 insertions, 5 deletions
diff --git a/src/personalization/models/llm/vllm_chat.py b/src/personalization/models/llm/vllm_chat.py index b5c3a05..d577a30 100644 --- a/src/personalization/models/llm/vllm_chat.py +++ b/src/personalization/models/llm/vllm_chat.py @@ -78,27 +78,53 @@ class VLLMChatModel(ChatModel): history: List[ChatTurn], memory_notes: List[str], max_new_tokens: int = 512, + global_notes: List[str] = None, ) -> List[dict]: """Build messages list for chat completion API with auto-truncation. If the context exceeds max_context_length, older conversation turns are removed to keep only the most recent context that fits. + + Args: + global_notes: If provided, these are always-applicable preferences + displayed in a separate section from task-specific retrieved notes. """ # Use CollaborativeAgents-style system prompt - if memory_notes: - bullet = "\n".join(f"- {n}" for n in memory_notes) + has_any_notes = memory_notes or global_notes + if has_any_notes: + # Build preference sections + pref_sections = "" + if global_notes: + global_bullet = "\n".join(f"- {n}" for n in global_notes) + pref_sections += f"## General Preferences (always apply)\n{global_bullet}\n\n" + if memory_notes: + task_bullet = "\n".join(f"- {n}" for n in memory_notes) + if global_notes: + pref_sections += f"## Task-Specific Preferences\n{task_bullet}\n" + else: + pref_sections += f"{task_bullet}\n" + system_content = ( "You are a collaborative AI agent helping users solve writing, question answering, math, and coding problems.\n\n" "# User Preferences\n" "The user has a set of preferences for how you should behave. 
If you do not follow these preferences, " "the user will be unable to learn from your response and you will need to adjust your response to adhere " - "to these preferences (so it is best to follow them initially).\n" + "to these preferences (so it is best to follow them initially).\n\n" + "**IMPORTANT**: If the user explicitly requests something in THIS conversation (e.g., asks you to change " + "your format, style, or approach), that request takes PRIORITY over the remembered preferences below. " + "Always adapt to the user's direct feedback first.\n\n" "Based on your past interactions with the user, you have maintained a set of notes about the user's preferences:\n" - f"{bullet}\n\n" + f"{pref_sections}\n" + "# Before Responding\n" + "Before writing your response, briefly consider:\n" + "1. Which preferences above are relevant to this specific request?\n" + "2. How will you satisfy each relevant preference in your response?\n\n" "# Conversation Guidelines:\n" + "- If the user asks you to adjust your response (e.g., 'be more concise', 'focus on intuition'), you MUST change your approach accordingly. Do NOT repeat the same response.\n" "- If the user's message is unclear, lacks details, or is ambiguous (e.g. length of an essay, format requirements, " "specific constraints), do not make assumptions. Ask for clarification and ensure you have enough information before providing an answer.\n" "- Your goal is to help the user solve their problem. Adhere to their preferences and do your best to help them solve their problem.\n" + "- **Verify**: Before finalizing, check that your response satisfies the relevant preferences listed above.\n" ) else: # Vanilla mode - no preferences @@ -152,13 +178,14 @@ class VLLMChatModel(ChatModel): history: List[ChatTurn], memory_notes: List[str], max_new_tokens: int = 512, + global_notes: List[str] = None, ) -> List[dict]: """Public method to build messages without calling the API. 
Used for batch processing where messages are collected first, then sent in batch to vLLM for concurrent processing. """ - return self._build_messages(history, memory_notes, max_new_tokens) + return self._build_messages(history, memory_notes, max_new_tokens, global_notes=global_notes) def answer( self, |
