diff options
| author | YurenHao0426 <blackhao0426@gmail.com> | 2026-02-10 20:16:36 +0000 |
|---|---|---|
| committer | YurenHao0426 <blackhao0426@gmail.com> | 2026-02-10 20:16:36 +0000 |
| commit | 5626080ca4c4219aec4888d6b9406d0d3349fb55 (patch) | |
| tree | 86287d9fd5833e11ccd78566992540f2664fd195 /src/personalization/models/llm/vllm_chat.py | |
| parent | a2036838807428424bbbaff507a6563749a83145 (diff) | |
Add RAG rewrite, 60-session experiment scripts, and analysis tools
- RAG rewrite adapter and vector preference pipeline in personalized_llm
- 60-session experiment queue scripts (reflection, rag, rag_vector, rag_rewrite)
- Vector-preference correlation analysis and visualization scripts
- Local reward model batch processing improvements
- Updated CLAUDE.md with full experiment documentation and notes
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
Diffstat (limited to 'src/personalization/models/llm/vllm_chat.py')
| -rw-r--r-- | src/personalization/models/llm/vllm_chat.py | 37 |
1 file changed, 32 insertions, 5 deletions
diff --git a/src/personalization/models/llm/vllm_chat.py b/src/personalization/models/llm/vllm_chat.py index b5c3a05..d577a30 100644 --- a/src/personalization/models/llm/vllm_chat.py +++ b/src/personalization/models/llm/vllm_chat.py @@ -78,27 +78,53 @@ class VLLMChatModel(ChatModel): history: List[ChatTurn], memory_notes: List[str], max_new_tokens: int = 512, + global_notes: List[str] = None, ) -> List[dict]: """Build messages list for chat completion API with auto-truncation. If the context exceeds max_context_length, older conversation turns are removed to keep only the most recent context that fits. + + Args: + global_notes: If provided, these are always-applicable preferences + displayed in a separate section from task-specific retrieved notes. """ # Use CollaborativeAgents-style system prompt - if memory_notes: - bullet = "\n".join(f"- {n}" for n in memory_notes) + has_any_notes = memory_notes or global_notes + if has_any_notes: + # Build preference sections + pref_sections = "" + if global_notes: + global_bullet = "\n".join(f"- {n}" for n in global_notes) + pref_sections += f"## General Preferences (always apply)\n{global_bullet}\n\n" + if memory_notes: + task_bullet = "\n".join(f"- {n}" for n in memory_notes) + if global_notes: + pref_sections += f"## Task-Specific Preferences\n{task_bullet}\n" + else: + pref_sections += f"{task_bullet}\n" + system_content = ( "You are a collaborative AI agent helping users solve writing, question answering, math, and coding problems.\n\n" "# User Preferences\n" "The user has a set of preferences for how you should behave. 
If you do not follow these preferences, " "the user will be unable to learn from your response and you will need to adjust your response to adhere " - "to these preferences (so it is best to follow them initially).\n" + "to these preferences (so it is best to follow them initially).\n\n" + "**IMPORTANT**: If the user explicitly requests something in THIS conversation (e.g., asks you to change " + "your format, style, or approach), that request takes PRIORITY over the remembered preferences below. " + "Always adapt to the user's direct feedback first.\n\n" "Based on your past interactions with the user, you have maintained a set of notes about the user's preferences:\n" - f"{bullet}\n\n" + f"{pref_sections}\n" + "# Before Responding\n" + "Before writing your response, briefly consider:\n" + "1. Which preferences above are relevant to this specific request?\n" + "2. How will you satisfy each relevant preference in your response?\n\n" "# Conversation Guidelines:\n" + "- If the user asks you to adjust your response (e.g., 'be more concise', 'focus on intuition'), you MUST change your approach accordingly. Do NOT repeat the same response.\n" "- If the user's message is unclear, lacks details, or is ambiguous (e.g. length of an essay, format requirements, " "specific constraints), do not make assumptions. Ask for clarification and ensure you have enough information before providing an answer.\n" "- Your goal is to help the user solve their problem. Adhere to their preferences and do your best to help them solve their problem.\n" + "- **Verify**: Before finalizing, check that your response satisfies the relevant preferences listed above.\n" ) else: # Vanilla mode - no preferences @@ -152,13 +178,14 @@ class VLLMChatModel(ChatModel): history: List[ChatTurn], memory_notes: List[str], max_new_tokens: int = 512, + global_notes: List[str] = None, ) -> List[dict]: """Public method to build messages without calling the API. 
Used for batch processing where messages are collected first, then sent in batch to vLLM for concurrent processing. """ - return self._build_messages(history, memory_notes, max_new_tokens) + return self._build_messages(history, memory_notes, max_new_tokens, global_notes=global_notes) def answer( self, |
