summary | refs | log | tree | commit | diff
path: root/src/personalization/models/llm/vllm_chat.py
diff options
context:
space:
mode:
Diffstat (limited to 'src/personalization/models/llm/vllm_chat.py')
-rw-r--r--  src/personalization/models/llm/vllm_chat.py  37
1 file changed, 32 insertions(+), 5 deletions(-)
diff --git a/src/personalization/models/llm/vllm_chat.py b/src/personalization/models/llm/vllm_chat.py
index b5c3a05..d577a30 100644
--- a/src/personalization/models/llm/vllm_chat.py
+++ b/src/personalization/models/llm/vllm_chat.py
@@ -78,27 +78,53 @@ class VLLMChatModel(ChatModel):
history: List[ChatTurn],
memory_notes: List[str],
max_new_tokens: int = 512,
+ global_notes: List[str] = None,
) -> List[dict]:
"""Build messages list for chat completion API with auto-truncation.
If the context exceeds max_context_length, older conversation turns
are removed to keep only the most recent context that fits.
+
+ Args:
+ global_notes: If provided, these are always-applicable preferences
+ displayed in a separate section from task-specific retrieved notes.
"""
# Use CollaborativeAgents-style system prompt
- if memory_notes:
- bullet = "\n".join(f"- {n}" for n in memory_notes)
+ has_any_notes = memory_notes or global_notes
+ if has_any_notes:
+ # Build preference sections
+ pref_sections = ""
+ if global_notes:
+ global_bullet = "\n".join(f"- {n}" for n in global_notes)
+ pref_sections += f"## General Preferences (always apply)\n{global_bullet}\n\n"
+ if memory_notes:
+ task_bullet = "\n".join(f"- {n}" for n in memory_notes)
+ if global_notes:
+ pref_sections += f"## Task-Specific Preferences\n{task_bullet}\n"
+ else:
+ pref_sections += f"{task_bullet}\n"
+
system_content = (
"You are a collaborative AI agent helping users solve writing, question answering, math, and coding problems.\n\n"
"# User Preferences\n"
"The user has a set of preferences for how you should behave. If you do not follow these preferences, "
"the user will be unable to learn from your response and you will need to adjust your response to adhere "
- "to these preferences (so it is best to follow them initially).\n"
+ "to these preferences (so it is best to follow them initially).\n\n"
+ "**IMPORTANT**: If the user explicitly requests something in THIS conversation (e.g., asks you to change "
+ "your format, style, or approach), that request takes PRIORITY over the remembered preferences below. "
+ "Always adapt to the user's direct feedback first.\n\n"
"Based on your past interactions with the user, you have maintained a set of notes about the user's preferences:\n"
- f"{bullet}\n\n"
+ f"{pref_sections}\n"
+ "# Before Responding\n"
+ "Before writing your response, briefly consider:\n"
+ "1. Which preferences above are relevant to this specific request?\n"
+ "2. How will you satisfy each relevant preference in your response?\n\n"
"# Conversation Guidelines:\n"
+ "- If the user asks you to adjust your response (e.g., 'be more concise', 'focus on intuition'), you MUST change your approach accordingly. Do NOT repeat the same response.\n"
"- If the user's message is unclear, lacks details, or is ambiguous (e.g. length of an essay, format requirements, "
"specific constraints), do not make assumptions. Ask for clarification and ensure you have enough information before providing an answer.\n"
"- Your goal is to help the user solve their problem. Adhere to their preferences and do your best to help them solve their problem.\n"
+ "- **Verify**: Before finalizing, check that your response satisfies the relevant preferences listed above.\n"
)
else:
# Vanilla mode - no preferences
@@ -152,13 +178,14 @@ class VLLMChatModel(ChatModel):
history: List[ChatTurn],
memory_notes: List[str],
max_new_tokens: int = 512,
+ global_notes: List[str] = None,
) -> List[dict]:
"""Public method to build messages without calling the API.
Used for batch processing where messages are collected first,
then sent in batch to vLLM for concurrent processing.
"""
- return self._build_messages(history, memory_notes, max_new_tokens)
+ return self._build_messages(history, memory_notes, max_new_tokens, global_notes=global_notes)
def answer(
self,