diff options
| author | YurenHao0426 <blackhao0426@gmail.com> | 2026-02-10 20:16:36 +0000 |
|---|---|---|
| committer | YurenHao0426 <blackhao0426@gmail.com> | 2026-02-10 20:16:36 +0000 |
| commit | 5626080ca4c4219aec4888d6b9406d0d3349fb55 (patch) | |
| tree | 86287d9fd5833e11ccd78566992540f2664fd195 /src/personalization/models | |
| parent | a2036838807428424bbbaff507a6563749a83145 (diff) | |
Add RAG rewrite, 60-session experiment scripts, and analysis tools
- RAG rewrite adapter and vector preference pipeline in personalized_llm
- 60-session experiment queue scripts (reflection, rag, rag_vector, rag_rewrite)
- Vector-preference correlation analysis and visualization scripts
- Local reward model batch processing improvements
- Updated CLAUDE.md with full experiment documentation and notes
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
Diffstat (limited to 'src/personalization/models')
| -rw-r--r-- | src/personalization/models/llm/vllm_chat.py | 37 | ||||
| -rw-r--r-- | src/personalization/models/preference_extractor/rule_extractor.py | 53 |
2 files changed, 85 insertions(+), 5 deletions(-)
diff --git a/src/personalization/models/llm/vllm_chat.py b/src/personalization/models/llm/vllm_chat.py index b5c3a05..d577a30 100644 --- a/src/personalization/models/llm/vllm_chat.py +++ b/src/personalization/models/llm/vllm_chat.py @@ -78,27 +78,53 @@ class VLLMChatModel(ChatModel): history: List[ChatTurn], memory_notes: List[str], max_new_tokens: int = 512, + global_notes: List[str] = None, ) -> List[dict]: """Build messages list for chat completion API with auto-truncation. If the context exceeds max_context_length, older conversation turns are removed to keep only the most recent context that fits. + + Args: + global_notes: If provided, these are always-applicable preferences + displayed in a separate section from task-specific retrieved notes. """ # Use CollaborativeAgents-style system prompt - if memory_notes: - bullet = "\n".join(f"- {n}" for n in memory_notes) + has_any_notes = memory_notes or global_notes + if has_any_notes: + # Build preference sections + pref_sections = "" + if global_notes: + global_bullet = "\n".join(f"- {n}" for n in global_notes) + pref_sections += f"## General Preferences (always apply)\n{global_bullet}\n\n" + if memory_notes: + task_bullet = "\n".join(f"- {n}" for n in memory_notes) + if global_notes: + pref_sections += f"## Task-Specific Preferences\n{task_bullet}\n" + else: + pref_sections += f"{task_bullet}\n" + system_content = ( "You are a collaborative AI agent helping users solve writing, question answering, math, and coding problems.\n\n" "# User Preferences\n" "The user has a set of preferences for how you should behave. 
If you do not follow these preferences, " "the user will be unable to learn from your response and you will need to adjust your response to adhere " - "to these preferences (so it is best to follow them initially).\n" + "to these preferences (so it is best to follow them initially).\n\n" + "**IMPORTANT**: If the user explicitly requests something in THIS conversation (e.g., asks you to change " + "your format, style, or approach), that request takes PRIORITY over the remembered preferences below. " + "Always adapt to the user's direct feedback first.\n\n" "Based on your past interactions with the user, you have maintained a set of notes about the user's preferences:\n" - f"{bullet}\n\n" + f"{pref_sections}\n" + "# Before Responding\n" + "Before writing your response, briefly consider:\n" + "1. Which preferences above are relevant to this specific request?\n" + "2. How will you satisfy each relevant preference in your response?\n\n" "# Conversation Guidelines:\n" + "- If the user asks you to adjust your response (e.g., 'be more concise', 'focus on intuition'), you MUST change your approach accordingly. Do NOT repeat the same response.\n" "- If the user's message is unclear, lacks details, or is ambiguous (e.g. length of an essay, format requirements, " "specific constraints), do not make assumptions. Ask for clarification and ensure you have enough information before providing an answer.\n" "- Your goal is to help the user solve their problem. Adhere to their preferences and do your best to help them solve their problem.\n" + "- **Verify**: Before finalizing, check that your response satisfies the relevant preferences listed above.\n" ) else: # Vanilla mode - no preferences @@ -152,13 +178,14 @@ class VLLMChatModel(ChatModel): history: List[ChatTurn], memory_notes: List[str], max_new_tokens: int = 512, + global_notes: List[str] = None, ) -> List[dict]: """Public method to build messages without calling the API. 
Used for batch processing where messages are collected first, then sent in batch to vLLM for concurrent processing. """ - return self._build_messages(history, memory_notes, max_new_tokens) + return self._build_messages(history, memory_notes, max_new_tokens, global_notes=global_notes) def answer( self, diff --git a/src/personalization/models/preference_extractor/rule_extractor.py b/src/personalization/models/preference_extractor/rule_extractor.py index 0f743d9..42f43ed 100644 --- a/src/personalization/models/preference_extractor/rule_extractor.py +++ b/src/personalization/models/preference_extractor/rule_extractor.py @@ -119,6 +119,59 @@ class QwenRuleExtractor(PreferenceExtractor): return text[start : end + 1] return None + @torch.inference_mode() + def batch_extract_preferences(self, queries: List[str], batch_size: int = 64) -> List[Dict[str, Any]]: + """ + Batch extract preferences from multiple queries using left-padded batching. + """ + if not queries: + return [] + + # Save and set padding side for decoder-only batched generation + orig_padding_side = self.tokenizer.padding_side + self.tokenizer.padding_side = "left" + + all_results = [] + prompts = [self.build_preference_prompt(q) for q in queries] + + for start in range(0, len(prompts), batch_size): + batch_prompts = prompts[start:start + batch_size] + inputs = self.tokenizer( + batch_prompts, return_tensors="pt", padding=True, truncation=True + ).to(self.model.device) + + outputs = self.model.generate( + **inputs, + do_sample=False, + max_new_tokens=512, + pad_token_id=self.tokenizer.pad_token_id, + eos_token_id=self.tokenizer.eos_token_id, + ) + + for i in range(len(batch_prompts)): + input_len = (inputs["attention_mask"][i] == 1).sum().item() + gen_ids = outputs[i][input_len:] + text = self.tokenizer.decode(gen_ids, skip_special_tokens=True) + + try: + data = json.loads(text) + validated = PreferenceList.model_validate(data) + all_results.append(validated.model_dump()) + except Exception: + extracted_json 
= self._extract_json_substring(text) + if extracted_json: + try: + data = json.loads(extracted_json) + validated = PreferenceList.model_validate(data) + all_results.append(validated.model_dump()) + continue + except Exception: + pass + all_results.append({"preferences": []}) + + self.tokenizer.padding_side = orig_padding_side + return all_results + def extract_turn(self, turns: List[ChatTurn]) -> PreferenceList: """ Extract preferences from the LAST user turn in the history. |
