diff options
| author | YurenHao0426 <blackhao0426@gmail.com> | 2026-01-27 09:57:37 -0600 |
|---|---|---|
| committer | YurenHao0426 <blackhao0426@gmail.com> | 2026-01-27 09:57:37 -0600 |
| commit | dc801c07cf38b0c495686463e6ca6f871a64440e (patch) | |
| tree | 599f03114775921dbc472403c701f4a3a8ea188a /src/personalization/evaluation/preference_bank/generator.py | |
| parent | e43b3f8aa36c198b95c1e46bea2eaf3893b13dc3 (diff) | |
Add collaborativeagents module and update gitignore
- Add collaborativeagents subproject with adapters, agents, and evaluation modules
- Update .gitignore to exclude large binary files (.whl, .tar), wandb logs, and results
Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
Diffstat (limited to 'src/personalization/evaluation/preference_bank/generator.py')
| -rw-r--r-- | src/personalization/evaluation/preference_bank/generator.py | 530 |
1 files changed, 530 insertions, 0 deletions
"""
Preference Bank Generator

Uses LLM to automatically generate diverse user preferences for each topic.
"""

import json
import os
from typing import List, Dict, Any, Optional
from dataclasses import dataclass

from .schemas import PreferenceItem, PreferenceTopic, PreferenceBank


# Topic definitions for the demo (5 topics).
# Each entry maps a topic name to:
#   description      - human-readable summary inserted into the LLM prompt
#   related_datasets - dataset names passed through to PreferenceTopic
#   generation_hints - bullet hints inserted into the LLM prompt
DEMO_TOPICS = {
    "math_formatting": {
        "description": "How mathematical content should be formatted (LaTeX, plain text, markdown)",
        "related_datasets": ["math-hard", "math-500", "gpqa"],
        "generation_hints": [
            "LaTeX formatting for equations",
            "Plain text vs mathematical notation",
            "Inline vs block equations",
            "Step-by-step calculation display",
            "Variable naming conventions",
        ],
    },
    "coding_style": {
        "description": "Preferences for code formatting, language choice, and documentation",
        "related_datasets": ["humaneval", "bigcodebench"],
        "generation_hints": [
            "Programming language preference (Python, JavaScript, etc.)",
            "Type hints and annotations",
            "Docstrings and comments",
            "Code structure and organization",
            "Naming conventions",
        ],
    },
    "response_structure": {
        "description": "How responses should be organized (bullets, numbered lists, prose)",
        "related_datasets": ["all"],
        "generation_hints": [
            "Bullet points vs numbered lists vs prose",
            "Headers and sections",
            "TL;DR summaries",
            "Outline before detailed explanation",
            "Logical flow and transitions",
        ],
    },
    "explanation_depth": {
        "description": "Level of detail and thoroughness in explanations",
        "related_datasets": ["all"],
        "generation_hints": [
            "Concise vs comprehensive",
            "Examples and analogies",
            "Background context",
            "Assumptions stated explicitly",
            "Multiple approaches/alternatives",
        ],
    },
    "interaction_style": {
        "description": "How the agent should interact (questions, confirmations, suggestions)",
        "related_datasets": ["all"],
        "generation_hints": [
            "Asking clarifying questions",
            "Step-by-step vs holistic answers",
            "Proactive suggestions",
            "Confidence levels in answers",
            "Politeness and tone",
        ],
    },
}


# LLM prompt template for generating preferences.
# Placeholders filled via str.format: num_prefs, topic_name, topic_description, hints.
GENERATION_PROMPT = '''You are helping design a user preference benchmark. Generate {num_prefs} diverse user preferences for the topic: "{topic_name}"

Topic Description: {topic_description}

Hints for preference types:
{hints}

For each preference, provide a JSON object with:
1. "condition": When this preference applies (e.g., "when solving math problems", "when explaining code")
2. "action": What the user prefers (be specific and enforceable)
3. "conflict_group": If this preference conflicts with others in the list, give them the same group name (e.g., "notation_style"). Use null if no conflict.
4. "enforce_description": How a user would detect violation and enforce this preference
5. "example_violation": A concrete example of an agent response that violates this
6. "example_compliance": A concrete example that follows this preference

Requirements:
- Make preferences SPECIFIC and ENFORCEABLE (not vague like "be helpful")
- Include 2-3 pairs of CONFLICTING preferences (same conflict_group) - this is important for testing RAG
- Vary specificity: some broad ("always use Python"), some narrow ("use f-strings for string formatting in Python")
- Preferences should be realistic things users actually care about

Output as a JSON array of objects. Only output the JSON array, no other text.
'''


class PreferenceBankGenerator:
    """Generates a preference bank using LLM, with hardcoded fallbacks."""

    def __init__(
        self,
        llm_client: Any = None,
        model_name: str = "gpt-4o-mini",  # Default to a capable but fast model
        create_client: bool = True,
    ):
        """
        Args:
            llm_client: OpenAI-compatible client. If None, one is created
                (subject to ``create_client``).
            model_name: Model to use for generation.
            create_client: Only consulted when ``llm_client`` is None. When
                True (the default, matching the historical behavior) an
                ``openai.OpenAI()`` client is created if possible. When
                False no client is created and every topic is served from
                the hardcoded fallbacks.
        """
        self.model_name = model_name

        if llm_client is not None:
            self.client = llm_client
        elif not create_client:
            # Explicit offline mode: never touch the network.
            self.client = None
        else:
            # Best effort: a missing package or missing credentials must not
            # prevent fallback generation, hence the broad except.
            try:
                import openai
                self.client = openai.OpenAI()
            except Exception as e:
                print(f"Warning: Could not initialize OpenAI client: {e}")
                self.client = None

    @staticmethod
    def _parse_json_array(content: str) -> List[Any]:
        """Parse the JSON array contained in an LLM reply.

        Tolerates an enclosing Markdown code fence (with or without a
        language tag), prose around the array, and an object wrapping a
        single array value.

        Args:
            content: Raw text of the model reply.

        Returns:
            The decoded list.

        Raises:
            ValueError: If no JSON array can be recovered
                (``json.JSONDecodeError`` is a ``ValueError`` subclass).
        """
        text = content.strip()

        # Strip only the OUTER fence. Splitting on every "```" would cut the
        # payload short when a string value itself contains a code fence.
        if text.startswith("```"):
            text = text[3:]
            tag_len = 0
            while tag_len < len(text) and text[tag_len].isalpha():
                tag_len += 1  # skip a language tag such as "json"
            text = text[tag_len:].rstrip()
            if text.endswith("```"):
                text = text[:-3]

        try:
            data = json.loads(text)
        except json.JSONDecodeError:
            # Retry on the outermost [...] span to drop surrounding prose.
            start, end = text.find("["), text.rfind("]")
            if start == -1 or end <= start:
                raise
            data = json.loads(text[start:end + 1])

        if isinstance(data, dict):
            # Unwrap replies shaped like {"preferences": [...]}.
            list_values = [v for v in data.values() if isinstance(v, list)]
            if len(list_values) == 1:
                data = list_values[0]

        if not isinstance(data, list):
            raise ValueError(
                f"expected a JSON array, got {type(data).__name__}"
            )
        return data

    def generate_preferences_for_topic(
        self,
        topic_name: str,
        topic_description: str,
        hints: List[str],
        num_prefs: int = 5,
    ) -> List[PreferenceItem]:
        """Generate preferences for a single topic using LLM.

        Falls back to ``_generate_fallback_preferences`` when no client is
        configured, when the request fails, or when the reply cannot be
        parsed into at least one preference object.

        Args:
            topic_name: Topic key; its first four characters prefix the
                generated preference ids.
            topic_description: Description inserted into the prompt.
            hints: Hint strings rendered as a bullet list in the prompt.
            num_prefs: Number of preferences requested from the model.

        Returns:
            The generated (or fallback) preference items.
        """
        if self.client is None:
            print(f"No LLM client available, using fallback for topic: {topic_name}")
            return self._generate_fallback_preferences(topic_name, num_prefs)

        hints_text = "\n".join(f"- {h}" for h in hints)

        prompt = GENERATION_PROMPT.format(
            num_prefs=num_prefs,
            topic_name=topic_name,
            topic_description=topic_description,
            hints=hints_text,
        )

        # Broad except is deliberate: this is the best-effort boundary, and
        # any network/API/parsing failure should degrade to the fallbacks.
        try:
            response = self.client.chat.completions.create(
                model=self.model_name,
                messages=[{"role": "user", "content": prompt}],
                temperature=0.8,
                max_tokens=4000,
            )

            content = response.choices[0].message.content
            if not content:
                raise ValueError("LLM returned an empty response")

            entries = [
                entry
                for entry in self._parse_json_array(content)
                if isinstance(entry, dict)
            ]
            if not entries:
                raise ValueError("LLM response contained no preference objects")

            # Convert to PreferenceItem objects.
            # NOTE(review): ids use only topic_name[:4], so two topics that
            # share a four-character prefix would produce identical ids.
            return [
                PreferenceItem(
                    id=f"{topic_name[:4]}_{index:03d}",
                    topic=topic_name,
                    condition=entry.get("condition", ""),
                    action=entry.get("action", ""),
                    conflict_group=entry.get("conflict_group"),
                    enforce_description=entry.get("enforce_description", ""),
                    example_violation=entry.get("example_violation", ""),
                    example_compliance=entry.get("example_compliance", ""),
                )
                for index, entry in enumerate(entries, start=1)
            ]

        except Exception as e:
            print(f"Error generating preferences for {topic_name}: {e}")
            return self._generate_fallback_preferences(topic_name, num_prefs)

    def _generate_fallback_preferences(
        self,
        topic_name: str,
        num_prefs: int = 5,
    ) -> List[PreferenceItem]:
        """Generate hardcoded fallback preferences when LLM is not available.

        Args:
            topic_name: Topic key. The five demo topics have curated entries;
                any other name gets generic placeholder preferences.
            num_prefs: Maximum number of items to return. Curated topics hold
                five entries each, so at most five are returned for them.

        Returns:
            A list of preference items for the topic.
        """
        fallbacks = {
            "math_formatting": [
                PreferenceItem(
                    id="math_001", topic="math_formatting",
                    condition="solving math problems",
                    action="use LaTeX for all formulas and equations",
                    conflict_group="math_notation",
                    enforce_description="Check if mathematical expressions use LaTeX syntax like $x^2$ or $$\\int$$",
                    example_violation="The answer is x squared plus 2x plus 1",
                    example_compliance="The answer is $x^2 + 2x + 1$",
                ),
                PreferenceItem(
                    id="math_002", topic="math_formatting",
                    condition="explaining mathematical concepts",
                    action="use plain text only, avoid any mathematical notation",
                    conflict_group="math_notation",
                    enforce_description="Check if response contains any LaTeX or special math symbols",
                    example_violation="We need to find $\\frac{d}{dx}(x^2)$",
                    example_compliance="We need to find the derivative of x squared",
                ),
                PreferenceItem(
                    id="math_003", topic="math_formatting",
                    condition="showing multi-step calculations",
                    action="display each step on a separate line with clear labels",
                    conflict_group=None,
                    enforce_description="Check if steps are on separate lines with labels like 'Step 1:'",
                    example_violation="First we add 2+3=5, then multiply by 4 to get 20",
                    example_compliance="Step 1: Add 2 + 3 = 5\nStep 2: Multiply by 4: 5 × 4 = 20",
                ),
                PreferenceItem(
                    id="math_004", topic="math_formatting",
                    condition="presenting final answers",
                    action="clearly box or highlight the final answer",
                    conflict_group=None,
                    enforce_description="Check if final answer is visually distinguished",
                    example_violation="So x equals 5.",
                    example_compliance="**Final Answer: x = 5**",
                ),
                PreferenceItem(
                    id="math_005", topic="math_formatting",
                    condition="solving problems with multiple variables",
                    action="use single-letter variables (x, y, z) rather than descriptive names",
                    conflict_group="var_naming",
                    enforce_description="Check if variables are single letters",
                    example_violation="Let price = 100 and quantity = 5",
                    example_compliance="Let p = 100 and q = 5",
                ),
            ],
            "coding_style": [
                PreferenceItem(
                    id="code_001", topic="coding_style",
                    condition="providing code examples",
                    action="always use Python",
                    conflict_group="language",
                    enforce_description="Check if code is written in Python",
                    example_violation="```javascript\nfunction add(a, b) { return a + b; }\n```",
                    example_compliance="```python\ndef add(a, b):\n return a + b\n```",
                ),
                PreferenceItem(
                    id="code_002", topic="coding_style",
                    condition="providing code examples",
                    action="always use JavaScript or TypeScript",
                    conflict_group="language",
                    enforce_description="Check if code is written in JavaScript/TypeScript",
                    example_violation="```python\ndef add(a, b): return a + b\n```",
                    example_compliance="```javascript\nconst add = (a, b) => a + b;\n```",
                ),
                PreferenceItem(
                    id="code_003", topic="coding_style",
                    condition="writing Python functions",
                    action="always include type hints for parameters and return values",
                    conflict_group=None,
                    enforce_description="Check if function has type hints",
                    example_violation="def add(a, b):\n return a + b",
                    example_compliance="def add(a: int, b: int) -> int:\n return a + b",
                ),
                PreferenceItem(
                    id="code_004", topic="coding_style",
                    condition="writing functions",
                    action="include a docstring explaining the function",
                    conflict_group=None,
                    enforce_description="Check if function has a docstring",
                    example_violation="def add(a, b):\n return a + b",
                    example_compliance='def add(a, b):\n """Add two numbers and return the result."""\n return a + b',
                ),
                PreferenceItem(
                    id="code_005", topic="coding_style",
                    condition="writing code",
                    action="minimize comments, code should be self-documenting",
                    conflict_group="comment_style",
                    enforce_description="Check if there are excessive inline comments",
                    example_violation="x = x + 1 # increment x by 1",
                    example_compliance="x += 1",
                ),
            ],
            "response_structure": [
                PreferenceItem(
                    id="struct_001", topic="response_structure",
                    condition="providing multi-point answers",
                    action="use bullet points with '-' or '*'",
                    conflict_group="list_style",
                    enforce_description="Check if response uses bullet points",
                    example_violation="First, do X. Second, do Y. Third, do Z.",
                    example_compliance="- First, do X\n- Second, do Y\n- Third, do Z",
                ),
                PreferenceItem(
                    id="struct_002", topic="response_structure",
                    condition="providing step-by-step instructions",
                    action="use numbered lists",
                    conflict_group="list_style",
                    enforce_description="Check if response uses numbered lists",
                    example_violation="First do X, then do Y, finally do Z.",
                    example_compliance="1. Do X\n2. Do Y\n3. Do Z",
                ),
                PreferenceItem(
                    id="struct_003", topic="response_structure",
                    condition="writing explanations",
                    action="use flowing prose paragraphs, avoid lists",
                    conflict_group="list_style",
                    enforce_description="Check if response uses prose instead of lists",
                    example_violation="Key points:\n- Point 1\n- Point 2",
                    example_compliance="The key insight here is that Point 1 connects to Point 2 through...",
                ),
                PreferenceItem(
                    id="struct_004", topic="response_structure",
                    condition="providing long explanations",
                    action="include a TL;DR summary at the end",
                    conflict_group=None,
                    enforce_description="Check if response ends with TL;DR",
                    example_violation="... and that's how it works.",
                    example_compliance="... and that's how it works.\n\n**TL;DR:** X does Y by Z.",
                ),
                PreferenceItem(
                    id="struct_005", topic="response_structure",
                    condition="explaining complex topics",
                    action="start with an outline of what will be covered",
                    conflict_group=None,
                    enforce_description="Check if response starts with an outline",
                    example_violation="Let me explain recursion. First, understand that...",
                    example_compliance="I'll cover: 1) What is recursion, 2) How it works, 3) Examples.\n\n**1) What is recursion**...",
                ),
            ],
            "explanation_depth": [
                PreferenceItem(
                    id="depth_001", topic="explanation_depth",
                    condition="answering questions",
                    action="be concise, no more than 3 sentences",
                    conflict_group="length",
                    enforce_description="Count sentences, should be 3 or fewer",
                    example_violation="Let me explain in detail. First... Second... Third... Fourth... Fifth...",
                    example_compliance="The answer is X. This works because of Y. Here's how to apply it: Z.",
                ),
                PreferenceItem(
                    id="depth_002", topic="explanation_depth",
                    condition="explaining concepts",
                    action="provide comprehensive, detailed explanations",
                    conflict_group="length",
                    enforce_description="Check if explanation is thorough with multiple aspects covered",
                    example_violation="It's X. Done.",
                    example_compliance="Let me explain X in detail. The concept originates from... It works by... Common applications include... Here's an example...",
                ),
                PreferenceItem(
                    id="depth_003", topic="explanation_depth",
                    condition="explaining anything",
                    action="always include at least one concrete example",
                    conflict_group=None,
                    enforce_description="Check if at least one example is provided",
                    example_violation="A binary tree is a data structure where each node has at most two children.",
                    example_compliance="A binary tree is a data structure where each node has at most two children. For example, in [5, 3, 7], 5 is the root, 3 is left child, 7 is right child.",
                ),
                PreferenceItem(
                    id="depth_004", topic="explanation_depth",
                    condition="explaining technical concepts",
                    action="use analogies from everyday life",
                    conflict_group=None,
                    enforce_description="Check if explanation includes an everyday analogy",
                    example_violation="A stack is a LIFO data structure.",
                    example_compliance="A stack is like a stack of plates - you can only take the top one (LIFO).",
                ),
                PreferenceItem(
                    id="depth_005", topic="explanation_depth",
                    condition="solving problems",
                    action="state assumptions explicitly before solving",
                    conflict_group=None,
                    enforce_description="Check if assumptions are stated upfront",
                    example_violation="The answer is 42.",
                    example_compliance="Assuming n is positive and integer, the answer is 42.",
                ),
            ],
            "interaction_style": [
                PreferenceItem(
                    id="inter_001", topic="interaction_style",
                    condition="receiving unclear requests",
                    action="ask clarifying questions before attempting to answer",
                    conflict_group="clarification",
                    enforce_description="Check if agent asks questions when request is ambiguous",
                    example_violation="Here's a solution assuming you meant X...",
                    example_compliance="Before I help, could you clarify: do you mean X or Y?",
                ),
                PreferenceItem(
                    id="inter_002", topic="interaction_style",
                    condition="receiving requests",
                    action="make reasonable assumptions and proceed without asking",
                    conflict_group="clarification",
                    enforce_description="Check if agent proceeds with reasonable assumptions",
                    example_violation="What exactly do you mean by 'large'? What size range?",
                    example_compliance="Assuming you mean 'large' as over 1000 items, here's the solution...",
                ),
                PreferenceItem(
                    id="inter_003", topic="interaction_style",
                    condition="solving multi-step problems",
                    action="present one step at a time and ask for confirmation before proceeding",
                    conflict_group="pacing",
                    enforce_description="Check if agent pauses after each step",
                    example_violation="Step 1: X. Step 2: Y. Step 3: Z. Done!",
                    example_compliance="Step 1: X. Does this make sense? Should I continue to Step 2?",
                ),
                PreferenceItem(
                    id="inter_004", topic="interaction_style",
                    condition="solving problems",
                    action="provide the complete solution at once without pausing",
                    conflict_group="pacing",
                    enforce_description="Check if agent gives complete solution without asking to continue",
                    example_violation="First, let me do step 1... Should I continue?",
                    example_compliance="Here's the complete solution: Step 1: X, Step 2: Y, Step 3: Z.",
                ),
                PreferenceItem(
                    id="inter_005", topic="interaction_style",
                    condition="providing answers",
                    action="include a confidence level (e.g., 'I'm 90% confident')",
                    conflict_group=None,
                    enforce_description="Check if response includes confidence level",
                    example_violation="The answer is 42.",
                    example_compliance="I'm about 95% confident the answer is 42.",
                ),
            ],
        }

        if topic_name in fallbacks:
            return fallbacks[topic_name][:num_prefs]

        # Generic fallback for topics without curated entries.
        return [
            PreferenceItem(
                id=f"{topic_name[:4]}_{i+1:03d}",
                topic=topic_name,
                condition=f"interacting about {topic_name}",
                action=f"preference {i+1} for {topic_name}",
                conflict_group=None,
                enforce_description=f"Check preference {i+1}",
                example_violation=f"Violation example {i+1}",
                example_compliance=f"Compliance example {i+1}",
            )
            for i in range(num_prefs)
        ]

    def generate_bank(
        self,
        topics: Optional[Dict[str, Dict]] = None,
        prefs_per_topic: int = 5,
    ) -> PreferenceBank:
        """Generate a complete preference bank.

        Args:
            topics: Mapping of topic name to a config dict with a required
                "description" key and optional "generation_hints" and
                "related_datasets" lists. Defaults to ``DEMO_TOPICS``.
            prefs_per_topic: Number of preferences requested per topic.

        Returns:
            A ``PreferenceBank`` with one ``PreferenceTopic`` per entry.
        """
        if topics is None:
            topics = DEMO_TOPICS

        bank = PreferenceBank()

        for topic_name, topic_config in topics.items():
            print(f"Generating preferences for topic: {topic_name}...")

            preferences = self.generate_preferences_for_topic(
                topic_name=topic_name,
                topic_description=topic_config["description"],
                hints=topic_config.get("generation_hints", []),
                num_prefs=prefs_per_topic,
            )

            topic = PreferenceTopic(
                name=topic_name,
                description=topic_config["description"],
                # Optional, like generation_hints, so a minimal custom topic
                # config does not raise KeyError.
                related_datasets=topic_config.get("related_datasets", []),
                preferences=preferences,
            )

            bank.add_topic(topic)
            print(f" Generated {len(preferences)} preferences")

        return bank


def generate_demo_bank(
    output_path: Optional[str] = None,
    use_llm: bool = False,
    prefs_per_topic: int = 5,
) -> PreferenceBank:
    """
    Generate a demo preference bank.

    Args:
        output_path: If provided, save bank to this path
        use_llm: If True, use LLM to generate. If False, use hardcoded fallbacks.
        prefs_per_topic: Number of preferences per topic

    Returns:
        Generated PreferenceBank
    """
    # Passing llm_client=None alone would still auto-create an OpenAI client,
    # so offline mode must be requested explicitly via create_client.
    generator = PreferenceBankGenerator(create_client=use_llm)

    bank = generator.generate_bank(
        topics=DEMO_TOPICS,
        prefs_per_topic=prefs_per_topic,
    )

    if output_path:
        bank.save(output_path)
        print(f"Saved bank to {output_path}")

    print(f"\nBank Statistics: {bank.stats()}")

    return bank


if __name__ == "__main__":
    # Generate demo bank with fallback preferences, saved next to this script.
    script_dir = os.path.dirname(os.path.abspath(__file__))
    output_path = os.path.join(script_dir, "bank_demo.json")

    bank = generate_demo_bank(output_path=output_path, use_llm=False)
