diff options
| author | YurenHao0426 <blackhao0426@gmail.com> | 2026-01-27 09:57:37 -0600 |
|---|---|---|
| committer | YurenHao0426 <blackhao0426@gmail.com> | 2026-01-27 09:57:37 -0600 |
| commit | dc801c07cf38b0c495686463e6ca6f871a64440e (patch) | |
| tree | 599f03114775921dbc472403c701f4a3a8ea188a /collaborativeagents/scripts/conflict_scenario_generator.py | |
| parent | e43b3f8aa36c198b95c1e46bea2eaf3893b13dc3 (diff) | |
Add collaborativeagents module and update gitignore
- Add collaborativeagents subproject with adapters, agents, and evaluation modules
- Update .gitignore to exclude large binary files (.whl, .tar), wandb logs, and results
Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
Diffstat (limited to 'collaborativeagents/scripts/conflict_scenario_generator.py')
| -rw-r--r-- | collaborativeagents/scripts/conflict_scenario_generator.py | 637 |
1 file changed, 637 insertions, 0 deletions
"""
Conflict Scenario Generator

Generates queries that deliberately trigger preference conflicts.
The key insight: RAG naturally resolves conflicts by retrieving ONLY
the relevant preference, while context-based methods see ALL preferences
and get confused.

Design principles:
1. Every test query should trigger 2+ conflicting preferences
2. Only ONE preference is correct given the full context
3. RAG retrieves the correct one (high similarity to query)
4. Context methods see both and often pick wrong one or try to satisfy both
"""

import json
import random
import re
from dataclasses import dataclass, field
from typing import Optional
from pathlib import Path


# ============================================================================
# Conflict Templates
# ============================================================================

@dataclass
class ConflictScenario:
    """A scenario that triggers a preference conflict."""
    scenario_id: str
    conflict_group: str
    query: str
    context_cues: list      # What makes the correct preference clear
    triggered_prefs: list   # Preference IDs that could apply
    correct_pref_id: str    # The one that SHOULD apply
    wrong_pref_ids: list    # The ones that should NOT apply
    why_correct: str        # Explanation for ground truth
    expected_rag_behavior: str      # What RAG should do
    expected_context_failure: str   # How context methods fail


# Core conflict scenarios - each designed to fail context methods
CONFLICT_TEMPLATES = {
    # =========================================================================
    # FORMAT CONFLICTS
    # =========================================================================
    "format_bullets_vs_numbered": [
        {
            "query": "What are the steps to deploy a Docker container? Also list the common mistakes to avoid.",
            "context_cues": ["steps to deploy = procedure", "list mistakes = enumeration"],
            "correct_for": "both apply to different parts",
            "why_context_fails": "Context sees both prefs, might use one format for everything",
            "why_rag_wins": "RAG retrieves procedure-pref for deploy part, list-pref for mistakes part"
        },
        {
            "query": "Walk me through setting up CI/CD - what tools should I consider?",
            "context_cues": ["walk through = sequential", "consider = options"],
            "correct_for": "numbered for walkthrough, bullets for tools",
            "why_context_fails": "Mixes formats inconsistently",
            "why_rag_wins": "Retrieves appropriate format preference per section"
        },
        {
            "query": "How do I configure nginx? Give me the key parameters.",
            "context_cues": ["how do I = procedure", "key parameters = list"],
            "correct_for": "numbered steps + bulleted parameters",
            "why_context_fails": "Context methods apply one format to all",
            "why_rag_wins": "Separate retrieval for procedure vs enumeration context"
        }
    ],

    "format_answer_first_vs_buildup": [
        {
            "query": "What's the time complexity of quicksort and why?",
            "context_cues": ["what's = direct question", "why = needs explanation"],
            "correct_for": "answer first (O(n log n)), then explain why",
            "why_context_fails": "Either gives answer without why, or long buildup first",
            "why_rag_wins": "Retrieves 'answer first' for 'what's', builds explanation for 'why'"
        },
        {
            "query": "Explain how neural networks learn - what's backpropagation?",
            "context_cues": ["explain how = learning", "what's = definition needed"],
            "correct_for": "build up intuition for 'how', then define backprop",
            "why_context_fails": "Starts with backprop definition (answer first) losing context",
            "why_rag_wins": "Identifies learning intent first, answer-seeking second"
        }
    ],

    # =========================================================================
    # VERBOSITY CONFLICTS
    # =========================================================================
    "verbosity_concise_vs_detailed": [
        {
            "query": "Quick question - how does the GIL work in Python?",
            "context_cues": ["quick question = brevity cue", "GIL = complex topic"],
            "correct_for": "concise (user said quick)",
            "why_context_fails": "Sees 'complex topic' pref, gives long explanation",
            "why_rag_wins": "Explicit brevity cue has higher retrieval score"
        },
        {
            "query": "Briefly explain the proof of the halting problem.",
            "context_cues": ["briefly = brevity", "proof = normally detailed"],
            "correct_for": "concise - user explicitly asked for brief",
            "why_context_fails": "Proof preference triggers long format",
            "why_rag_wins": "'Briefly' in query matches concise preference strongly"
        },
        {
            "query": "TL;DR on microservices vs monolith for a startup?",
            "context_cues": ["TL;DR = max brevity", "comparison = could be detailed"],
            "correct_for": "ultra-concise comparison",
            "why_context_fails": "Comparison pref might trigger table/detailed analysis",
            "why_rag_wins": "TL;DR keyword retrieves brevity preference"
        },
        {
            "query": "In detail, what's 2+2?",
            "context_cues": ["in detail = verbosity cue", "2+2 = trivial"],
            "correct_for": "brief (topic too simple for detail)",
            "why_context_fails": "Might over-explain simple arithmetic",
            "why_rag_wins": "Query simplicity context overrides detail cue"
        }
    ],

    # =========================================================================
    # CODE STYLE CONFLICTS
    # =========================================================================
    "code_naming_convention": [
        {
            "query": "Write a function to parse JSON, show it in Python and JavaScript.",
            "context_cues": ["Python = snake_case", "JavaScript = camelCase"],
            "correct_for": "snake_case for Python version, camelCase for JS version",
            "why_context_fails": "Picks one convention for both, or inconsistent",
            "why_rag_wins": "Language detection triggers correct convention per block"
        },
        {
            "query": "Convert this Python script to TypeScript: def get_user_data(): ...",
            "context_cues": ["Python source = snake_case", "TypeScript target = camelCase"],
            "correct_for": "convert snake_case to camelCase in TypeScript output",
            "why_context_fails": "Might keep snake_case in TypeScript",
            "why_rag_wins": "Output language triggers appropriate convention"
        },
        {
            "query": "Write SQL to join users and orders, then show Python code to run it.",
            "context_cues": ["SQL = UPPERCASE keywords", "Python = snake_case"],
            "correct_for": "SQL: SELECT, FROM; Python: result_set, fetch_data",
            "why_context_fails": "Style bleeds across languages",
            "why_rag_wins": "Separate retrieval for each language context"
        }
    ],

    "code_comment_style": [
        {
            "query": "Here's a 5-line utility function, explain what each part does.",
            "context_cues": ["5-line = short", "explain each part = inline comments"],
            "correct_for": "inline comments for each line",
            "why_context_fails": "Might use docstring style for short code",
            "why_rag_wins": "Short code + explanation request = inline comments"
        },
        {
            "query": "Write a complete data processing class with documentation.",
            "context_cues": ["complete class = production code", "documentation = docstrings"],
            "correct_for": "docstrings at class/method level, minimal inline",
            "why_context_fails": "Over-comments with inline explanations",
            "why_rag_wins": "Class + documentation context triggers docstring pref"
        }
    ],

    "code_review_scope": [
        {
            "query": "Review this code for bugs, I need to ship it today.",
            "context_cues": ["review = code review", "ship today = urgent, bugs only"],
            "correct_for": "bugs only, skip style",
            "why_context_fails": "Still comments on style issues",
            "why_rag_wins": "Urgency cue + 'bugs' retrieves bugs-only preference"
        },
        {
            "query": "Look at my code and help me improve it for the codebase.",
            "context_cues": ["improve = refactor scope", "for codebase = style matters"],
            "correct_for": "both logic and style suggestions",
            "why_context_fails": "Might only focus on bugs",
            "why_rag_wins": "'Improve' and 'codebase' retrieve full-review pref"
        }
    ],

    # =========================================================================
    # INTERACTION CONFLICTS
    # =========================================================================
    "interaction_autonomy": [
        {
            "query": "Refactor the authentication module.",
            "context_cues": ["refactor = significant change", "no specific instruction"],
            "correct_for": "confirm approach first",
            "why_context_fails": "Might just start refactoring without plan",
            "why_rag_wins": "Ambiguous scope triggers confirmation pref"
        },
        {
            "query": "Change the variable name from 'x' to 'count' in line 5.",
            "context_cues": ["specific instruction", "single change"],
            "correct_for": "execute directly, no confirmation needed",
            "why_context_fails": "Might still ask for confirmation",
            "why_rag_wins": "Specific instruction retrieves execute-directly pref"
        },
        {
            "query": "Update the database schema to add user preferences - it's complex.",
            "context_cues": ["update schema = significant", "complex = acknowledged"],
            "correct_for": "definitely confirm - user said it's complex",
            "why_context_fails": "Might dive in because 'update' sounds actionable",
            "why_rag_wins": "'Complex' keyword strongly triggers confirmation"
        }
    ],

    "interaction_guidance": [
        {
            "query": "Should I use Redis or Memcached for caching?",
            "context_cues": ["should I = asking for recommendation", "or = comparison"],
            "correct_for": "give recommendation with rationale",
            "why_context_fails": "Gives neutral pros/cons without recommendation",
            "why_rag_wins": "'Should I' retrieves recommendation preference"
        },
        {
            "query": "Compare React, Vue, and Angular for my project.",
            "context_cues": ["compare = explicit comparison", "my project = context needed"],
            "correct_for": "table format with tradeoffs",
            "why_context_fails": "Might just recommend one or give long prose",
            "why_rag_wins": "'Compare' retrieves comparison-table preference"
        }
    ],

    # =========================================================================
    # MATH/EXPLANATION CONFLICTS
    # =========================================================================
    "math_detail_level": [
        {
            "query": "What's the derivative of x^2? I'm preparing for an exam.",
            "context_cues": ["what's = direct ask", "exam prep = practice context"],
            "correct_for": "show steps + give practice problem",
            "why_context_fails": "Just gives answer (2x) without exam context",
            "why_rag_wins": "'Exam' retrieves practice-problem preference"
        },
        {
            "query": "Verify my answer: integral of sin(x) = -cos(x) + C. Is this right?",
            "context_cues": ["verify = checking work", "is this right = confirmation"],
            "correct_for": "check step by step, confirm or point out issue",
            "why_context_fails": "Might re-derive from scratch",
            "why_rag_wins": "'Verify' retrieves check-their-work preference"
        }
    ],

    "math_approach": [
        {
            "query": "What's the probability of rolling two sixes?",
            "context_cues": ["probability = statistics", "rolling dice = intuitive example"],
            "correct_for": "intuition first (1 in 36), then formula",
            "why_context_fails": "Starts with P(A∩B) = P(A)P(B) formula",
            "why_rag_wins": "Statistics topic retrieves intuition-first preference"
        },
        {
            "query": "Prove that the sum of angles in a triangle is 180°.",
            "context_cues": ["prove = formal proof", "geometry = visual possible"],
            "correct_for": "structured proof format per preference",
            "why_context_fails": "Might give intuitive explanation instead of proof",
            "why_rag_wins": "'Prove' retrieves proof-format preference"
        }
    ],

    # =========================================================================
    # DOMAIN CONFLICTS
    # =========================================================================
    "domain_example_position": [
        {
            "query": "How do I use the requests library in Python?",
            "context_cues": ["how do I use = practical/API", "library = code example helpful"],
            "correct_for": "minimal example first, then explain parameters",
            "why_context_fails": "Explains parameters first, example last",
            "why_rag_wins": "API/library context retrieves example-first preference"
        },
        {
            "query": "What is dynamic programming?",
            "context_cues": ["what is = concept/theory", "definition needed"],
            "correct_for": "definition first, then example, then edge cases",
            "why_context_fails": "Might lead with example (Fibonacci)",
            "why_rag_wins": "Theory context retrieves definition-first preference"
        }
    ],

    # =========================================================================
    # OUTPUT ARTIFACT CONFLICTS
    # =========================================================================
    "output_code_presentation": [
        {
            "query": "Give me a sorting function I can use, I'm in a hurry.",
            "context_cues": ["give me = copyable", "in a hurry = no explanation"],
            "correct_for": "single code block, no prose",
            "why_context_fails": "Adds explanatory prose between code",
            "why_rag_wins": "'Give me' + 'hurry' retrieves copy-paste preference"
        },
        {
            "query": "Teach me how to implement quicksort step by step.",
            "context_cues": ["teach me = learning", "step by step = chunked"],
            "correct_for": "code in small chunks with explanation between",
            "why_context_fails": "Gives full implementation at once",
            "why_rag_wins": "'Teach' + 'step by step' retrieves chunked preference"
        }
    ],

    # =========================================================================
    # CORRECTION STYLE CONFLICTS
    # =========================================================================
    "correction_severity": [
        {
            "query": "I'm using a hashmap to store my data, is this right?",
            "context_cues": ["hashmap = might mean dict/map", "is this right = validation"],
            "correct_for": "gentle inline (hashmap is fine, also called dict)",
            "why_context_fails": "Might pedantically correct terminology",
            "why_rag_wins": "Minor terminology + validation retrieves gentle-correction pref"
        },
        {
            "query": "I think recursion is just loops with extra steps, right?",
            "context_cues": ["fundamental misconception", "asking for validation"],
            "correct_for": "directly address misconception before proceeding",
            "why_context_fails": "Might gloss over and just show recursion",
            "why_rag_wins": "Fundamental error retrieves explicit-correction preference"
        }
    ],

    # =========================================================================
    # MULTI-DOMAIN CONFLICTS (hardest!)
    # =========================================================================
    "multi_domain_complex": [
        {
            "query": "Quick question - walk me through implementing a binary tree in Python with proper documentation.",
            "context_cues": ["quick = brief", "walk through = detailed", "documentation = thorough"],
            "correct_for": "quick wins (explicit), but include docstrings (documentation ask)",
            "why_context_fails": "Confused by conflicting signals, inconsistent response",
            "why_rag_wins": "Explicit brevity cue retrieved, documentation pref adds docstrings"
        },
        {
            "query": "I'm debugging my ML model and it's not converging. This is frustrating! Compare Adam vs SGD for me.",
            "context_cues": ["debugging = focus on issue", "frustrating = emotional", "compare = table"],
            "correct_for": "acknowledge frustration, then comparison table for optimizers",
            "why_context_fails": "Might skip emotional acknowledgment or wrong format",
            "why_rag_wins": "Frustration pref + comparison pref both retrieved, applied in order"
        },
        {
            "query": "Review this Python code and convert it to JavaScript. Focus on bugs first.",
            "context_cues": ["review = bugs per 'focus' cue", "convert = language change"],
            "correct_for": "Python review (bugs only) + JS conversion (camelCase)",
            "why_context_fails": "Applies wrong scope or wrong naming convention",
            "why_rag_wins": "Multiple relevant prefs retrieved per task segment"
        }
    ]
}


# ============================================================================
# Scenario Generator
# ============================================================================

class ConflictScenarioGenerator:
    """Generates conflict scenarios from templates and user profiles."""

    def __init__(self, profile: Optional[dict] = None, seed: int = 42):
        """Create a generator.

        Args:
            profile: User profile dict with a 'preferences' list (each pref
                is a dict with at least 'pref_id'). May be None, in which
                case generate_scenarios() yields nothing.
            seed: Seed for the private RNG so generation is reproducible
                and does not disturb global `random` state.
        """
        self.profile = profile
        # Index preferences by pref_id for quick lookup; empty without a profile.
        self.preferences = (
            {p['pref_id']: p for p in profile['preferences']} if profile else {}
        )
        self.random = random.Random(seed)

    def generate_for_profile(self, preferences: list, domain: str = None) -> dict:
        """Generate a single conflict scenario for given preferences and domain.

        Returns a dict describing the first conflict found (a group with 2+
        preferences that also has templates), or None when no conflict exists.
        """
        # Group the supplied preferences by their conflict_group tag.
        conflict_groups = {}
        for pref in preferences:
            cg = pref.get('conflict_group')
            if cg:
                conflict_groups.setdefault(cg, []).append(pref)

        # A conflict needs at least 2 preferences AND a known template set.
        for cg, prefs in conflict_groups.items():
            if len(prefs) >= 2 and cg in CONFLICT_TEMPLATES:
                template = self.random.choice(CONFLICT_TEMPLATES[cg])
                return {
                    "query": template['query'],
                    "conflict_group": cg,
                    "preferences": prefs,
                    "expected_preference": prefs[0]['pref_id'],  # First one as expected
                }
        return None

    def generate_scenarios(self, num_per_conflict_type: int = 3) -> list:
        """Generate conflict scenarios based on the profile's preferences.

        Returns an empty list when the generator has no profile.
        """
        # Guard: without a profile there is nothing to conflict with.
        if not self.profile:
            return []

        scenarios = []

        for conflict_group, templates in CONFLICT_TEMPLATES.items():
            # Check if this conflict group exists in user's preferences
            relevant_prefs = [
                p for p in self.profile['preferences']
                if p.get('conflict_group') == conflict_group
            ]

            if len(relevant_prefs) < 2:
                continue  # Need at least 2 prefs to have a conflict

            # Generate scenarios from templates (without replacement).
            selected_templates = self.random.sample(
                templates,
                min(num_per_conflict_type, len(templates))
            )

            for i, template in enumerate(selected_templates):
                scenario = self._create_scenario(
                    conflict_group, template, relevant_prefs, i
                )
                if scenario:
                    scenarios.append(scenario)

        return scenarios

    def _create_scenario(
        self,
        conflict_group: str,
        template: dict,
        relevant_prefs: list,
        index: int
    ) -> ConflictScenario:
        """Create a ConflictScenario from a template and the conflicting prefs."""
        # Determine which preference is correct, based on context cues
        # present in the query text.
        query = template['query']
        correct_pref = self._determine_correct_preference(query, relevant_prefs)
        wrong_prefs = [p for p in relevant_prefs if p['pref_id'] != correct_pref['pref_id']]

        return ConflictScenario(
            scenario_id=f"{conflict_group}_{index:03d}",
            conflict_group=conflict_group,
            query=query,
            context_cues=template.get('context_cues', []),
            triggered_prefs=[p['pref_id'] for p in relevant_prefs],
            correct_pref_id=correct_pref['pref_id'],
            wrong_pref_ids=[p['pref_id'] for p in wrong_prefs],
            why_correct=template.get('correct_for', ''),
            expected_rag_behavior=template.get('why_rag_wins', ''),
            expected_context_failure=template.get('why_context_fails', '')
        )

    def _determine_correct_preference(self, query: str, prefs: list) -> dict:
        """
        Determine which preference is correct for a query.
        Uses keyword matching on priority_context.
        """
        query_lower = query.lower()
        scores = []

        for pref in prefs:
            score = 0
            for keyword in pref.get('priority_context', []):
                if keyword.lower() in query_lower:
                    score += 1
            # Bonus for condition match. The empty-string guard matters:
            # '' is a substring of everything, which would otherwise hand
            # a +2 bonus to every preference *without* a condition.
            condition = pref.get('condition', '').lower()
            if condition and condition in query_lower:
                score += 2
            scores.append((pref, score))

        # Return highest scoring preference (stable for ties: first wins).
        scores.sort(key=lambda x: x[1], reverse=True)
        return scores[0][0] if scores else prefs[0]


def generate_conflict_enriched_dataset(
    profiles_path: str,
    output_path: str,
    scenarios_per_conflict: int = 3,
    seed: int = 42
):
    """
    Generate a dataset where every query triggers at least one conflict.

    Reads user profiles from a JSONL file, generates conflict scenarios for
    each, writes them to `output_path` as JSONL, prints coverage stats, and
    returns the list of scenario dicts.
    """
    profiles = []
    with open(profiles_path) as f:
        for line in f:
            # Tolerate blank/whitespace-only lines in the JSONL input.
            if line.strip():
                profiles.append(json.loads(line))

    all_scenarios = []
    conflict_coverage = {}

    for profile in profiles:
        generator = ConflictScenarioGenerator(profile, seed)
        scenarios = generator.generate_scenarios(scenarios_per_conflict)

        for scenario in scenarios:
            scenario_dict = {
                'user_id': profile['user_id'],
                'scenario_id': scenario.scenario_id,
                'conflict_group': scenario.conflict_group,
                'query': scenario.query,
                'context_cues': scenario.context_cues,
                'triggered_prefs': scenario.triggered_prefs,
                'correct_pref_id': scenario.correct_pref_id,
                'wrong_pref_ids': scenario.wrong_pref_ids,
                'why_correct': scenario.why_correct,
                'expected_rag_behavior': scenario.expected_rag_behavior,
                'expected_context_failure': scenario.expected_context_failure
            }
            all_scenarios.append(scenario_dict)

            # Track coverage per conflict type for the summary below.
            cg = scenario.conflict_group
            conflict_coverage[cg] = conflict_coverage.get(cg, 0) + 1

    # Save as JSONL, creating parent directories as needed.
    Path(output_path).parent.mkdir(parents=True, exist_ok=True)
    with open(output_path, 'w') as f:
        for scenario in all_scenarios:
            f.write(json.dumps(scenario) + '\n')

    print(f"Generated {len(all_scenarios)} conflict scenarios")
    print("Coverage by conflict type:")
    for cg, count in sorted(conflict_coverage.items()):
        print(f"  {cg}: {count}")

    return all_scenarios


def create_evaluation_harness(scenarios: list) -> dict:
    """
    Create an evaluation harness that programmatically checks
    if the correct preference was applied.
    """
    harness = {
        "total_scenarios": len(scenarios),
        "by_conflict_type": {},
        "evaluation_functions": {}
    }

    # Group scenarios by conflict type.
    for scenario in scenarios:
        cg = scenario['conflict_group']
        harness['by_conflict_type'].setdefault(cg, []).append(scenario)

    # Add evaluation functions for each conflict type
    harness['evaluation_functions'] = {
        "format_structure": check_format_structure,
        "verbosity": check_verbosity,
        "naming_convention": check_naming_convention,
        "answer_position": check_answer_position,
        # ... more evaluators
    }

    return harness


# ============================================================================
# Evaluation Functions (check if correct preference was applied)
# ============================================================================

def check_format_structure(response: str, correct_pref: dict) -> bool:
    """Check if response uses correct format (bullets vs numbered).

    Markers are matched only at the start of a line so that hyphens inside
    prose ("state-of-the-art") or decimals ("3.14") don't count as list
    formatting.
    """
    has_bullets = bool(re.search(r'^\s*[•*-]\s+', response, re.MULTILINE))
    has_numbers = bool(re.search(r'^\s*\d+[.)]\s+', response, re.MULTILINE))

    action = correct_pref.get('action', '').lower()
    if 'bullet' in action:
        return has_bullets and not has_numbers
    elif 'numbered' in action:
        return has_numbers
    return True  # Can't determine


def check_verbosity(response: str, correct_pref: dict) -> bool:
    """Check if response matches verbosity preference."""
    word_count = len(response.split())

    action = correct_pref.get('action', '').lower()
    if 'concise' in action or '3 sentences' in action:
        return word_count < 100  # Rough threshold
    elif 'detailed' in action:
        return word_count > 150
    return True


def check_naming_convention(response: str, correct_pref: dict) -> bool:
    """Check if code uses correct naming convention.

    NOTE(review): the original compared 'camelCase' (capital C) against the
    lowercased action string, so the camelCase branch could never fire; the
    comparisons below use lowercased needles throughout.
    """
    action = correct_pref.get('action', '').lower()

    # Look for function/variable definitions
    if 'snake_case' in action:
        # Should have underscores, no camelCase
        has_snake = bool(re.search(r'[a-z]+_[a-z]+', response))
        has_camel = bool(re.search(r'[a-z]+[A-Z][a-z]+', response))
        return has_snake and not has_camel

    elif 'camelcase' in action:
        has_camel = bool(re.search(r'[a-z]+[A-Z][a-z]+', response))
        return has_camel

    return True


def check_answer_position(response: str, correct_pref: dict) -> bool:
    """Check if answer comes first or explanation builds up."""
    # Simplified: inspect the first sentence (or first 100 chars) only.
    first_sentence = response.split('.')[0] if '.' in response else response[:100]

    action = correct_pref.get('action', '').lower()
    if 'answer first' in action:
        # First sentence should be direct
        direct_indicators = ['is', 'are', 'the answer', 'yes', 'no', 'it\'s']
        return any(ind in first_sentence.lower() for ind in direct_indicators)

    elif 'build up' in action:
        # First sentence should be explanatory
        buildup_indicators = ['let\'s', 'first', 'to understand', 'consider']
        return any(ind in first_sentence.lower() for ind in buildup_indicators)

    return True


# ============================================================================
# Main
# ============================================================================

if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--profiles", default="collaborativeagents/data/complex_profiles/profiles.jsonl")
    parser.add_argument("--output", default="collaborativeagents/data/conflict_scenarios.jsonl")
    parser.add_argument("--scenarios_per_conflict", type=int, default=3)
    parser.add_argument("--seed", type=int, default=42)

    args = parser.parse_args()

    scenarios = generate_conflict_enriched_dataset(
        args.profiles,
        args.output,
        args.scenarios_per_conflict,
        args.seed
    )
