""" Generate 100 complex user profiles with ~40 conditional preferences using LLM. Key differences from original CollaborativeAgents: 1. 40 conditional preferences (vs their 3 flat preferences) 2. Preferences have explicit conditions for when they apply 3. Conflict groups marked for testing conflict resolution 4. LLM-based batch generation with quality control """ import json import random import hashlib from pathlib import Path from dataclasses import dataclass, field, asdict from typing import Optional, List, Dict, Any import argparse try: import litellm except ImportError: litellm = None # ============================================================================= # Preference Category Definitions # ============================================================================= PREFERENCE_CATEGORIES = { "response_format": { "num_preferences": 4, "conflicts": [("rf_bullets", "rf_numbered"), ("rf_answer_first", "rf_build_up")], "prompt": """Generate 4 preferences about response FORMAT: 1. When to use bullet points vs numbered lists 2. When to lead with the answer vs build up to it Each must have: - A SPECIFIC condition (trigger phrase or situation) - A clear action (what to do) - Conflict group (format_structure or answer_position) - Priority keywords that trigger this preference Make conditions mutually exclusive within each conflict group.""" }, "verbosity": { "num_preferences": 5, "conflicts": [("vb_concise", "vb_detailed"), ("vb_explain_why", "vb_just_answer")], "prompt": """Generate 5 preferences about VERBOSITY/LENGTH: 1. When to be concise (user says "quick", "briefly", "TL;DR") 2. When to be detailed (complex topics, "explain", "in depth") 3. When to explain reasoning vs just give answer Include explicit trigger phrases in conditions. Conflict groups: response_length, explanation_depth""" }, "code_style": { "num_preferences": 8, "conflicts": [ ("cs_snake", "cs_camel", "cs_sql_upper"), # By language ("cs_inline_comments", "cs_docstrings"), # Comment style ("cs_bugs_only", "cs_full_review") # Review scope ], "prompt": """Generate 8 preferences about CODE STYLE: 1-3. Naming conventions BY LANGUAGE (Python=snake_case, JS=camelCase, SQL=UPPERCASE) 4-5. Comment styles for short snippets vs production code 6-7. Code review scope (bugs only vs style too) 8. Error handling preference Conflict groups: naming_convention, comment_style, review_scope""" }, "math_style": { "num_preferences": 6, "conflicts": [("ms_show_steps", "ms_high_level"), ("ms_intuition", "ms_formula")], "prompt": """Generate 6 preferences about MATHEMATICAL explanations: 1-2. When to show detailed steps vs high-level approach 3-4. When to lead with intuition vs formula (statistics vs pure math) 5. How to structure proofs 6. Practice problems when studying for exams Conflict groups: math_detail, math_approach""" }, "interaction_pattern": { "num_preferences": 6, "conflicts": [("ip_confirm", "ip_execute"), ("ip_recommend", "ip_compare")], "prompt": """Generate 6 preferences about INTERACTION patterns: 1-2. When to confirm before acting vs execute directly 3-4. When to recommend vs present options/comparison 5. How to handle user frustration 6. How to handle user thanks/satisfaction Conflict groups: autonomy, guidance_style""" }, "domain_specific": { "num_preferences": 6, "conflicts": [("ds_example_first", "ds_definition_first")], "prompt": """Generate 6 DOMAIN-SPECIFIC preferences: 1. ML explanations (include math formulation) 2. System design (components list before interactions) 3. API/library usage (example first) 4. Theoretical concepts (definition first) 5. Data structures (include complexity) 6. Documentation style Conflict group: example_position""" }, "error_correction": { "num_preferences": 4, "conflicts": [("ec_gentle", "ec_direct")], "prompt": """Generate 4 preferences about ERROR CORRECTION: 1. Minor terminology errors (correct gently inline) 2. Fundamental misconceptions (address directly) 3. Code bugs 4. Agent's own mistakes Conflict group: correction_style""" }, "output_artifacts": { "num_preferences": 4, "conflicts": [("oa_single_block", "oa_chunked")], "prompt": """Generate 4 preferences about OUTPUT format: 1. Copyable code (single block) 2. Teaching code (chunked with explanations) 3. Terminal commands (bash blocks with expected output) 4. Always specify language in code fences Conflict group: code_presentation""" } } LLM_PREFERENCE_GENERATION_PROMPT = """You are generating CONDITIONAL user preferences for a personalization benchmark. # Category: {category_name} # Number of preferences to generate: {num_preferences} {category_prompt} # Output Requirements Generate exactly {num_preferences} preferences in this JSON format: ```json {{ "preferences": [ {{ "pref_id": "{prefix}_001", "condition": "When X happens / When user says Y / For Z type of content", "action": "Do A, B, C (be specific)", "conflict_group": "group_name_or_null", "priority_context": ["keyword1", "keyword2", "phrase1"] }}, ... ] }} ``` # Critical Rules: 1. Conditions must be SPECIFIC and OBSERVABLE (include trigger phrases) 2. Within a conflict group, conditions must be MUTUALLY EXCLUSIVE 3. Priority_context keywords should appear in queries that trigger this preference 4. Actions must be concrete and verifiable Generate preferences that will: - Create interesting conflicts (RAG should resolve correctly, context methods fail) - Be testable (we can verify if an agent followed them) - Be realistic (based on actual user behavior) Output ONLY the JSON, no other text.""" PERSONA_GENERATION_PROMPT = """Generate a realistic user persona (2-3 sentences) that would naturally have these preference categories: {categories} The persona should be a software developer, researcher, or technical professional. Include: - Professional background (role, experience level, domain) - Communication style tendencies - Work context Output ONLY the persona text, no JSON or formatting.""" # ============================================================================= # Profile Generator # ============================================================================= class ProfileGenerator: """Generate complex user profiles with conditional preferences.""" def __init__(self, model: str = "meta-llama/Llama-3.1-70B-Instruct", seed: int = 42): self.model = model self.random = random.Random(seed) if litellm is None: raise ImportError("litellm required for profile generation") def _call_llm(self, prompt: str, json_mode: bool = True) -> str: """Call LLM with prompt.""" kwargs = { "model": self.model, "messages": [{"role": "user", "content": prompt}], "temperature": 0.7, "max_tokens": 4096, } if json_mode: kwargs["response_format"] = {"type": "json_object"} response = litellm.completion(**kwargs) return response.choices[0].message.content def _parse_json(self, text: str) -> dict: """Parse JSON from response.""" import re try: return json.loads(text) except json.JSONDecodeError: pass # Try markdown code block match = re.search(r'```(?:json)?\s*([\s\S]*?)```', text) if match: try: return json.loads(match.group(1)) except: pass # Try to find JSON object match = re.search(r'\{[\s\S]*\}', text) if match: try: return json.loads(match.group()) except: pass raise ValueError(f"Failed to parse JSON from: {text[:500]}") def generate_preferences_for_category( self, category: str, prefix: str ) -> List[Dict]: """Generate preferences for a single category.""" cat_info = PREFERENCE_CATEGORIES[category] prompt = LLM_PREFERENCE_GENERATION_PROMPT.format( category_name=category, num_preferences=cat_info["num_preferences"], category_prompt=cat_info["prompt"], prefix=prefix ) response = self._call_llm(prompt, json_mode=True) data = self._parse_json(response) prefs = data.get("preferences", data) if isinstance(prefs, dict): prefs = list(prefs.values()) # Validate and fix pref_ids for i, pref in enumerate(prefs): if "pref_id" not in pref: pref["pref_id"] = f"{prefix}_{i+1:03d}" return prefs def generate_persona(self, categories: List[str]) -> str: """Generate a persona for the given preference categories.""" prompt = PERSONA_GENERATION_PROMPT.format( categories=", ".join(categories) ) return self._call_llm(prompt, json_mode=False).strip() def generate_profile(self, user_id: str) -> Dict: """Generate a complete user profile with ~40 preferences.""" all_preferences = [] category_prefixes = { "response_format": "rf", "verbosity": "vb", "code_style": "cs", "math_style": "ms", "interaction_pattern": "ip", "domain_specific": "ds", "error_correction": "ec", "output_artifacts": "oa" } print(f" Generating preferences for {user_id}...") for category, prefix in category_prefixes.items(): try: prefs = self.generate_preferences_for_category(category, prefix) all_preferences.extend(prefs) print(f" {category}: {len(prefs)} preferences") except Exception as e: print(f" ERROR in {category}: {e}") # Generate persona print(f" Generating persona...") persona = self.generate_persona(list(category_prefixes.keys())) # Build conflict groups mapping conflict_groups = {} for pref in all_preferences: cg = pref.get("conflict_group") if cg: if cg not in conflict_groups: conflict_groups[cg] = [] conflict_groups[cg].append(pref["pref_id"]) return { "user_id": user_id, "persona": persona, "preferences": all_preferences, "conflict_groups": conflict_groups, "meta": { "total_preferences": len(all_preferences), "total_conflict_groups": len(conflict_groups), "generator": "generate_profiles_v2.py" } } def generate_profiles_batch( num_profiles: int, output_path: Path, model: str = "meta-llama/Llama-3.1-70B-Instruct", seed: int = 42 ): """Generate multiple profiles.""" generator = ProfileGenerator(model=model, seed=seed) profiles = [] for i in range(num_profiles): user_id = f"user_{hashlib.md5(f'{seed}_{i}'.encode()).hexdigest()[:8]}" print(f"\n[{i+1}/{num_profiles}] Generating profile: {user_id}") try: profile = generator.generate_profile(user_id) profiles.append(profile) print(f" Generated {profile['meta']['total_preferences']} preferences") except Exception as e: print(f" ERROR: {e}") continue # Save output_path.parent.mkdir(parents=True, exist_ok=True) with open(output_path, 'w') as f: for profile in profiles: f.write(json.dumps(profile) + '\n') print(f"\n{'='*60}") print(f"Generated {len(profiles)} profiles") print(f"Saved to: {output_path}") return profiles # ============================================================================= # Fallback: Generate from Schema (No LLM Required) # ============================================================================= def generate_profiles_from_schema( num_profiles: int, schema_path: Path, output_path: Path, seed: int = 42 ) -> List[Dict]: """ Generate profiles from the predefined schema (no LLM calls). Useful for testing or when API is unavailable. """ with open(schema_path) as f: schema = json.load(f) random.seed(seed) profiles = [] # Extract all preferences from schema all_prefs = [] for cat in schema["preference_categories"]: all_prefs.extend(cat["preferences"]) # Sample personas sample_personas = [ "A senior backend engineer who values efficiency and directness. Prefers practical solutions over theoretical discussions.", "A PhD student in ML who is meticulous about mathematical rigor. Appreciates step-by-step derivations.", "A junior developer learning full-stack. Prefers patient, incremental explanations with examples.", "A DevOps engineer focused on automation. Wants concise, actionable answers with commands to run.", "A data scientist who thinks visually. Prefers intuition before formulas and lots of examples.", "A tech lead reviewing code from their team. Focuses on maintainability and best practices.", "A researcher prototyping quickly. Wants working code fast, willing to refactor later.", "A student preparing for technical interviews. Needs step-by-step problem solving practice.", ] for i in range(num_profiles): user_id = f"user_{hashlib.md5(f'{seed}_{i}'.encode()).hexdigest()[:8]}" # Select random subset of preferences (35-45) num_prefs = random.randint(35, 45) selected_prefs = random.sample(all_prefs, min(num_prefs, len(all_prefs))) # Build conflict groups conflict_groups = {} for pref in selected_prefs: cg = pref.get("conflict_group") if cg: if cg not in conflict_groups: conflict_groups[cg] = [] conflict_groups[cg].append(pref["pref_id"]) profile = { "user_id": user_id, "persona": random.choice(sample_personas), "preferences": selected_prefs, "conflict_groups": conflict_groups, "meta": { "total_preferences": len(selected_prefs), "total_conflict_groups": len(conflict_groups), "generator": "schema_based" } } profiles.append(profile) # Save output_path.parent.mkdir(parents=True, exist_ok=True) with open(output_path, 'w') as f: for profile in profiles: f.write(json.dumps(profile) + '\n') print(f"Generated {len(profiles)} profiles from schema") return profiles # ============================================================================= # Main # ============================================================================= if __name__ == "__main__": parser = argparse.ArgumentParser( description="Generate complex user profiles with conditional preferences" ) parser.add_argument("--num_profiles", type=int, default=100, help="Number of profiles to generate") parser.add_argument("--output", type=str, default="collaborativeagents/data/complex_profiles_v2/profiles.jsonl") parser.add_argument("--model", type=str, default="meta-llama/Llama-3.1-70B-Instruct", help="LLM model for generation") parser.add_argument("--seed", type=int, default=42) parser.add_argument("--from_schema", type=str, default=None, help="Generate from schema file instead of LLM") args = parser.parse_args() output_path = Path(args.output) if args.from_schema: generate_profiles_from_schema( num_profiles=args.num_profiles, schema_path=Path(args.from_schema), output_path=output_path, seed=args.seed ) else: generate_profiles_batch( num_profiles=args.num_profiles, output_path=output_path, model=args.model, seed=args.seed )