diff options
Diffstat (limited to 'collaborativeagents/scripts/generate_profiles_v2.py')
| -rw-r--r-- | collaborativeagents/scripts/generate_profiles_v2.py | 475 |
1 files changed, 475 insertions, 0 deletions
"""
Generate 100 complex user profiles with ~40 conditional preferences using LLM.

Key differences from original CollaborativeAgents:
1. 40 conditional preferences (vs their 3 flat preferences)
2. Preferences have explicit conditions for when they apply
3. Conflict groups marked for testing conflict resolution
4. LLM-based batch generation with quality control
"""

import json
import random
import re
import hashlib
from pathlib import Path
from dataclasses import dataclass, field, asdict
from typing import Optional, List, Dict, Any
import argparse

try:
    import litellm
except ImportError:
    # litellm is only needed for the LLM-backed path; the schema-based
    # fallback works without it, so the import failure is deferred.
    litellm = None


# =============================================================================
# Preference Category Definitions
# =============================================================================

# Per-category generation spec: how many preferences to request, which
# preference pairs/triples are expected to conflict, and the category-specific
# instructions that get embedded into LLM_PREFERENCE_GENERATION_PROMPT.
PREFERENCE_CATEGORIES = {
    "response_format": {
        "num_preferences": 4,
        "conflicts": [("rf_bullets", "rf_numbered"), ("rf_answer_first", "rf_build_up")],
        "prompt": """Generate 4 preferences about response FORMAT:
1. When to use bullet points vs numbered lists
2. When to lead with the answer vs build up to it

Each must have:
- A SPECIFIC condition (trigger phrase or situation)
- A clear action (what to do)
- Conflict group (format_structure or answer_position)
- Priority keywords that trigger this preference

Make conditions mutually exclusive within each conflict group."""
    },

    "verbosity": {
        "num_preferences": 5,
        "conflicts": [("vb_concise", "vb_detailed"), ("vb_explain_why", "vb_just_answer")],
        "prompt": """Generate 5 preferences about VERBOSITY/LENGTH:
1. When to be concise (user says "quick", "briefly", "TL;DR")
2. When to be detailed (complex topics, "explain", "in depth")
3. When to explain reasoning vs just give answer

Include explicit trigger phrases in conditions.
Conflict groups: response_length, explanation_depth"""
    },

    "code_style": {
        "num_preferences": 8,
        "conflicts": [
            ("cs_snake", "cs_camel", "cs_sql_upper"),  # By language
            ("cs_inline_comments", "cs_docstrings"),  # Comment style
            ("cs_bugs_only", "cs_full_review")  # Review scope
        ],
        "prompt": """Generate 8 preferences about CODE STYLE:
1-3. Naming conventions BY LANGUAGE (Python=snake_case, JS=camelCase, SQL=UPPERCASE)
4-5. Comment styles for short snippets vs production code
6-7. Code review scope (bugs only vs style too)
8. Error handling preference

Conflict groups: naming_convention, comment_style, review_scope"""
    },

    "math_style": {
        "num_preferences": 6,
        "conflicts": [("ms_show_steps", "ms_high_level"), ("ms_intuition", "ms_formula")],
        "prompt": """Generate 6 preferences about MATHEMATICAL explanations:
1-2. When to show detailed steps vs high-level approach
3-4. When to lead with intuition vs formula (statistics vs pure math)
5. How to structure proofs
6. Practice problems when studying for exams

Conflict groups: math_detail, math_approach"""
    },

    "interaction_pattern": {
        "num_preferences": 6,
        "conflicts": [("ip_confirm", "ip_execute"), ("ip_recommend", "ip_compare")],
        "prompt": """Generate 6 preferences about INTERACTION patterns:
1-2. When to confirm before acting vs execute directly
3-4. When to recommend vs present options/comparison
5. How to handle user frustration
6. How to handle user thanks/satisfaction

Conflict groups: autonomy, guidance_style"""
    },

    "domain_specific": {
        "num_preferences": 6,
        "conflicts": [("ds_example_first", "ds_definition_first")],
        "prompt": """Generate 6 DOMAIN-SPECIFIC preferences:
1. ML explanations (include math formulation)
2. System design (components list before interactions)
3. API/library usage (example first)
4. Theoretical concepts (definition first)
5. Data structures (include complexity)
6. Documentation style

Conflict group: example_position"""
    },

    "error_correction": {
        "num_preferences": 4,
        "conflicts": [("ec_gentle", "ec_direct")],
        "prompt": """Generate 4 preferences about ERROR CORRECTION:
1. Minor terminology errors (correct gently inline)
2. Fundamental misconceptions (address directly)
3. Code bugs
4. Agent's own mistakes

Conflict group: correction_style"""
    },

    "output_artifacts": {
        "num_preferences": 4,
        "conflicts": [("oa_single_block", "oa_chunked")],
        "prompt": """Generate 4 preferences about OUTPUT format:
1. Copyable code (single block)
2. Teaching code (chunked with explanations)
3. Terminal commands (bash blocks with expected output)
4. Always specify language in code fences

Conflict group: code_presentation"""
    }
}


# Template filled per category via str.format(); literal braces of the JSON
# example are doubled so that format() leaves them intact.
LLM_PREFERENCE_GENERATION_PROMPT = """You are generating CONDITIONAL user preferences for a personalization benchmark.

# Category: {category_name}
# Number of preferences to generate: {num_preferences}

{category_prompt}

# Output Requirements
Generate exactly {num_preferences} preferences in this JSON format:
```json
{{
  "preferences": [
    {{
      "pref_id": "{prefix}_001",
      "condition": "When X happens / When user says Y / For Z type of content",
      "action": "Do A, B, C (be specific)",
      "conflict_group": "group_name_or_null",
      "priority_context": ["keyword1", "keyword2", "phrase1"]
    }},
    ...
  ]
}}
```

# Critical Rules:
1. Conditions must be SPECIFIC and OBSERVABLE (include trigger phrases)
2. Within a conflict group, conditions must be MUTUALLY EXCLUSIVE
3. Priority_context keywords should appear in queries that trigger this preference
4. Actions must be concrete and verifiable

Generate preferences that will:
- Create interesting conflicts (RAG should resolve correctly, context methods fail)
- Be testable (we can verify if an agent followed them)
- Be realistic (based on actual user behavior)

Output ONLY the JSON, no other text."""


PERSONA_GENERATION_PROMPT = """Generate a realistic user persona (2-3 sentences) that would naturally have these preference categories:
{categories}

The persona should be a software developer, researcher, or technical professional. Include:
- Professional background (role, experience level, domain)
- Communication style tendencies
- Work context

Output ONLY the persona text, no JSON or formatting."""


# Category name -> short prefix used when building pref_ids ("rf_001", ...).
# Iteration order is the order in which categories are generated.
CATEGORY_PREFIXES = {
    "response_format": "rf",
    "verbosity": "vb",
    "code_style": "cs",
    "math_style": "ms",
    "interaction_pattern": "ip",
    "domain_specific": "ds",
    "error_correction": "ec",
    "output_artifacts": "oa"
}

# Canned personas used by the schema-based fallback (no LLM available).
SAMPLE_PERSONAS = [
    "A senior backend engineer who values efficiency and directness. Prefers practical solutions over theoretical discussions.",
    "A PhD student in ML who is meticulous about mathematical rigor. Appreciates step-by-step derivations.",
    "A junior developer learning full-stack. Prefers patient, incremental explanations with examples.",
    "A DevOps engineer focused on automation. Wants concise, actionable answers with commands to run.",
    "A data scientist who thinks visually. Prefers intuition before formulas and lots of examples.",
    "A tech lead reviewing code from their team. Focuses on maintainability and best practices.",
    "A researcher prototyping quickly. Wants working code fast, willing to refactor later.",
    "A student preparing for technical interviews. Needs step-by-step problem solving practice.",
]

# The generation prompt's template shows "group_name_or_null", so a model may
# answer with the *string* "null" instead of JSON null; such values mean
# "no conflict group" and must not become a group of their own.
_NO_CONFLICT_GROUP_MARKERS = frozenset({"", "null", "none"})


# =============================================================================
# Shared helpers
# =============================================================================

def _make_user_id(seed: int, index: int) -> str:
    """Return the user id for profile *index* under *seed*.

    The id is "user_" plus the first 8 hex digits of md5("{seed}_{index}"),
    so the same (seed, index) pair always yields the same id. md5 is used
    only as a cheap stable hash here, not for security.
    """
    digest = hashlib.md5(f"{seed}_{index}".encode()).hexdigest()
    return f"user_{digest[:8]}"


def _build_conflict_groups(preferences: List[Dict]) -> Dict[str, List[str]]:
    """Map each conflict group name to the pref_ids that belong to it.

    Preferences whose "conflict_group" is missing, falsy, or one of the
    textual null markers ("null", "none", case-insensitive) are skipped.

    Raises:
        KeyError: if a preference with a conflict group has no "pref_id".
    """
    groups: Dict[str, List[str]] = {}
    for pref in preferences:
        group = pref.get("conflict_group")
        if not group:
            continue
        if isinstance(group, str) and group.strip().lower() in _NO_CONFLICT_GROUP_MARKERS:
            continue
        groups.setdefault(group, []).append(pref["pref_id"])
    return groups


def _write_jsonl(profiles: List[Dict], output_path: Path) -> None:
    """Write *profiles* to *output_path* as one JSON object per line.

    Parent directories are created if needed; an existing file is overwritten.
    """
    output_path.parent.mkdir(parents=True, exist_ok=True)
    with open(output_path, "w", encoding="utf-8") as f:
        f.writelines(json.dumps(profile) + "\n" for profile in profiles)


# =============================================================================
# Profile Generator
# =============================================================================

class ProfileGenerator:
    """Generate complex user profiles with conditional preferences."""

    def __init__(self, model: str = "meta-llama/Llama-3.1-70B-Instruct", seed: int = 42):
        """Store the model name and a seeded RNG.

        Raises:
            ImportError: if litellm could not be imported.
        """
        if litellm is None:
            raise ImportError("litellm required for profile generation")

        self.model = model
        self.random = random.Random(seed)

    def _call_llm(self, prompt: str, json_mode: bool = True) -> str:
        """Send *prompt* as a single user message and return the reply text.

        When *json_mode* is true the request asks for a JSON-object response
        format. An empty/None reply is returned as "" so callers can always
        treat the result as a string.
        """
        kwargs = {
            "model": self.model,
            "messages": [{"role": "user", "content": prompt}],
            "temperature": 0.7,
            "max_tokens": 4096,
        }

        if json_mode:
            kwargs["response_format"] = {"type": "json_object"}

        response = litellm.completion(**kwargs)
        return response.choices[0].message.content or ""

    def _parse_json(self, text: str) -> dict:
        """Parse JSON from an LLM response, tolerating surrounding text.

        Tries, in order: the whole text, the contents of the first markdown
        code fence, and the span from the first "{" to the last "}".

        Raises:
            ValueError: if none of the candidates is valid JSON.
        """
        candidates = [text]

        # Try markdown code block
        fenced = re.search(r'```(?:json)?\s*([\s\S]*?)```', text)
        if fenced:
            candidates.append(fenced.group(1))

        # Try to find JSON object
        braced = re.search(r'\{[\s\S]*\}', text)
        if braced:
            candidates.append(braced.group())

        for candidate in candidates:
            try:
                return json.loads(candidate)
            except json.JSONDecodeError:
                # Only decode failures mean "try the next candidate";
                # anything else (e.g. KeyboardInterrupt) must propagate.
                continue

        raise ValueError(f"Failed to parse JSON from: {text[:500]}")

    def generate_preferences_for_category(
        self,
        category: str,
        prefix: str
    ) -> List[Dict]:
        """Generate preferences for a single category.

        Args:
            category: key into PREFERENCE_CATEGORIES.
            prefix: short id prefix used for pref_ids missing from the reply.

        Returns:
            The list of preference dicts from the model reply. Entries that
            are not dicts are dropped; entries without "pref_id" get
            "{prefix}_{NNN}" based on their position in the returned list.

        Raises:
            KeyError: if *category* is unknown.
            ValueError: if the reply is not JSON or holds no preference list.
        """
        cat_info = PREFERENCE_CATEGORIES[category]

        prompt = LLM_PREFERENCE_GENERATION_PROMPT.format(
            category_name=category,
            num_preferences=cat_info["num_preferences"],
            category_prompt=cat_info["prompt"],
            prefix=prefix
        )

        response = self._call_llm(prompt, json_mode=True)
        data = self._parse_json(response)

        # The model may wrap the list in {"preferences": [...]}, return a
        # bare list, or return an object keyed by id; accept all three.
        prefs = data.get("preferences", data) if isinstance(data, dict) else data
        if isinstance(prefs, dict):
            prefs = list(prefs.values())
        if not isinstance(prefs, list):
            raise ValueError(
                f"Expected a list of preferences, got {type(prefs).__name__}"
            )

        # Downstream code calls .get() on every preference, so anything that
        # is not a dict would only crash later; drop it here.
        prefs = [pref for pref in prefs if isinstance(pref, dict)]

        # Validate and fix pref_ids
        for i, pref in enumerate(prefs):
            if "pref_id" not in pref:
                pref["pref_id"] = f"{prefix}_{i+1:03d}"

        return prefs

    def generate_persona(self, categories: List[str]) -> str:
        """Generate a persona for the given preference categories."""
        prompt = PERSONA_GENERATION_PROMPT.format(
            categories=", ".join(categories)
        )

        return self._call_llm(prompt, json_mode=False).strip()

    def generate_profile(self, user_id: str) -> Dict:
        """Generate a complete user profile with ~40 preferences.

        Each category in CATEGORY_PREFIXES is generated independently; a
        failure in one category is reported and skipped (best effort) so the
        remaining categories still contribute. Persona generation errors are
        not caught here and propagate to the caller.
        """
        all_preferences = []

        print(f" Generating preferences for {user_id}...")
        for category, prefix in CATEGORY_PREFIXES.items():
            try:
                prefs = self.generate_preferences_for_category(category, prefix)
            except Exception as e:
                print(f" ERROR in {category}: {e}")
            else:
                all_preferences.extend(prefs)
                print(f" {category}: {len(prefs)} preferences")

        # Generate persona
        print(" Generating persona...")
        persona = self.generate_persona(list(CATEGORY_PREFIXES.keys()))

        # Build conflict groups mapping
        conflict_groups = _build_conflict_groups(all_preferences)

        return {
            "user_id": user_id,
            "persona": persona,
            "preferences": all_preferences,
            "conflict_groups": conflict_groups,
            "meta": {
                "total_preferences": len(all_preferences),
                "total_conflict_groups": len(conflict_groups),
                "generator": "generate_profiles_v2.py"
            }
        }


def generate_profiles_batch(
    num_profiles: int,
    output_path: Path,
    model: str = "meta-llama/Llama-3.1-70B-Instruct",
    seed: int = 42
) -> List[Dict]:
    """Generate multiple profiles with the LLM and save them as JSONL.

    A profile whose generation raises is reported and skipped, so the output
    may contain fewer than *num_profiles* entries.

    Returns:
        The list of successfully generated profiles.

    Raises:
        ImportError: if litellm is not installed.
    """
    generator = ProfileGenerator(model=model, seed=seed)
    profiles = []

    for i in range(num_profiles):
        user_id = _make_user_id(seed, i)
        print(f"\n[{i+1}/{num_profiles}] Generating profile: {user_id}")

        try:
            profile = generator.generate_profile(user_id)
        except Exception as e:
            print(f" ERROR: {e}")
            continue

        profiles.append(profile)
        print(f" Generated {profile['meta']['total_preferences']} preferences")

    # Save
    _write_jsonl(profiles, output_path)

    print(f"\n{'='*60}")
    print(f"Generated {len(profiles)} profiles")
    print(f"Saved to: {output_path}")

    return profiles


# =============================================================================
# Fallback: Generate from Schema (No LLM Required)
# =============================================================================

def generate_profiles_from_schema(
    num_profiles: int,
    schema_path: Path,
    output_path: Path,
    seed: int = 42
) -> List[Dict]:
    """
    Generate profiles from the predefined schema (no LLM calls).
    Useful for testing or when API is unavailable.

    The schema file is JSON with a "preference_categories" list whose items
    each carry a "preferences" list. Every profile gets a random subset of
    35-45 of those preferences (capped at however many the schema has) and
    a persona picked from SAMPLE_PERSONAS. Profiles are written to
    *output_path* as JSONL and also returned.

    A private random.Random(seed) drives all sampling, so results are
    reproducible per seed and the process-wide RNG state is left untouched.
    """
    with open(schema_path, encoding="utf-8") as f:
        schema = json.load(f)

    rng = random.Random(seed)
    profiles = []

    # Extract all preferences from schema
    all_prefs = []
    for cat in schema["preference_categories"]:
        all_prefs.extend(cat["preferences"])

    for i in range(num_profiles):
        user_id = _make_user_id(seed, i)

        # Select random subset of preferences (35-45)
        num_prefs = rng.randint(35, 45)
        selected_prefs = rng.sample(all_prefs, min(num_prefs, len(all_prefs)))

        # Build conflict groups
        conflict_groups = _build_conflict_groups(selected_prefs)

        profiles.append({
            "user_id": user_id,
            "persona": rng.choice(SAMPLE_PERSONAS),
            "preferences": selected_prefs,
            "conflict_groups": conflict_groups,
            "meta": {
                "total_preferences": len(selected_prefs),
                "total_conflict_groups": len(conflict_groups),
                "generator": "schema_based"
            }
        })

    # Save
    _write_jsonl(profiles, output_path)

    print(f"Generated {len(profiles)} profiles from schema")
    return profiles


# =============================================================================
# Main
# =============================================================================

def main() -> None:
    """Parse command-line arguments and run the selected generator."""
    parser = argparse.ArgumentParser(
        description="Generate complex user profiles with conditional preferences"
    )
    parser.add_argument("--num_profiles", type=int, default=100,
                        help="Number of profiles to generate")
    parser.add_argument("--output", type=str,
                        default="collaborativeagents/data/complex_profiles_v2/profiles.jsonl")
    parser.add_argument("--model", type=str,
                        default="meta-llama/Llama-3.1-70B-Instruct",
                        help="LLM model for generation")
    parser.add_argument("--seed", type=int, default=42)
    parser.add_argument("--from_schema", type=str, default=None,
                        help="Generate from schema file instead of LLM")

    args = parser.parse_args()
    output_path = Path(args.output)

    if args.from_schema:
        generate_profiles_from_schema(
            num_profiles=args.num_profiles,
            schema_path=Path(args.from_schema),
            output_path=output_path,
            seed=args.seed
        )
    else:
        generate_profiles_batch(
            num_profiles=args.num_profiles,
            output_path=output_path,
            model=args.model,
            seed=args.seed
        )


if __name__ == "__main__":
    main()
