1 files changed, 475 insertions, 0 deletions
diff --git a/collaborativeagents/scripts/generate_profiles_v2.py b/collaborativeagents/scripts/generate_profiles_v2.py
new file mode 100644
index 0000000..c431302
--- /dev/null
+++ b/collaborativeagents/scripts/generate_profiles_v2.py
@@ -0,0 +1,475 @@
+"""
+Generate 100 complex user profiles with ~40 conditional preferences using LLM.
+
+Key differences from original CollaborativeAgents:
+1. 40 conditional preferences (vs their 3 flat preferences)
+2. Preferences have explicit conditions for when they apply
+3. Conflict groups marked for testing conflict resolution
+4. LLM-based batch generation with quality control
+"""
+
+import json
+import random
+import hashlib
+from pathlib import Path
+from dataclasses import dataclass, field, asdict
+from typing import Optional, List, Dict, Any
+import argparse
+
+try:
+    import litellm
+except ImportError:
+    litellm = None
+
+
+# =============================================================================
+# Preference Category Definitions
+# =============================================================================
+
+# Per-category generation settings, keyed by category name. Each entry has:
+#   num_preferences - how many preferences the LLM is asked to produce; it is
+#                     substituted into LLM_PREFERENCE_GENERATION_PROMPT.
+#   conflicts       - tuples of preference labels that are meant to conflict.
+#                     NOTE(review): no code in this script reads this key, and
+#                     the labels do not follow the "<prefix>_NNN" ids requested
+#                     from the LLM - confirm whether a consumer elsewhere uses it.
+#   prompt          - category-specific instructions substituted into the
+#                     generation prompt as {category_prompt}.
+PREFERENCE_CATEGORIES = {
+    "response_format": {
+        "num_preferences": 4,
+        "conflicts": [("rf_bullets", "rf_numbered"), ("rf_answer_first", "rf_build_up")],
+        "prompt": """Generate 4 preferences about response FORMAT:
+1. When to use bullet points vs numbered lists
+2. When to lead with the answer vs build up to it
+
+Each must have:
+- A SPECIFIC condition (trigger phrase or situation)
+- A clear action (what to do)
+- Conflict group (format_structure or answer_position)
+- Priority keywords that trigger this preference
+
+Make conditions mutually exclusive within each conflict group."""
+    },
+
+    "verbosity": {
+        "num_preferences": 5,
+        "conflicts": [("vb_concise", "vb_detailed"), ("vb_explain_why", "vb_just_answer")],
+        "prompt": """Generate 5 preferences about VERBOSITY/LENGTH:
+1. When to be concise (user says "quick", "briefly", "TL;DR")
+2. When to be detailed (complex topics, "explain", "in depth")
+3. When to explain reasoning vs just give answer
+
+Include explicit trigger phrases in conditions.
+Conflict groups: response_length, explanation_depth"""
+    },
+
+    "code_style": {
+        "num_preferences": 8,
+        "conflicts": [
+            ("cs_snake", "cs_camel", "cs_sql_upper"),  # By language
+            ("cs_inline_comments", "cs_docstrings"),  # Comment style
+            ("cs_bugs_only", "cs_full_review")  # Review scope
+        ],
+        "prompt": """Generate 8 preferences about CODE STYLE:
+1-3. Naming conventions BY LANGUAGE (Python=snake_case, JS=camelCase, SQL=UPPERCASE)
+4-5. Comment styles for short snippets vs production code
+6-7. Code review scope (bugs only vs style too)
+8. Error handling preference
+
+Conflict groups: naming_convention, comment_style, review_scope"""
+    },
+
+    "math_style": {
+        "num_preferences": 6,
+        "conflicts": [("ms_show_steps", "ms_high_level"), ("ms_intuition", "ms_formula")],
+        "prompt": """Generate 6 preferences about MATHEMATICAL explanations:
+1-2. When to show detailed steps vs high-level approach
+3-4. When to lead with intuition vs formula (statistics vs pure math)
+5. How to structure proofs
+6. Practice problems when studying for exams
+
+Conflict groups: math_detail, math_approach"""
+    },
+
+    "interaction_pattern": {
+        "num_preferences": 6,
+        "conflicts": [("ip_confirm", "ip_execute"), ("ip_recommend", "ip_compare")],
+        "prompt": """Generate 6 preferences about INTERACTION patterns:
+1-2. When to confirm before acting vs execute directly
+3-4. When to recommend vs present options/comparison
+5. How to handle user frustration
+6. How to handle user thanks/satisfaction
+
+Conflict groups: autonomy, guidance_style"""
+    },
+
+    "domain_specific": {
+        "num_preferences": 6,
+        "conflicts": [("ds_example_first", "ds_definition_first")],
+        "prompt": """Generate 6 DOMAIN-SPECIFIC preferences:
+1. ML explanations (include math formulation)
+2. System design (components list before interactions)
+3. API/library usage (example first)
+4. Theoretical concepts (definition first)
+5. Data structures (include complexity)
+6. Documentation style
+
+Conflict group: example_position"""
+    },
+
+    "error_correction": {
+        "num_preferences": 4,
+        "conflicts": [("ec_gentle", "ec_direct")],
+        "prompt": """Generate 4 preferences about ERROR CORRECTION:
+1. Minor terminology errors (correct gently inline)
+2. Fundamental misconceptions (address directly)
+3. Code bugs
+4. Agent's own mistakes
+
+Conflict group: correction_style"""
+    },
+
+    "output_artifacts": {
+        "num_preferences": 4,
+        "conflicts": [("oa_single_block", "oa_chunked")],
+        "prompt": """Generate 4 preferences about OUTPUT format:
+1. Copyable code (single block)
+2. Teaching code (chunked with explanations)
+3. Terminal commands (bash blocks with expected output)
+4. Always specify language in code fences
+
+Conflict group: code_presentation"""
+    }
+}
+
+
+# Template for the per-category preference request. It is filled with
+# str.format(): {category_name}, {num_preferences}, {category_prompt} and
+# {prefix} are substituted, while doubled braces ({{ and }}) are escapes that
+# become the literal braces of the JSON example shown to the model.
+LLM_PREFERENCE_GENERATION_PROMPT = """You are generating CONDITIONAL user preferences for a personalization benchmark.
+
+# Category: {category_name}
+# Number of preferences to generate: {num_preferences}
+
+{category_prompt}
+
+# Output Requirements
+Generate exactly {num_preferences} preferences in this JSON format:
+```json
+{{
+    "preferences": [
+        {{
+            "pref_id": "{prefix}_001",
+            "condition": "When X happens / When user says Y / For Z type of content",
+            "action": "Do A, B, C (be specific)",
+            "conflict_group": "group_name_or_null",
+            "priority_context": ["keyword1", "keyword2", "phrase1"]
+        }},
+        ...
+    ]
+}}
+```
+
+# Critical Rules:
+1. Conditions must be SPECIFIC and OBSERVABLE (include trigger phrases)
+2. Within a conflict group, conditions must be MUTUALLY EXCLUSIVE
+3. Priority_context keywords should appear in queries that trigger this preference
+4. Actions must be concrete and verifiable
+
+Generate preferences that will:
+- Create interesting conflicts (RAG should resolve correctly, context methods fail)
+- Be testable (we can verify if an agent followed them)
+- Be realistic (based on actual user behavior)
+
+Output ONLY the JSON, no other text."""
+
+
+# Template for the persona request. It is filled with str.format(); the
+# {categories} placeholder receives the comma-separated category names.
+PERSONA_GENERATION_PROMPT = """Generate a realistic user persona (2-3 sentences) that would naturally have these preference categories:
+{categories}
+
+The persona should be a software developer, researcher, or technical professional. Include:
+- Professional background (role, experience level, domain)
+- Communication style tendencies
+- Work context
+
+Output ONLY the persona text, no JSON or formatting."""
+
+
+# =============================================================================
+# Profile Generator
+# =============================================================================
+
+class ProfileGenerator:
+    """Generate complex user profiles with conditional preferences."""
+
+    def __init__(self, model: str = "meta-llama/Llama-3.1-70B-Instruct", seed: int = 42):
+        self.model = model
+        self.random = random.Random(seed)
+
+        if litellm is None:
+            raise ImportError("litellm required for profile generation")
+
+    def _call_llm(self, prompt: str, json_mode: bool = True) -> str:
+        """Call LLM with prompt."""
+        kwargs = {
+            "model": self.model,
+            "messages": [{"role": "user", "content": prompt}],
+            "temperature": 0.7,
+            "max_tokens": 4096,
+        }
+
+        if json_mode:
+            kwargs["response_format"] = {"type": "json_object"}
+
+        response = litellm.completion(**kwargs)
+        return response.choices[0].message.content
+
+    def _parse_json(self, text: str) -> dict:
+        """Parse JSON from response."""
+        import re
+
+        try:
+            return json.loads(text)
+        except json.JSONDecodeError:
+            pass
+
+        # Try markdown code block
+        match = re.search(r'```(?:json)?\s*([\s\S]*?)```', text)
+        if match:
+            try:
+                return json.loads(match.group(1))
+            except:
+                pass
+
+        # Try to find JSON object
+        match = re.search(r'\{[\s\S]*\}', text)
+        if match:
+            try:
+                return json.loads(match.group())
+            except:
+                pass
+
+        raise ValueError(f"Failed to parse JSON from: {text[:500]}")
+
+    def generate_preferences_for_category(
+        self,
+        category: str,
+        prefix: str
+    ) -> List[Dict]:
+        """Generate preferences for a single category."""
+        cat_info = PREFERENCE_CATEGORIES[category]
+
+        prompt = LLM_PREFERENCE_GENERATION_PROMPT.format(
+            category_name=category,
+            num_preferences=cat_info["num_preferences"],
+            category_prompt=cat_info["prompt"],
+            prefix=prefix
+        )
+
+        response = self._call_llm(prompt, json_mode=True)
+        data = self._parse_json(response)
+
+        prefs = data.get("preferences", data)
+        if isinstance(prefs, dict):
+            prefs = list(prefs.values())
+
+        # Validate and fix pref_ids
+        for i, pref in enumerate(prefs):
+            if "pref_id" not in pref:
+                pref["pref_id"] = f"{prefix}_{i+1:03d}"
+
+        return prefs
+
+    def generate_persona(self, categories: List[str]) -> str:
+        """Generate a persona for the given preference categories."""
+        prompt = PERSONA_GENERATION_PROMPT.format(
+            categories=", ".join(categories)
+        )
+
+        return self._call_llm(prompt, json_mode=False).strip()
+
+    def generate_profile(self, user_id: str) -> Dict:
+        """Generate a complete user profile with ~40 preferences."""
+        all_preferences = []
+        category_prefixes = {
+            "response_format": "rf",
+            "verbosity": "vb",
+            "code_style": "cs",
+            "math_style": "ms",
+            "interaction_pattern": "ip",
+            "domain_specific": "ds",
+            "error_correction": "ec",
+            "output_artifacts": "oa"
+        }
+
+        print(f"  Generating preferences for {user_id}...")
+        for category, prefix in category_prefixes.items():
+            try:
+                prefs = self.generate_preferences_for_category(category, prefix)
+                all_preferences.extend(prefs)
+                print(f"    {category}: {len(prefs)} preferences")
+            except Exception as e:
+                print(f"    ERROR in {category}: {e}")
+
+        # Generate persona
+        print(f"  Generating persona...")
+        persona = self.generate_persona(list(category_prefixes.keys()))
+
+        # Build conflict groups mapping
+        conflict_groups = {}
+        for pref in all_preferences:
+            cg = pref.get("conflict_group")
+            if cg:
+                if cg not in conflict_groups:
+                    conflict_groups[cg] = []
+                conflict_groups[cg].append(pref["pref_id"])
+
+        return {
+            "user_id": user_id,
+            "persona": persona,
+            "preferences": all_preferences,
+            "conflict_groups": conflict_groups,
+            "meta": {
+                "total_preferences": len(all_preferences),
+                "total_conflict_groups": len(conflict_groups),
+                "generator": "generate_profiles_v2.py"
+            }
+        }
+
+
+def generate_profiles_batch(
+    num_profiles: int,
+    output_path: Path,
+    model: str = "meta-llama/Llama-3.1-70B-Instruct",
+    seed: int = 42
+):
+    """Generate multiple profiles."""
+    generator = ProfileGenerator(model=model, seed=seed)
+    profiles = []
+
+    for i in range(num_profiles):
+        user_id = f"user_{hashlib.md5(f'{seed}_{i}'.encode()).hexdigest()[:8]}"
+        print(f"\n[{i+1}/{num_profiles}] Generating profile: {user_id}")
+
+        try:
+            profile = generator.generate_profile(user_id)
+            profiles.append(profile)
+            print(f"  Generated {profile['meta']['total_preferences']} preferences")
+        except Exception as e:
+            print(f"  ERROR: {e}")
+            continue
+
+    # Save
+    output_path.parent.mkdir(parents=True, exist_ok=True)
+    with open(output_path, 'w') as f:
+        for profile in profiles:
+            f.write(json.dumps(profile) + '\n')
+
+    print(f"\n{'='*60}")
+    print(f"Generated {len(profiles)} profiles")
+    print(f"Saved to: {output_path}")
+
+    return profiles
+
+
+# =============================================================================
+# Fallback: Generate from Schema (No LLM Required)
+# =============================================================================
+
+def generate_profiles_from_schema(
+    num_profiles: int,
+    schema_path: Path,
+    output_path: Path,
+    seed: int = 42
+) -> List[Dict]:
+    """
+    Generate profiles from the predefined schema (no LLM calls).
+    Useful for testing or when API is unavailable.
+    """
+    with open(schema_path) as f:
+        schema = json.load(f)
+
+    random.seed(seed)
+    profiles = []
+
+    # Extract all preferences from schema
+    all_prefs = []
+    for cat in schema["preference_categories"]:
+        all_prefs.extend(cat["preferences"])
+
+    # Sample personas
+    sample_personas = [
+        "A senior backend engineer who values efficiency and directness. Prefers practical solutions over theoretical discussions.",
+        "A PhD student in ML who is meticulous about mathematical rigor. Appreciates step-by-step derivations.",
+        "A junior developer learning full-stack. Prefers patient, incremental explanations with examples.",
+        "A DevOps engineer focused on automation. Wants concise, actionable answers with commands to run.",
+        "A data scientist who thinks visually. Prefers intuition before formulas and lots of examples.",
+        "A tech lead reviewing code from their team. Focuses on maintainability and best practices.",
+        "A researcher prototyping quickly. Wants working code fast, willing to refactor later.",
+        "A student preparing for technical interviews. Needs step-by-step problem solving practice.",
+    ]
+
+    for i in range(num_profiles):
+        user_id = f"user_{hashlib.md5(f'{seed}_{i}'.encode()).hexdigest()[:8]}"
+
+        # Select random subset of preferences (35-45)
+        num_prefs = random.randint(35, 45)
+        selected_prefs = random.sample(all_prefs, min(num_prefs, len(all_prefs)))
+
+        # Build conflict groups
+        conflict_groups = {}
+        for pref in selected_prefs:
+            cg = pref.get("conflict_group")
+            if cg:
+                if cg not in conflict_groups:
+                    conflict_groups[cg] = []
+                conflict_groups[cg].append(pref["pref_id"])
+
+        profile = {
+            "user_id": user_id,
+            "persona": random.choice(sample_personas),
+            "preferences": selected_prefs,
+            "conflict_groups": conflict_groups,
+            "meta": {
+                "total_preferences": len(selected_prefs),
+                "total_conflict_groups": len(conflict_groups),
+                "generator": "schema_based"
+            }
+        }
+        profiles.append(profile)
+
+    # Save
+    output_path.parent.mkdir(parents=True, exist_ok=True)
+    with open(output_path, 'w') as f:
+        for profile in profiles:
+            f.write(json.dumps(profile) + '\n')
+
+    print(f"Generated {len(profiles)} profiles from schema")
+    return profiles
+
+
+# =============================================================================
+# Main
+# =============================================================================
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(
+        description="Generate complex user profiles with conditional preferences"
+    )
+    parser.add_argument("--num_profiles", type=int, default=100,
+                        help="Number of profiles to generate")
+    parser.add_argument("--output", type=str,
+                        default="collaborativeagents/data/complex_profiles_v2/profiles.jsonl")
+    parser.add_argument("--model", type=str,
+                        default="meta-llama/Llama-3.1-70B-Instruct",
+                        help="LLM model for generation")
+    parser.add_argument("--seed", type=int, default=42)
+    parser.add_argument("--from_schema", type=str, default=None,
+                        help="Generate from schema file instead of LLM")
+
+    args = parser.parse_args()
+    output_path = Path(args.output)
+
+    if args.from_schema:
+        generate_profiles_from_schema(
+            num_profiles=args.num_profiles,
+            schema_path=Path(args.from_schema),
+            output_path=output_path,
+            seed=args.seed
+        )
+    else:
+        generate_profiles_batch(
+            num_profiles=args.num_profiles,
+            output_path=output_path,
+            model=args.model,
+            seed=args.seed
+        )