summaryrefslogtreecommitdiff
path: root/collaborativeagents/scripts/generate_profiles_v2.py
diff options
context:
space:
mode:
authorYurenHao0426 <blackhao0426@gmail.com>2026-01-27 09:57:37 -0600
committerYurenHao0426 <blackhao0426@gmail.com>2026-01-27 09:57:37 -0600
commitdc801c07cf38b0c495686463e6ca6f871a64440e (patch)
tree599f03114775921dbc472403c701f4a3a8ea188a /collaborativeagents/scripts/generate_profiles_v2.py
parente43b3f8aa36c198b95c1e46bea2eaf3893b13dc3 (diff)
Add collaborativeagents module and update gitignore
- Add collaborativeagents subproject with adapters, agents, and evaluation modules - Update .gitignore to exclude large binary files (.whl, .tar), wandb logs, and results Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
Diffstat (limited to 'collaborativeagents/scripts/generate_profiles_v2.py')
-rw-r--r--collaborativeagents/scripts/generate_profiles_v2.py475
1 files changed, 475 insertions, 0 deletions
diff --git a/collaborativeagents/scripts/generate_profiles_v2.py b/collaborativeagents/scripts/generate_profiles_v2.py
new file mode 100644
index 0000000..c431302
--- /dev/null
+++ b/collaborativeagents/scripts/generate_profiles_v2.py
@@ -0,0 +1,475 @@
+"""
+Generate 100 complex user profiles with ~40 conditional preferences using LLM.
+
+Key differences from original CollaborativeAgents:
+1. 40 conditional preferences (vs their 3 flat preferences)
+2. Preferences have explicit conditions for when they apply
+3. Conflict groups marked for testing conflict resolution
+4. LLM-based batch generation with quality control
+"""
+
+import json
+import random
+import hashlib
+from pathlib import Path
+from dataclasses import dataclass, field, asdict
+from typing import Optional, List, Dict, Any
+import argparse
+
+try:
+ import litellm
+except ImportError:
+ litellm = None
+
+
+# =============================================================================
+# Preference Category Definitions
+# =============================================================================
+
# Catalogue of preference categories that drives LLM-based generation.
#
# Each key is a category name and each value is a dict with:
#   - "num_preferences": how many preferences to request for that category
#     (the eight values below sum to 43);
#   - "conflicts": tuples of preference identifiers that are meant to conflict
#     with each other.
#     NOTE(review): no code visible in this file reads "conflicts"; it looks
#     like reference-only metadata -- confirm before relying on it.
#   - "prompt": category-specific instructions that are substituted into
#     LLM_PREFERENCE_GENERATION_PROMPT as {category_prompt}.
PREFERENCE_CATEGORIES = {
    "response_format": {
        "num_preferences": 4,
        "conflicts": [("rf_bullets", "rf_numbered"), ("rf_answer_first", "rf_build_up")],
        "prompt": """Generate 4 preferences about response FORMAT:
1. When to use bullet points vs numbered lists
2. When to lead with the answer vs build up to it

Each must have:
- A SPECIFIC condition (trigger phrase or situation)
- A clear action (what to do)
- Conflict group (format_structure or answer_position)
- Priority keywords that trigger this preference

Make conditions mutually exclusive within each conflict group."""
    },

    "verbosity": {
        "num_preferences": 5,
        "conflicts": [("vb_concise", "vb_detailed"), ("vb_explain_why", "vb_just_answer")],
        "prompt": """Generate 5 preferences about VERBOSITY/LENGTH:
1. When to be concise (user says "quick", "briefly", "TL;DR")
2. When to be detailed (complex topics, "explain", "in depth")
3. When to explain reasoning vs just give answer

Include explicit trigger phrases in conditions.
Conflict groups: response_length, explanation_depth"""
    },

    "code_style": {
        "num_preferences": 8,
        "conflicts": [
            ("cs_snake", "cs_camel", "cs_sql_upper"),  # By language
            ("cs_inline_comments", "cs_docstrings"),  # Comment style
            ("cs_bugs_only", "cs_full_review")  # Review scope
        ],
        "prompt": """Generate 8 preferences about CODE STYLE:
1-3. Naming conventions BY LANGUAGE (Python=snake_case, JS=camelCase, SQL=UPPERCASE)
4-5. Comment styles for short snippets vs production code
6-7. Code review scope (bugs only vs style too)
8. Error handling preference

Conflict groups: naming_convention, comment_style, review_scope"""
    },

    "math_style": {
        "num_preferences": 6,
        "conflicts": [("ms_show_steps", "ms_high_level"), ("ms_intuition", "ms_formula")],
        "prompt": """Generate 6 preferences about MATHEMATICAL explanations:
1-2. When to show detailed steps vs high-level approach
3-4. When to lead with intuition vs formula (statistics vs pure math)
5. How to structure proofs
6. Practice problems when studying for exams

Conflict groups: math_detail, math_approach"""
    },

    "interaction_pattern": {
        "num_preferences": 6,
        "conflicts": [("ip_confirm", "ip_execute"), ("ip_recommend", "ip_compare")],
        "prompt": """Generate 6 preferences about INTERACTION patterns:
1-2. When to confirm before acting vs execute directly
3-4. When to recommend vs present options/comparison
5. How to handle user frustration
6. How to handle user thanks/satisfaction

Conflict groups: autonomy, guidance_style"""
    },

    "domain_specific": {
        "num_preferences": 6,
        "conflicts": [("ds_example_first", "ds_definition_first")],
        "prompt": """Generate 6 DOMAIN-SPECIFIC preferences:
1. ML explanations (include math formulation)
2. System design (components list before interactions)
3. API/library usage (example first)
4. Theoretical concepts (definition first)
5. Data structures (include complexity)
6. Documentation style

Conflict group: example_position"""
    },

    "error_correction": {
        "num_preferences": 4,
        "conflicts": [("ec_gentle", "ec_direct")],
        "prompt": """Generate 4 preferences about ERROR CORRECTION:
1. Minor terminology errors (correct gently inline)
2. Fundamental misconceptions (address directly)
3. Code bugs
4. Agent's own mistakes

Conflict group: correction_style"""
    },

    "output_artifacts": {
        "num_preferences": 4,
        "conflicts": [("oa_single_block", "oa_chunked")],
        "prompt": """Generate 4 preferences about OUTPUT format:
1. Copyable code (single block)
2. Teaching code (chunked with explanations)
3. Terminal commands (bash blocks with expected output)
4. Always specify language in code fences

Conflict group: code_presentation"""
    }
}
+
+
# Prompt template for generating one category's preferences.
#
# It is filled with ``str.format`` in
# ``ProfileGenerator.generate_preferences_for_category`` using the fields
# {category_name}, {num_preferences}, {category_prompt} and {prefix}.
# The doubled braces ("{{" / "}}") are format escapes: they come out as single
# literal braces so the JSON example reaches the model intact.
LLM_PREFERENCE_GENERATION_PROMPT = """You are generating CONDITIONAL user preferences for a personalization benchmark.

# Category: {category_name}
# Number of preferences to generate: {num_preferences}

{category_prompt}

# Output Requirements
Generate exactly {num_preferences} preferences in this JSON format:
```json
{{
 "preferences": [
 {{
 "pref_id": "{prefix}_001",
 "condition": "When X happens / When user says Y / For Z type of content",
 "action": "Do A, B, C (be specific)",
 "conflict_group": "group_name_or_null",
 "priority_context": ["keyword1", "keyword2", "phrase1"]
 }},
 ...
 ]
}}
```

# Critical Rules:
1. Conditions must be SPECIFIC and OBSERVABLE (include trigger phrases)
2. Within a conflict group, conditions must be MUTUALLY EXCLUSIVE
3. Priority_context keywords should appear in queries that trigger this preference
4. Actions must be concrete and verifiable

Generate preferences that will:
- Create interesting conflicts (RAG should resolve correctly, context methods fail)
- Be testable (we can verify if an agent followed them)
- Be realistic (based on actual user behavior)

Output ONLY the JSON, no other text."""
+
+
# Prompt template for the free-text persona.
#
# ``ProfileGenerator.generate_persona`` fills {categories} with the
# comma-separated category names and sends the prompt without JSON mode.
PERSONA_GENERATION_PROMPT = """Generate a realistic user persona (2-3 sentences) that would naturally have these preference categories:
{categories}

The persona should be a software developer, researcher, or technical professional. Include:
- Professional background (role, experience level, domain)
- Communication style tendencies
- Work context

Output ONLY the persona text, no JSON or formatting."""
+
+
+# =============================================================================
+# Profile Generator
+# =============================================================================
+
class ProfileGenerator:
    """Generate complex user profiles with conditional preferences.

    Every profile is assembled from one LLM call per preference category plus
    one call for a short persona. All LLM traffic goes through ``litellm``.
    """

    # Spellings that mean "no conflict group". The generation prompt shows the
    # placeholder "group_name_or_null", so a model may answer with the *string*
    # "null" instead of a JSON null; such values must not become real groups.
    _NULL_CONFLICT_GROUPS = frozenset({"", "null", "none"})

    def __init__(self, model: str = "meta-llama/Llama-3.1-70B-Instruct", seed: int = 42):
        """Store the model name and a seeded RNG.

        Args:
            model: Model identifier handed to ``litellm.completion``.
            seed: Seed for the instance-local ``random.Random``.

        Raises:
            ImportError: If ``litellm`` could not be imported.
        """
        self.model = model
        self.random = random.Random(seed)

        if litellm is None:
            raise ImportError("litellm required for profile generation")

    def _call_llm(self, prompt: str, json_mode: bool = True) -> str:
        """Send ``prompt`` as a single user message and return the reply text.

        Args:
            prompt: Full prompt text.
            json_mode: When true, request a JSON-object response format.

        Returns:
            The content of the first choice, or ``""`` when the content is
            missing, so callers can always treat the result as a string.
        """
        kwargs = {
            "model": self.model,
            "messages": [{"role": "user", "content": prompt}],
            "temperature": 0.7,
            "max_tokens": 4096,
        }

        if json_mode:
            kwargs["response_format"] = {"type": "json_object"}

        response = litellm.completion(**kwargs)
        # The message content can be None; normalise it so `.strip()` and JSON
        # parsing downstream do not fail with an unrelated TypeError.
        return response.choices[0].message.content or ""

    def _parse_json(self, text: str) -> dict:
        """Parse JSON from an LLM response.

        Candidates are tried in order: the whole text, the body of the first
        markdown code fence, then the span from the first ``{`` to the last
        ``}``.

        Args:
            text: Raw response text.

        Returns:
            The decoded JSON value (normally a dict; a top-level JSON array
            decodes to a list).

        Raises:
            ValueError: If ``text`` is not a string or no candidate decodes.
        """
        import re

        if not isinstance(text, str):
            raise ValueError(f"Failed to parse JSON from: {text!r}")

        candidates = [text]

        # Try markdown code block
        fenced = re.search(r'```(?:json)?\s*([\s\S]*?)```', text)
        if fenced:
            candidates.append(fenced.group(1))

        # Try to find JSON object
        braced = re.search(r'\{[\s\S]*\}', text)
        if braced:
            candidates.append(braced.group())

        for candidate in candidates:
            try:
                return json.loads(candidate)
            except json.JSONDecodeError:
                # Only decoding failures mean "try the next candidate"; anything
                # else (e.g. KeyboardInterrupt) must propagate.
                continue

        raise ValueError(f"Failed to parse JSON from: {text[:500]}")

    @staticmethod
    def _build_conflict_groups(preferences: List[Dict]) -> Dict[str, List[str]]:
        """Map each conflict-group name to the pref_ids that belong to it.

        Preferences whose ``conflict_group`` is missing, not a string, or one of
        the textual null placeholders ("", "null", "none", case-insensitive)
        are left out.
        """
        groups: Dict[str, List[str]] = {}
        for pref in preferences:
            group = pref.get("conflict_group")
            if not isinstance(group, str):
                continue
            if group.strip().lower() in ProfileGenerator._NULL_CONFLICT_GROUPS:
                continue
            groups.setdefault(group, []).append(pref["pref_id"])
        return groups

    def generate_preferences_for_category(
        self,
        category: str,
        prefix: str
    ) -> List[Dict]:
        """Generate preferences for a single category.

        Args:
            category: Key into ``PREFERENCE_CATEGORIES``.
            prefix: Short id prefix (e.g. ``"rf"``) used in the prompt and for
                fallback ``pref_id`` values.

        Returns:
            A list of preference dicts, each guaranteed to carry a ``pref_id``.

        Raises:
            KeyError: If ``category`` is unknown.
            ValueError: If the response cannot be parsed or contains no
                preference objects.
        """
        cat_info = PREFERENCE_CATEGORIES[category]

        prompt = LLM_PREFERENCE_GENERATION_PROMPT.format(
            category_name=category,
            num_preferences=cat_info["num_preferences"],
            category_prompt=cat_info["prompt"],
            prefix=prefix
        )

        response = self._call_llm(prompt, json_mode=True)
        data = self._parse_json(response)

        # Accept {"preferences": [...]}, a bare list, or a dict keyed by id.
        prefs = data.get("preferences", data) if isinstance(data, dict) else data
        if isinstance(prefs, dict):
            prefs = list(prefs.values())
        if not isinstance(prefs, list):
            raise ValueError(f"Unexpected preferences payload for {category}: {type(prefs).__name__}")

        # Drop stray non-object entries instead of failing on them later.
        prefs = [pref for pref in prefs if isinstance(pref, dict)]
        if not prefs:
            raise ValueError(f"No preference objects returned for {category}")

        # Validate and fix pref_ids
        for i, pref in enumerate(prefs):
            if not pref.get("pref_id"):
                pref["pref_id"] = f"{prefix}_{i+1:03d}"

        return prefs

    def generate_persona(self, categories: List[str]) -> str:
        """Generate a persona for the given preference categories.

        Args:
            categories: Category names, joined with ", " into the prompt.

        Returns:
            The persona text with surrounding whitespace removed.
        """
        prompt = PERSONA_GENERATION_PROMPT.format(
            categories=", ".join(categories)
        )

        return self._call_llm(prompt, json_mode=False).strip()

    def generate_profile(self, user_id: str) -> Dict:
        """Generate a complete user profile with ~40 preferences.

        A category whose generation fails is reported on stdout and skipped, so
        the profile may hold fewer preferences than requested. A failure while
        generating the persona is not caught here.

        Args:
            user_id: Identifier stored in the profile.

        Returns:
            A dict with ``user_id``, ``persona``, ``preferences``,
            ``conflict_groups`` and ``meta``.
        """
        all_preferences = []
        category_prefixes = {
            "response_format": "rf",
            "verbosity": "vb",
            "code_style": "cs",
            "math_style": "ms",
            "interaction_pattern": "ip",
            "domain_specific": "ds",
            "error_correction": "ec",
            "output_artifacts": "oa"
        }

        print(f" Generating preferences for {user_id}...")
        for category, prefix in category_prefixes.items():
            try:
                prefs = self.generate_preferences_for_category(category, prefix)
                all_preferences.extend(prefs)
                print(f" {category}: {len(prefs)} preferences")
            except Exception as e:
                # Deliberate best effort: one bad category must not sink the profile.
                print(f" ERROR in {category}: {e}")

        # Generate persona
        print(f" Generating persona...")
        persona = self.generate_persona(list(category_prefixes.keys()))

        # Build conflict groups mapping
        conflict_groups = self._build_conflict_groups(all_preferences)

        return {
            "user_id": user_id,
            "persona": persona,
            "preferences": all_preferences,
            "conflict_groups": conflict_groups,
            "meta": {
                "total_preferences": len(all_preferences),
                "total_conflict_groups": len(conflict_groups),
                "generator": "generate_profiles_v2.py"
            }
        }
+
+
def generate_profiles_batch(
    num_profiles: int,
    output_path: Path,
    model: str = "meta-llama/Llama-3.1-70B-Instruct",
    seed: int = 42
):
    """Generate ``num_profiles`` LLM-backed profiles and write them as JSONL.

    User ids are derived from the MD5 of ``"{seed}_{index}"``. A profile whose
    generation raises is reported on stdout and skipped. The output file is
    written once, after all profiles have been attempted.

    Returns:
        The list of profiles that were generated successfully.
    """
    builder = ProfileGenerator(model=model, seed=seed)
    collected = []

    for index in range(num_profiles):
        digest = hashlib.md5(f'{seed}_{index}'.encode()).hexdigest()
        user_id = f"user_{digest[:8]}"
        print(f"\n[{index+1}/{num_profiles}] Generating profile: {user_id}")

        try:
            profile = builder.generate_profile(user_id)
            collected.append(profile)
            print(f" Generated {profile['meta']['total_preferences']} preferences")
        except Exception as e:
            print(f" ERROR: {e}")

    # Persist one JSON document per line.
    output_path.parent.mkdir(parents=True, exist_ok=True)
    with open(output_path, 'w') as sink:
        for profile in collected:
            sink.write(json.dumps(profile) + '\n')

    banner = '=' * 60
    print(f"\n{banner}")
    print(f"Generated {len(collected)} profiles")
    print(f"Saved to: {output_path}")

    return collected
+
+
+# =============================================================================
+# Fallback: Generate from Schema (No LLM Required)
+# =============================================================================
+
def generate_profiles_from_schema(
    num_profiles: int,
    schema_path: Path,
    output_path: Path,
    seed: int = 42
) -> List[Dict]:
    """
    Generate profiles from the predefined schema (no LLM calls).
    Useful for testing or when API is unavailable.

    Each profile gets a random subset of the schema's preferences (35-45 of
    them, capped at the number available) and one of the built-in personas.
    Sampling uses a private ``random.Random(seed)``, so results are
    reproducible per seed and the process-wide random state is left untouched.

    Args:
        num_profiles: Number of profiles to create.
        schema_path: JSON file with a ``"preference_categories"`` sequence whose
            items each hold a ``"preferences"`` list.
        output_path: Destination JSONL file; parent directories are created.
        seed: Seed for sampling and for deriving user ids.

    Returns:
        The generated profiles, in the order they were written.
    """
    # JSON interchange text is UTF-8; do not depend on the locale codec.
    with open(schema_path, encoding="utf-8") as f:
        schema = json.load(f)

    # Private generator: same sequence as seeding the module-level RNG, but
    # without reseeding it for every other user of `random` in the process.
    rng = random.Random(seed)

    # Extract all preferences from schema
    all_prefs = [
        pref
        for cat in schema["preference_categories"]
        for pref in cat["preferences"]
    ]

    # Sample personas
    sample_personas = [
        "A senior backend engineer who values efficiency and directness. Prefers practical solutions over theoretical discussions.",
        "A PhD student in ML who is meticulous about mathematical rigor. Appreciates step-by-step derivations.",
        "A junior developer learning full-stack. Prefers patient, incremental explanations with examples.",
        "A DevOps engineer focused on automation. Wants concise, actionable answers with commands to run.",
        "A data scientist who thinks visually. Prefers intuition before formulas and lots of examples.",
        "A tech lead reviewing code from their team. Focuses on maintainability and best practices.",
        "A researcher prototyping quickly. Wants working code fast, willing to refactor later.",
        "A student preparing for technical interviews. Needs step-by-step problem solving practice.",
    ]

    profiles = []
    for i in range(num_profiles):
        user_id = f"user_{hashlib.md5(f'{seed}_{i}'.encode()).hexdigest()[:8]}"

        # Select random subset of preferences (35-45)
        num_prefs = rng.randint(35, 45)
        selected_prefs = rng.sample(all_prefs, min(num_prefs, len(all_prefs)))

        # Build conflict groups
        conflict_groups = {}
        for pref in selected_prefs:
            cg = pref.get("conflict_group")
            if cg:
                conflict_groups.setdefault(cg, []).append(pref["pref_id"])

        profiles.append({
            "user_id": user_id,
            "persona": rng.choice(sample_personas),
            "preferences": selected_prefs,
            "conflict_groups": conflict_groups,
            "meta": {
                "total_preferences": len(selected_prefs),
                "total_conflict_groups": len(conflict_groups),
                "generator": "schema_based"
            }
        })

    # Save
    output_path.parent.mkdir(parents=True, exist_ok=True)
    with open(output_path, 'w', encoding="utf-8") as f:
        for profile in profiles:
            f.write(json.dumps(profile) + '\n')

    print(f"Generated {len(profiles)} profiles from schema")
    return profiles
+
+
+# =============================================================================
+# Main
+# =============================================================================
+
if __name__ == "__main__":
    # Command-line entry point: choose schema-based or LLM-based generation.
    cli = argparse.ArgumentParser(
        description="Generate complex user profiles with conditional preferences"
    )
    cli.add_argument(
        "--num_profiles",
        type=int,
        default=100,
        help="Number of profiles to generate",
    )
    cli.add_argument(
        "--output",
        type=str,
        default="collaborativeagents/data/complex_profiles_v2/profiles.jsonl",
    )
    cli.add_argument(
        "--model",
        type=str,
        default="meta-llama/Llama-3.1-70B-Instruct",
        help="LLM model for generation",
    )
    cli.add_argument("--seed", type=int, default=42)
    cli.add_argument(
        "--from_schema",
        type=str,
        default=None,
        help="Generate from schema file instead of LLM",
    )

    opts = cli.parse_args()
    destination = Path(opts.output)

    if not opts.from_schema:
        # Default path: generate every profile with the LLM.
        generate_profiles_batch(
            num_profiles=opts.num_profiles,
            output_path=destination,
            model=opts.model,
            seed=opts.seed
        )
    else:
        # A schema file was supplied: sample profiles without any LLM call.
        generate_profiles_from_schema(
            num_profiles=opts.num_profiles,
            schema_path=Path(opts.from_schema),
            output_path=destination,
            seed=opts.seed
        )