diff options
Diffstat (limited to 'collaborativeagents')
18 files changed, 1607 insertions, 126 deletions
diff --git a/collaborativeagents/adapters/personalized_llm_adapter.py b/collaborativeagents/adapters/personalized_llm_adapter.py index b476272..488b241 100644 --- a/collaborativeagents/adapters/personalized_llm_adapter.py +++ b/collaborativeagents/adapters/personalized_llm_adapter.py @@ -36,12 +36,24 @@ class AdapterConfig: enable_rl_updates: bool = True use_user_vector: bool = True # Whether to use user vector in policy scoring - # Paths (absolute paths to actual file locations in the repo) + # Paths - computed relative to project root # Note: Using empty_store to start fresh - RAG will accumulate memories during evaluation - user_store_path: str = "/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/data/users/collab_eval_store.npz" - memory_cards_path: str = "/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/data/corpora/empty_store/memory_cards.jsonl" - memory_embeddings_path: str = "/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/data/corpora/empty_store/memory_embeddings.npy" - item_projection_path: str = "/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/data/corpora/item_projection.npz" + _project_root: str = field(default_factory=lambda: str(Path(__file__).parent.parent.parent)) + user_store_path: str = "" + memory_cards_path: str = "" + memory_embeddings_path: str = "" + item_projection_path: str = "" + + def __post_init__(self): + root = Path(self._project_root) + if not self.user_store_path: + self.user_store_path = str(root / "data/users/collab_eval_store.npz") + if not self.memory_cards_path: + self.memory_cards_path = str(root / "data/corpora/empty_store/memory_cards.jsonl") + if not self.memory_embeddings_path: + self.memory_embeddings_path = str(root / "data/corpora/empty_store/memory_embeddings.npy") + if not self.item_projection_path: + self.item_projection_path = str(root / "data/corpora/item_projection.npz") # Multi-GPU assignment device_assignment: Optional[Dict[str, str]] = None @@ -64,10 +76,30 @@ class AdapterConfig: # vLLM URL for local reward model (only used when reward_mode="llm_local") reward_vllm_url: str = "http://localhost:8005/v1" + # Retrieval optimizations + enable_query_transform: bool = False # Transform queries for better retrieval matching + enable_global_preferences: bool = False # Separate global prefs that bypass retrieval + enable_preference_rewrite: bool = False # Use LLM to rewrite/merge retrieved preferences + + # Dynamic topk settings + dynamic_topk: bool = False # Use dynamic selection based on rerank score distribution + dynamic_min_k: int = 3 # Minimum preferences to select + dynamic_max_k: int = 8 # Maximum preferences to select + dynamic_score_ratio: float = 0.5 # Threshold = top_score * ratio + + # RL learning rate overrides + eta_long: float = None # Override RL learning rate for z_long (default 0.01) + eta_short: float = None # Override RL learning rate for z_short (default 0.05) + + # Session-level preference consolidation + enable_preference_consolidation: bool = False # Consolidate preferences at session end + consolidation_threshold: int = 5 # Min preferences before consolidation kicks in + # Reward mapping for user behavior preference_enforcement_reward: float = -0.8 # Negative reward when user enforces disappointment_expression_reward: float = -0.4 # Milder negative for disappointment positive_feedback_reward: float = 0.5 # When user expresses satisfaction + no_enforcement_reward: float = 0.1 # Small positive when user doesn't enforce (good turn) task_completion_reward: float = 1.0 # When task is solved correctly @@ -120,6 +152,17 @@ class PersonalizedLLMAdapter: best_of_n=self.config.best_of_n, reward_mode=self.config.reward_mode, reward_vllm_url=self.config.reward_vllm_url, + enable_query_transform=self.config.enable_query_transform, + enable_global_preferences=self.config.enable_global_preferences, + enable_preference_rewrite=self.config.enable_preference_rewrite, + dynamic_topk=self.config.dynamic_topk, + dynamic_min_k=self.config.dynamic_min_k, + dynamic_max_k=self.config.dynamic_max_k, + dynamic_score_ratio=self.config.dynamic_score_ratio, + eta_long=self.config.eta_long, + eta_short=self.config.eta_short, + enable_preference_consolidation=self.config.enable_preference_consolidation, + consolidation_threshold=self.config.consolidation_threshold, ) self._initialized = True print("[Adapter] Initialization complete.") @@ -221,7 +264,9 @@ class PersonalizedLLMAdapter: self.initialize() # Use chat_prepare from PersonalizedLLM - result = self._llm.chat_prepare(self._current_user_id, query) + # skip_extraction=False to enable preference extraction from user messages + # skip_auto_reward=True because batch framework handles rewards via process_user_turn + result = self._llm.chat_prepare(self._current_user_id, query, skip_extraction=False, skip_auto_reward=True) return result["messages"], result["context"] def process_response( @@ -294,24 +339,38 @@ class PersonalizedLLMAdapter: This is called AFTER generate_response and BEFORE the next turn. """ # Derive reward from user behavior - reward = 0.0 - gating = 1.0 # Always apply (could be conditional) + # Key insight: ALWAYS give a reward signal, not just for enforcement + # - Enforcement: negative reward (user had to correct agent) + # - No enforcement: small positive reward (agent did well) + # - Satisfaction/progress: larger positive reward + gating = 1.0 # Always apply if enforce_preferences: - reward = self.config.preference_enforcement_reward + reward = self.config.preference_enforcement_reward # -0.8 self._session_metrics["enforcements"] += 1 self._total_enforcements += 1 elif express_disappointment: - reward = self.config.disappointment_expression_reward + reward = self.config.disappointment_expression_reward # -0.4 self._session_metrics["disappointments"] += 1 self._total_disappointments += 1 elif express_satisfaction or draft_answer_updated: - reward = self.config.positive_feedback_reward + reward = self.config.positive_feedback_reward # +0.5 - # Apply feedback to PersonalizedLLM - if self.config.enable_rl_updates and reward != 0.0: + else: + # No enforcement = good turn, give small positive reward + reward = self.config.no_enforcement_reward # +0.1 + + # Apply feedback to PersonalizedLLM (always, not just when reward != 0) + if self.config.enable_rl_updates: + # Debug: check if pending_rl_update exists + ctx = self._llm._sessions.get(self._current_user_id) + has_pending = ctx is not None and ctx.pending_rl_update is not None + has_chosen = (has_pending and + len(ctx.pending_rl_update.get("last_chosen_indices", [])) > 0) if has_pending else False + print(f"[DEBUG-RL] User={self._current_user_id} reward={reward:.2f} " + f"has_pending={has_pending} has_chosen={has_chosen}") feedback = Feedback( user_id=self._current_user_id, turn_id=self._turn_counter - 1, @@ -380,6 +439,66 @@ class PersonalizedLLMAdapter: if self._initialized: self._llm.persist() + def export_all_user_vectors(self) -> Dict[str, Dict[str, Any]]: + """ + Export all user vectors with full state for analysis. + + Returns: + Dict mapping user_id to dict containing: + - z_long: np.ndarray (long-term user vector) + - z_short: np.ndarray (short-term user vector) + - z_long_norm: float + - z_short_norm: float + - reward_ma: float (reward moving average) + """ + if not self._initialized: + return {} + + result = {} + for user_id, state in self._llm._user_store._states.items(): + result[user_id] = { + "z_long": state.z_long.tolist(), + "z_short": state.z_short.tolist(), + "z_long_norm": float(np.linalg.norm(state.z_long)), + "z_short_norm": float(np.linalg.norm(state.z_short)), + "reward_ma": float(state.reward_ma), + } + return result + + def export_user_vectors_npz(self, output_path: str) -> None: + """ + Export all user vectors to a numpy .npz file for efficient storage and analysis. + + Args: + output_path: Path to save the .npz file + + The saved file contains: + - user_ids: array of user IDs + - z_long: [n_users, k] array of long-term vectors + - z_short: [n_users, k] array of short-term vectors + - reward_ma: [n_users] array of reward moving averages + """ + if not self._initialized: + return + + states = self._llm._user_store._states + if not states: + return + + user_ids = list(states.keys()) + z_long = np.stack([states[uid].z_long for uid in user_ids]) + z_short = np.stack([states[uid].z_short for uid in user_ids]) + reward_ma = np.array([states[uid].reward_ma for uid in user_ids]) + + np.savez( + output_path, + user_ids=np.array(user_ids), + z_long=z_long, + z_short=z_short, + reward_ma=reward_ma, + ) + print(f"[Adapter] Exported {len(user_ids)} user vectors to {output_path}") + # ========================================================================= # CollaborativeAgents Interface Methods # ========================================================================= @@ -491,6 +610,45 @@ def create_baseline_adapter( use_user_vector=False, # No user vector in policy llm_name=llm_name, use_shared_models=use_shared_models, + enable_query_transform=True, + enable_global_preferences=True, + device_assignment={ + "embed": "cuda:2", + "reranker": "cuda:3", + "extractor": "cuda:2", + }, + ), + # Baseline 6b: RAG with dynamic topk (min=3, max=8, ratio=0.5) + "rag_dynamic": AdapterConfig( + mode="nopersonal", + enable_preference_extraction=True, + enable_rl_updates=False, + use_user_vector=False, + llm_name=llm_name, + use_shared_models=use_shared_models, + enable_query_transform=True, + enable_global_preferences=True, + dynamic_topk=True, + dynamic_min_k=3, + dynamic_max_k=8, + dynamic_score_ratio=0.5, + device_assignment={ + "embed": "cuda:2", + "reranker": "cuda:3", + "extractor": "cuda:2", + }, + ), + # Baseline 6c: RAG with preference rewrite (LLM merges preferences) + "rag_rewrite": AdapterConfig( + mode="nopersonal", + enable_preference_extraction=True, + enable_rl_updates=False, + use_user_vector=False, + llm_name=llm_name, + use_shared_models=use_shared_models, + enable_query_transform=True, + enable_global_preferences=True, + enable_preference_rewrite=True, # NEW: Use LLM to merge preferences device_assignment={ "embed": "cuda:2", "reranker": "cuda:3", @@ -506,6 +664,89 @@ def create_baseline_adapter( use_user_vector=True, llm_name=llm_name, use_shared_models=use_shared_models, + enable_query_transform=True, + enable_global_preferences=True, + device_assignment={ + "embed": "cuda:2", + "reranker": "cuda:3", + "extractor": "cuda:2", + }, + ), + # Baseline 7a: RAG + Vector + Preference Rewrite (combines best of both) + "rag_rewrite_vector": AdapterConfig( + mode="full", + enable_preference_extraction=True, + enable_rl_updates=True, + use_user_vector=True, + llm_name=llm_name, + use_shared_models=use_shared_models, + enable_query_transform=True, + enable_global_preferences=True, + enable_preference_rewrite=True, # LLM merges preferences + device_assignment={ + "embed": "cuda:2", + "reranker": "cuda:3", + "extractor": "cuda:2", + }, + ), + # Baseline 7b: RAG + Vector with higher learning rate (10x) + "rag_vector_fast": AdapterConfig( + mode="full", + enable_preference_extraction=True, + enable_rl_updates=True, + use_user_vector=True, + llm_name=llm_name, + use_shared_models=use_shared_models, + enable_query_transform=True, + enable_global_preferences=True, + eta_long=0.1, # 10x default (0.01) + eta_short=0.5, # 10x default (0.05) + device_assignment={ + "embed": "cuda:2", + "reranker": "cuda:3", + "extractor": "cuda:2", + }, + ), + # Baseline 7c: RAG + Vector with session-level preference consolidation + "rag_vector_consolidate": AdapterConfig( + mode="full", + enable_preference_extraction=True, + enable_rl_updates=True, + use_user_vector=True, + llm_name=llm_name, + use_shared_models=use_shared_models, + enable_query_transform=True, + enable_global_preferences=True, + enable_preference_consolidation=True, + consolidation_threshold=5, + device_assignment={ + "embed": "cuda:2", + "reranker": "cuda:3", + "extractor": "cuda:2", + }, + ), + # Baseline 7d: RAG + Vector with balanced rewards (10x LR + no_enforcement_reward) + # Key improvements: + # - 10x learning rate for faster adaptation + # - Small positive reward for turns without enforcement (+0.1) + # - Disappointment detection enabled + # - Balanced reward signal for proper REINFORCE learning + "rag_vector_balanced": AdapterConfig( + mode="full", + enable_preference_extraction=True, + enable_rl_updates=True, + use_user_vector=True, + llm_name=llm_name, + use_shared_models=use_shared_models, + enable_query_transform=True, + enable_global_preferences=True, + eta_long=0.1, # 10x default + eta_short=0.5, # 10x default + # Balanced reward structure + preference_enforcement_reward=-0.8, + disappointment_expression_reward=-0.4, + positive_feedback_reward=0.5, + no_enforcement_reward=0.1, # Key: positive signal for good turns device_assignment={ "embed": "cuda:2", "reranker": "cuda:3", diff --git a/collaborativeagents/adapters/reflection_adapter.py b/collaborativeagents/adapters/reflection_adapter.py index d535be2..451c694 100644 --- a/collaborativeagents/adapters/reflection_adapter.py +++ b/collaborativeagents/adapters/reflection_adapter.py @@ -111,6 +111,18 @@ class ReflectionAdapter: return result["content"] + def _prepend_notes_to_conversation(self, conversation, notes_text): + """Prepend notes to the first message of a conversation copy.""" + if conversation: + conversation = list(conversation) + conversation[0] = dict(conversation[0]) + conversation[0]["content"] = ( + f"Remember, you have been taking notes throughout past conversations " + f"about user preferences. Use whatever is relevant in these notes to " + f"guide your response:\n{notes_text}\n\n" + conversation[0]["content"] + ) + return conversation + def _add_scaffolding_to_conversation( self, conversation: List[Dict[str, str]], @@ -118,73 +130,73 @@ class ReflectionAdapter: ) -> List[Dict[str, str]]: """ Add scaffolding (memory notes) to conversation. - - This is the EXACT logic from CollaboratorAgent.add_scaffolding_to_conversation(). - - If with_proper_scaffolding=True: Use LLM to extract relevant notes - If with_proper_scaffolding=False: Prepend all notes to first message + Sequential version - for non-batch use only. """ if not self.with_proper_scaffolding: - # Simple mode: prepend all notes - if conversation: - conversation = list(conversation) # Copy to avoid mutation - conversation[0] = dict(conversation[0]) # Copy first message - conversation[0]["content"] = ( - f"Remember, you have been taking notes throughout past conversations " - f"about user preferences. Use whatever is relevant in these notes to " - f"guide your response:\n{agent_notes}\n\n" + conversation[0]["content"] - ) - return conversation + return self._prepend_notes_to_conversation(conversation, agent_notes) else: - # Proper scaffolding: use LLM to extract relevant notes - conversation_str = get_conversation_string(conversation) - formatted_prompt = proper_scaffolding_prompt.format( - conversation_history=conversation_str, - complete_agent_notes=agent_notes - ) - - # Call LLM to extract relevant notes (with retries) - for attempt in range(3): - try: - messages = [{"role": "user", "content": formatted_prompt}] - response = self._generate(messages, max_new_tokens=512) - - parsed = repair_json(response, return_objects=True) - missing_keys = [k for k in ["reasoning", "relevant_notes"] if k not in parsed] - - if missing_keys: - print(f"[ReflectionAdapter] Scaffolding missing keys: {missing_keys}") - continue - - scaffolded_notes = parsed["relevant_notes"] - - # Prepend extracted notes to first message - if conversation: - conversation = list(conversation) - conversation[0] = dict(conversation[0]) - conversation[0]["content"] = ( - f"Remember, you have been taking notes throughout past conversations " - f"about user preferences. Use these notes to guide your response:\n" - f"{scaffolded_notes}\n\n" + conversation[0]["content"] - ) - - return conversation + prompt = self.get_scaffolding_prompt(conversation, agent_notes) + if prompt is None: + return self._prepend_notes_to_conversation(conversation, agent_notes) + response = self._generate([{"role": "user", "content": prompt}], max_new_tokens=512) + return self.apply_scaffolding_response(conversation, agent_notes, response) + + def get_scaffolding_prompt(self, conversation, agent_notes): + """Build the scaffolding prompt for batch processing. Returns None if no scaffolding needed.""" + if not self.with_proper_scaffolding: + return None + conversation_str = get_conversation_string(conversation) + return proper_scaffolding_prompt.format( + conversation_history=conversation_str, + complete_agent_notes=agent_notes + ) - except Exception as e: - print(f"[ReflectionAdapter] Scaffolding attempt {attempt+1} failed: {e}") - continue + def apply_scaffolding_response(self, conversation, agent_notes, response): + """Apply a scaffolding LLM response to the conversation.""" + try: + parsed = repair_json(response, return_objects=True) + if "relevant_notes" in parsed: + notes_text = parsed["relevant_notes"] + if conversation: + conversation = list(conversation) + conversation[0] = dict(conversation[0]) + conversation[0]["content"] = ( + f"Remember, you have been taking notes throughout past conversations " + f"about user preferences. Use these notes to guide your response:\n" + f"{notes_text}\n\n" + conversation[0]["content"] + ) + return conversation + except Exception: + pass + # Fallback: use all notes + return self._prepend_notes_to_conversation(conversation, agent_notes) + + def get_note_update_prompt(self, user_id=None): + """Build the note-update prompt for batch processing. Returns (messages, user_id) or None.""" + uid = user_id or self._current_user_id + if not uid or not self._conversation_history: + return None + current_notes = self._user_notes.get(uid, "No notes yet.") + conversation_str = get_conversation_string(self._conversation_history) + prompt = update_agent_notes_prompt.format( + agent_notes=current_notes, + conversation_str=conversation_str + ) + return [{"role": "user", "content": prompt}] - # Fallback: use all notes if retrieval fails - print("[ReflectionAdapter] Scaffolding failed, using full notes") - if conversation: - conversation = list(conversation) - conversation[0] = dict(conversation[0]) - conversation[0]["content"] = ( - f"Remember, you have been taking notes throughout past conversations " - f"about user preferences. Use whatever is relevant in these notes to " - f"guide your response:\n{agent_notes}\n\n" + conversation[0]["content"] - ) - return conversation + def apply_note_update_response(self, response, user_id=None): + """Apply a note-update LLM response.""" + uid = user_id or self._current_user_id + if not uid: + return + try: + # 8B model: use raw response directly + updated_notes = response.strip() + if updated_notes: + old_len = len(self._user_notes.get(uid, "")) + self._user_notes[uid] = updated_notes + except Exception: + pass def start_session(self, user_id: str, user_profile: dict = None): """Start a new session for a user.""" @@ -266,11 +278,27 @@ class ReflectionAdapter: # Get current notes for this user agent_notes = self._user_notes.get(self._current_user_id, "No notes yet about this user.") - # Build conversation with scaffolding (may involve LLM call for proper_scaffolding) + # Store for batch scaffolding (prepare_prompt may be called after batch scaffolding) + self._pending_agent_notes = agent_notes + self._pending_scaffolded = False + + # Build conversation with scaffolding if self.with_scaffolding and agent_notes != "No notes yet about this user.": - conversation_with_notes = self._add_scaffolding_to_conversation( - self._conversation_history, agent_notes - ) + if hasattr(self, '_scaffolding_result') and self._scaffolding_result is not None: + # Use pre-computed batch scaffolding result + conversation_with_notes = self.apply_scaffolding_response( + list(self._conversation_history), agent_notes, self._scaffolding_result) + self._scaffolding_result = None + self._pending_scaffolded = True + elif not self.with_proper_scaffolding: + conversation_with_notes = self._prepend_notes_to_conversation( + self._conversation_history, agent_notes) + self._pending_scaffolded = True + else: + # Sequential fallback - should not happen in batch mode + conversation_with_notes = self._add_scaffolding_to_conversation( + self._conversation_history, agent_notes) + self._pending_scaffolded = True else: conversation_with_notes = self._conversation_history @@ -357,30 +385,24 @@ class ReflectionAdapter: return None # All retries failed, keep old notes - def end_session(self, task_success: bool = False) -> Dict[str, Any]: + def end_session(self, task_success: bool = False, skip_note_update: bool = False) -> Dict[str, Any]: """ End session and update agent notes via reflection. - Uses the ORIGINAL update_agent_notes logic from CollaborativeAgents. + Args: + task_success: Whether the task was completed successfully + skip_note_update: If True, skip note update (already done via batch) """ if not self._current_user_id: return {} - # Get current notes - current_notes = self._user_notes.get(self._current_user_id, "No notes yet.") - - # Update notes via session-level reflection - if len(self._conversation_history) > 0: - result = self._update_agent_notes(current_notes, self._conversation_history) - + # Update notes via session-level reflection (skip if batch already did it) + if not skip_note_update and len(self._conversation_history) > 0: + result = self._update_agent_notes( + self._user_notes.get(self._current_user_id, "No notes yet."), + self._conversation_history) if result is not None and "agent_notes" in result: - updated_notes = result["agent_notes"] - self._user_notes[self._current_user_id] = updated_notes - print(f"[ReflectionAdapter] Updated notes for {self._current_user_id} " - f"({len(current_notes)} -> {len(updated_notes)} chars)") - else: - print(f"[ReflectionAdapter] Keeping old notes for {self._current_user_id} " - f"(update failed)") + self._user_notes[self._current_user_id] = result["agent_notes"] return { "turns": len(self._conversation_history), diff --git a/collaborativeagents/adapters/reflection_grpo_adapter.py b/collaborativeagents/adapters/reflection_grpo_adapter.py index 09c5b26..3c10942 100644 --- a/collaborativeagents/adapters/reflection_grpo_adapter.py +++ b/collaborativeagents/adapters/reflection_grpo_adapter.py @@ -18,10 +18,11 @@ import torch from transformers import AutoModelForCausalLM, AutoTokenizer from json_repair import repair_json -# Model paths - Use GRPO-trained model if available, fallback to base -GRPO_MODEL_PATH = "/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/collaborativeagents/training/outputs/grpo_reflection/final" -SFT_MODEL_PATH = "/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/collaborativeagents/training/outputs/sft_reflection" -DEFAULT_MODEL_PATH = "/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/models/llama-3.1-8b-instruct" +# Model paths - computed relative to project root +_PROJECT_ROOT = Path(__file__).parent.parent.parent +GRPO_MODEL_PATH = str(_PROJECT_ROOT / "collaborativeagents/training/outputs/grpo_reflection/final") +SFT_MODEL_PATH = str(_PROJECT_ROOT / "collaborativeagents/training/outputs/sft_reflection") +DEFAULT_MODEL_PATH = str(_PROJECT_ROOT / "models/llama-3.1-8b-instruct") def get_best_available_model(): """Get the best available model path (GRPO > SFT > base).""" diff --git a/collaborativeagents/agents/local_user_agent.py b/collaborativeagents/agents/local_user_agent.py index eae311e..fd085c4 100644 --- a/collaborativeagents/agents/local_user_agent.py +++ b/collaborativeagents/agents/local_user_agent.py @@ -11,9 +11,11 @@ from typing import List, Dict, Any, Optional from copy import deepcopy from json_repair import repair_json -# Default model paths -DEFAULT_MODEL_PATH_8B = "/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/models/llama-3.1-8b-instruct" -DEFAULT_MODEL_PATH_70B = "hugging-quants/Meta-Llama-3.1-70B-Instruct-AWQ-INT4" +# Default model paths - computed relative to project root +from pathlib import Path +_PROJECT_ROOT = Path(__file__).parent.parent.parent +DEFAULT_MODEL_PATH_8B = str(_PROJECT_ROOT / "models/llama-3.1-8b-instruct") +DEFAULT_MODEL_PATH_70B = str(_PROJECT_ROOT / "models/llama-3.1-70b-instruct") # Use 70B by default for better user simulation DEFAULT_MODEL_PATH = DEFAULT_MODEL_PATH_70B diff --git a/collaborativeagents/data/complex_profiles_v2/active_profiles_8.jsonl b/collaborativeagents/data/complex_profiles_v2/active_profiles_8.jsonl new file mode 100644 index 0000000..926d290 --- /dev/null +++ b/collaborativeagents/data/complex_profiles_v2/active_profiles_8.jsonl @@ -0,0 +1,8 @@ +{"user_id": "user_4c247208", "persona": "A researcher prototyping quickly. Wants working code fast, willing to refactor later.", "preferences": [{"pref_id": "vb_001", "condition": "I say 'quick question' or 'briefly'", "action": "respond in 3 sentences or fewer, no elaboration", "conflict_group": "response_length", "priority_context": ["time_pressure", "simple_query"]}, {"pref_id": "oa_002", "condition": "teaching through code examples", "action": "break code into small chunks with explanation between each", "conflict_group": "code_presentation", "priority_context": ["teach", "learn", "understand"]}, {"pref_id": "rf_004", "condition": "teaching a new concept", "action": "build up intuition before giving the formal definition", "conflict_group": "answer_position", "priority_context": ["learning", "explain", "why"]}, {"pref_id": "cs_006", "condition": "I ask for a code review", "action": "focus only on bugs and logic errors, ignore style issues", "conflict_group": "review_scope", "priority_context": ["review", "check", "look at"]}, {"pref_id": "oa_004", "condition": "providing commands to run", "action": "use bash code blocks, include expected output as comments", "conflict_group": null, "priority_context": ["command", "run", "terminal"]}, {"pref_id": "rf_003", "condition": "providing a final answer or conclusion", "action": "put the answer first, then explanation", "conflict_group": "answer_position", "priority_context": ["direct_question", "what_is"]}, {"pref_id": "ms_005", "condition": "discussing proofs", "action": "structure as: claim, approach sketch, formal proof, intuition recap", "conflict_group": "proof_style", "priority_context": ["prove", "proof", "show that"]}, {"pref_id": "ec_004", "condition": "I ask you to correct your previous response", "action": "acknowledge the error explicitly, then provide corrected version", "conflict_group": null, "priority_context": ["you were wrong", "that's not right", "actually"]}, {"pref_id": "ds_004", "condition": "explaining a theoretical concept", "action": "start with definition, then example, then edge cases", "conflict_group": "example_position", "priority_context": ["concept", "theory", "what is"]}, {"pref_id": "cs_007", "condition": "I ask to improve or refactor code", "action": "address both logic and style, suggest modern idioms", "conflict_group": "review_scope", "priority_context": ["improve", "refactor", "better"]}, {"pref_id": "ms_001", "condition": "solving algebraic equations", "action": "show each manipulation step with the operation applied noted", "conflict_group": "math_detail", "priority_context": ["solve", "equation", "algebra"]}, {"pref_id": "ds_003", "condition": "discussing APIs or library usage", "action": "show a minimal working example before explaining parameters", "conflict_group": "example_position", "priority_context": ["api", "library", "how to use"]}, {"pref_id": "vb_003", "condition": "I ask 'why' or 'how come'", "action": "always explain the underlying reasoning, not just the what", "conflict_group": "explanation_depth", "priority_context": ["curiosity", "understanding"]}, {"pref_id": "ds_006", "condition": "writing or reviewing documentation", "action": "be concise, avoid marketing language, focus on usage", "conflict_group": null, "priority_context": ["documentation", "docs", "readme"]}, {"pref_id": "ec_001", "condition": "I make a minor error in terminology", "action": "correct it gently inline without making it a focus", "conflict_group": "correction_style", "priority_context": ["minor_error", "terminology"]}, {"pref_id": "ip_001", "condition": "the task is complex with multiple parts", "action": "confirm the plan before executing, break into phases", "conflict_group": "autonomy", "priority_context": ["complex", "multiple", "project"]}, {"pref_id": "ms_002", "condition": "discussing statistics or probability", "action": "start with intuition and real-world interpretation before formulas", "conflict_group": "math_approach", "priority_context": ["probability", "statistics", "likelihood"]}, {"pref_id": "ip_004", "condition": "I'm comparing alternatives", "action": "present trade-offs in a table format with clear criteria", "conflict_group": "guidance_style", "priority_context": ["compare", "vs", "or", "which"]}, {"pref_id": "ds_005", "condition": "discussing data structures", "action": "always include time complexity for operations mentioned", "conflict_group": null, "priority_context": ["data structure", "array", "tree", "hash"]}, {"pref_id": "vb_004", "condition": "I'm debugging and say 'it doesn't work'", "action": "focus on diagnosing the specific issue, skip general explanations", "conflict_group": "explanation_depth", "priority_context": ["debugging", "error", "fix"]}, {"pref_id": "ip_002", "condition": "I give a clear and specific instruction", "action": "execute directly without asking for confirmation", "conflict_group": "autonomy", "priority_context": ["do this", "make this", "specific_instruction"]}, {"pref_id": "cs_005", "condition": "the code is a complete module or class", "action": "use docstrings at function/class level, minimal inline comments", "conflict_group": "comment_style", "priority_context": ["module", "class", "production"]}, {"pref_id": "ds_002", "condition": "discussing system design or architecture", "action": "describe components as a list first, then explain interactions", "conflict_group": null, "priority_context": ["design", "architecture", "system"]}, {"pref_id": "oa_003", "condition": "any response with code", "action": "always specify the language in the code fence", "conflict_group": null, "priority_context": ["code"]}, {"pref_id": "ms_003", "condition": "I ask to verify my calculation", "action": "check my work step by step, point out where I diverged if wrong", "conflict_group": null, "priority_context": ["verify", "check", "is this right"]}, {"pref_id": "ec_002", "condition": "I have a fundamental misconception", "action": "address the misconception directly and clearly before proceeding", "conflict_group": "correction_style", "priority_context": ["misconception", "fundamental_error"]}, {"pref_id": "cs_003", "condition": "writing SQL queries", "action": "use UPPERCASE for keywords, lowercase for table/column names", "conflict_group": "naming_convention", "priority_context": ["sql", "database", "query"]}, {"pref_id": "ds_001", "condition": "discussing machine learning concepts", "action": "include the mathematical formulation alongside intuitive explanation", "conflict_group": null, "priority_context": ["ml", "machine learning", "model"]}, {"pref_id": "ms_004", "condition": "the problem involves calculus", "action": "state the rule being applied (chain rule, integration by parts, etc.)", "conflict_group": "math_detail", "priority_context": ["derivative", "integral", "calculus"]}, {"pref_id": "ip_005", "condition": "I express frustration or say 'this is annoying'", "action": "acknowledge the difficulty briefly, then provide direct help", "conflict_group": null, "priority_context": ["frustration", "annoying", "stuck"]}, {"pref_id": "cs_008", "condition": "providing error handling examples", "action": "always use specific exception types, never bare except", "conflict_group": null, "priority_context": ["error", "exception", "try"]}, {"pref_id": "rf_001", "condition": "listing multiple items or options", "action": "use bullet points with consistent indentation", "conflict_group": "format_structure", "priority_context": ["enumeration", "comparison"]}, {"pref_id": "cs_002", "condition": "writing JavaScript or TypeScript code", "action": "use camelCase for variables, PascalCase for classes", "conflict_group": "naming_convention", "priority_context": ["javascript", "js", "typescript", "ts"]}, {"pref_id": "rf_002", "condition": "explaining a sequential process or procedure", "action": "use numbered steps with clear transitions", "conflict_group": "format_structure", "priority_context": ["tutorial", "how-to", "setup"]}, {"pref_id": "cs_004", "condition": "the code snippet is short (under 20 lines)", "action": "include inline comments explaining each logical block", "conflict_group": "comment_style", "priority_context": ["example", "snippet"]}], "conflict_groups": {"response_length": ["vb_001"], "code_presentation": ["oa_002"], "answer_position": ["rf_004", "rf_003"], "review_scope": ["cs_006", "cs_007"], "proof_style": ["ms_005"], "example_position": ["ds_004", "ds_003"], "math_detail": ["ms_001", "ms_004"], "explanation_depth": ["vb_003", "vb_004"], "correction_style": ["ec_001", "ec_002"], "autonomy": ["ip_001", "ip_002"], "math_approach": ["ms_002"], "guidance_style": ["ip_004"], "comment_style": ["cs_005", "cs_004"], "naming_convention": ["cs_003", "cs_002"], "format_structure": ["rf_001", "rf_002"]}, "meta": {"total_preferences": 35, "total_conflict_groups": 15, "generator": "schema_based"}} +{"user_id": "user_bfdd83da", "persona": "A DevOps engineer focused on automation. Wants concise, actionable answers with commands to run.", "preferences": [{"pref_id": "cs_005", "condition": "the code is a complete module or class", "action": "use docstrings at function/class level, minimal inline comments", "conflict_group": "comment_style", "priority_context": ["module", "class", "production"]}, {"pref_id": "oa_002", "condition": "teaching through code examples", "action": "break code into small chunks with explanation between each", "conflict_group": "code_presentation", "priority_context": ["teach", "learn", "understand"]}, {"pref_id": "cs_008", "condition": "providing error handling examples", "action": "always use specific exception types, never bare except", "conflict_group": null, "priority_context": ["error", "exception", "try"]}, {"pref_id": "ds_004", "condition": "explaining a theoretical concept", "action": "start with definition, then example, then edge cases", "conflict_group": "example_position", "priority_context": ["concept", "theory", "what is"]}, {"pref_id": "ds_003", "condition": "discussing APIs or library usage", "action": "show a minimal working example before explaining parameters", "conflict_group": "example_position", "priority_context": ["api", "library", "how to use"]}, {"pref_id": "oa_003", "condition": "any response with code", "action": "always specify the language in the code fence", "conflict_group": null, "priority_context": ["code"]}, {"pref_id": "rf_004", "condition": "teaching a new concept", "action": "build up intuition before giving the formal definition", "conflict_group": "answer_position", "priority_context": ["learning", "explain", "why"]}, {"pref_id": "vb_002", "condition": "the topic involves complex algorithms or mathematics", "action": "provide detailed step-by-step derivation with intermediate results", "conflict_group": "response_length", "priority_context": ["complex_topic", "proof", "derivation"]}, {"pref_id": "ip_005", "condition": "I express frustration or say 'this is annoying'", "action": "acknowledge the difficulty briefly, then provide direct help", "conflict_group": null, "priority_context": ["frustration", "annoying", "stuck"]}, {"pref_id": "ms_001", "condition": "solving algebraic equations", "action": "show each manipulation step with the operation applied noted", "conflict_group": "math_detail", "priority_context": ["solve", "equation", "algebra"]}, {"pref_id": "rf_003", "condition": "providing a final answer or conclusion", "action": "put the answer first, then explanation", "conflict_group": "answer_position", "priority_context": ["direct_question", "what_is"]}, {"pref_id": "rf_001", "condition": "listing multiple items or options", "action": "use bullet points with consistent indentation", "conflict_group": "format_structure", "priority_context": ["enumeration", "comparison"]}, {"pref_id": "cs_002", "condition": "writing JavaScript or TypeScript code", "action": "use camelCase for variables, PascalCase for classes", "conflict_group": "naming_convention", "priority_context": ["javascript", "js", "typescript", "ts"]}, {"pref_id": "ip_002", "condition": "I give a clear and specific instruction", "action": "execute directly without asking for confirmation", "conflict_group": "autonomy", "priority_context": ["do this", "make this", "specific_instruction"]}, {"pref_id": "vb_001", "condition": "I say 'quick question' or 'briefly'", "action": "respond in 3 sentences or fewer, no elaboration", "conflict_group": "response_length", "priority_context": ["time_pressure", "simple_query"]}, {"pref_id": "ms_004", "condition": "the problem involves calculus", "action": "state the rule being applied (chain rule, integration by parts, etc.)", "conflict_group": "math_detail", "priority_context": ["derivative", "integral", "calculus"]}, {"pref_id": "vb_005", "condition": "I explicitly share my current understanding first", "action": "acknowledge what I got right, then correct only the gaps", "conflict_group": null, "priority_context": ["validation", "checking"]}, {"pref_id": "ec_001", "condition": "I make a minor error in terminology", "action": "correct it gently inline without making it a focus", "conflict_group": "correction_style", "priority_context": ["minor_error", "terminology"]}, {"pref_id": "ip_001", "condition": "the task is complex with multiple parts", "action": "confirm the plan before executing, break into phases", "conflict_group": "autonomy", "priority_context": ["complex", "multiple", "project"]}, {"pref_id": "cs_006", "condition": "I ask for a code review", "action": "focus only on bugs and logic errors, ignore style issues", "conflict_group": "review_scope", "priority_context": ["review", "check", "look at"]}, {"pref_id": "ds_005", "condition": "discussing data structures", "action": "always include time complexity for operations mentioned", "conflict_group": null, "priority_context": ["data structure", "array", "tree", "hash"]}, {"pref_id": "oa_004", "condition": "providing commands to run", "action": "use bash code blocks, include expected output as comments", "conflict_group": null, "priority_context": ["command", "run", "terminal"]}, {"pref_id": "ms_006", "condition": "I'm practicing for an exam", "action": "after solving, give a similar practice problem", "conflict_group": null, "priority_context": ["practice", "exam", "test"]}, {"pref_id": "ec_004", "condition": "I ask you to correct your previous response", "action": "acknowledge the error explicitly, then provide corrected version", "conflict_group": null, "priority_context": ["you were wrong", "that's not right", "actually"]}, {"pref_id": "ec_002", "condition": "I have a fundamental misconception", "action": "address the misconception directly and clearly before proceeding", "conflict_group": "correction_style", "priority_context": ["misconception", "fundamental_error"]}, {"pref_id": "oa_001", "condition": "generating code that will be copied", "action": "provide code in a single copyable block, no interleaved explanation", "conflict_group": "code_presentation", "priority_context": ["copy", "use this", "give me code"]}, {"pref_id": "ip_006", "condition": "I thank you or say the answer was helpful", "action": "don't add unnecessary follow-up, just acknowledge briefly", "conflict_group": null, "priority_context": ["thanks", "helpful", "great"]}, {"pref_id": "rf_002", "condition": "explaining a sequential process or procedure", "action": "use numbered steps with clear transitions", "conflict_group": "format_structure", "priority_context": ["tutorial", "how-to", "setup"]}, {"pref_id": "ms_005", "condition": "discussing proofs", "action": "structure as: claim, approach sketch, formal proof, intuition recap", "conflict_group": "proof_style", "priority_context": ["prove", "proof", "show that"]}, {"pref_id": "ip_003", "condition": "I seem uncertain or ask 'what do you think'", "action": "provide a recommendation with brief rationale, not just options", "conflict_group": "guidance_style", "priority_context": ["uncertain", "should I", "what do you think"]}, {"pref_id": "cs_001", "condition": "writing Python code", "action": "use snake_case for variables and functions, include type hints", "conflict_group": "naming_convention", "priority_context": ["python", "py"]}, {"pref_id": "ip_004", "condition": "I'm comparing alternatives", "action": "present trade-offs in a table format with clear criteria", "conflict_group": "guidance_style", "priority_context": ["compare", "vs", "or", "which"]}, {"pref_id": "ds_006", "condition": "writing or reviewing documentation", "action": "be concise, avoid marketing language, focus on usage", "conflict_group": null, "priority_context": ["documentation", "docs", "readme"]}, {"pref_id": "vb_003", "condition": "I ask 'why' or 'how come'", "action": "always explain the underlying reasoning, not just the what", "conflict_group": "explanation_depth", "priority_context": ["curiosity", "understanding"]}, {"pref_id": "ds_002", "condition": "discussing system design or architecture", "action": "describe components as a list first, then explain interactions", "conflict_group": null, "priority_context": ["design", "architecture", "system"]}, {"pref_id": "ms_003", "condition": "I ask to verify my calculation", "action": "check my work step by step, point out where I diverged if wrong", "conflict_group": null, "priority_context": ["verify", "check", "is this right"]}, {"pref_id": "cs_003", "condition": "writing SQL queries", "action": "use UPPERCASE for keywords, lowercase for table/column names", "conflict_group": "naming_convention", "priority_context": ["sql", "database", "query"]}, {"pref_id": "cs_004", "condition": "the code snippet is short (under 20 lines)", "action": "include inline comments explaining each logical block", "conflict_group": "comment_style", "priority_context": ["example", "snippet"]}, {"pref_id": "vb_004", "condition": "I'm debugging and say 'it doesn't work'", "action": "focus on diagnosing the specific issue, skip general explanations", "conflict_group": "explanation_depth", "priority_context": ["debugging", "error", "fix"]}, {"pref_id": "ds_001", "condition": "discussing machine learning concepts", "action": "include the mathematical formulation alongside intuitive explanation", "conflict_group": null, "priority_context": ["ml", "machine learning", "model"]}], "conflict_groups": {"comment_style": ["cs_005", "cs_004"], "code_presentation": ["oa_002", "oa_001"], "example_position": ["ds_004", "ds_003"], "answer_position": ["rf_004", "rf_003"], "response_length": ["vb_002", "vb_001"], "math_detail": ["ms_001", "ms_004"], "format_structure": ["rf_001", "rf_002"], "naming_convention": ["cs_002", "cs_001", "cs_003"], "autonomy": ["ip_002", "ip_001"], "correction_style": ["ec_001", "ec_002"], "review_scope": ["cs_006"], "proof_style": ["ms_005"], "guidance_style": ["ip_003", "ip_004"], "explanation_depth": ["vb_003", "vb_004"]}, "meta": {"total_preferences": 40, "total_conflict_groups": 14, "generator": "schema_based"}} +{"user_id": "user_5d82ca72", "persona": "A data scientist who thinks visually. Prefers intuition before formulas and lots of examples.", "preferences": [{"pref_id": "ec_003", "condition": "my code has a bug", "action": "show the bug location, explain why it's wrong, provide the fix", "conflict_group": null, "priority_context": ["bug", "error", "wrong"]}, {"pref_id": "ip_001", "condition": "the task is complex with multiple parts", "action": "confirm the plan before executing, break into phases", "conflict_group": "autonomy", "priority_context": ["complex", "multiple", "project"]}, {"pref_id": "ds_002", "condition": "discussing system design or architecture", "action": "describe components as a list first, then explain interactions", "conflict_group": null, "priority_context": ["design", "architecture", "system"]}, {"pref_id": "ec_001", "condition": "I make a minor error in terminology", "action": "correct it gently inline without making it a focus", "conflict_group": "correction_style", "priority_context": ["minor_error", "terminology"]}, {"pref_id": "ds_005", "condition": "discussing data structures", "action": "always include time complexity for operations mentioned", "conflict_group": null, "priority_context": ["data structure", "array", "tree", "hash"]}, {"pref_id": "ms_006", "condition": "I'm practicing for an exam", "action": "after solving, give a similar practice problem", "conflict_group": null, "priority_context": ["practice", "exam", "test"]}, {"pref_id": "ip_005", "condition": "I express frustration or say 'this is annoying'", "action": "acknowledge the difficulty briefly, then provide direct help", "conflict_group": null, "priority_context": ["frustration", "annoying", "stuck"]}, {"pref_id": "oa_001", "condition": "generating code that will be copied", "action": "provide code in a single copyable block, no interleaved explanation", "conflict_group": "code_presentation", "priority_context": ["copy", "use this", "give me code"]}, {"pref_id": "ms_005", "condition": "discussing proofs", "action": "structure as: claim, approach sketch, formal proof, intuition recap", "conflict_group": "proof_style", "priority_context": ["prove", "proof", "show that"]}, {"pref_id": "oa_004", "condition": "providing commands to run", "action": "use bash code blocks, include expected output as comments", "conflict_group": null, "priority_context": ["command", "run", "terminal"]}, {"pref_id": "ds_001", "condition": "discussing machine learning concepts", "action": "include the mathematical formulation alongside intuitive explanation", "conflict_group": null, "priority_context": ["ml", "machine learning", "model"]}, {"pref_id": "ms_001", "condition": "solving algebraic equations", "action": "show each manipulation step with the operation applied noted", "conflict_group": "math_detail", "priority_context": ["solve", "equation", "algebra"]}, {"pref_id": "cs_001", "condition": "writing Python code", "action": "use snake_case for variables and functions, include type hints", "conflict_group": "naming_convention", "priority_context": ["python", "py"]}, {"pref_id": "vb_005", "condition": "I explicitly share my current understanding first", "action": "acknowledge what I got right, then correct only the gaps", "conflict_group": null, "priority_context": ["validation", "checking"]}, {"pref_id": "vb_004", "condition": "I'm debugging and say 'it doesn't work'", "action": "focus on diagnosing the specific issue, skip general explanations", "conflict_group": "explanation_depth", "priority_context": ["debugging", "error", "fix"]}, {"pref_id": "rf_004", "condition": "teaching a new concept", "action": "build up intuition before giving the formal definition", "conflict_group": "answer_position", "priority_context": ["learning", "explain", "why"]}, {"pref_id": "oa_003", "condition": "any response with code", "action": "always specify the language in the code fence", "conflict_group": null, "priority_context": ["code"]}, {"pref_id": "vb_003", "condition": "I ask 'why' or 'how come'", "action": "always explain the underlying reasoning, not just the what", "conflict_group": "explanation_depth", "priority_context": ["curiosity", "understanding"]}, {"pref_id": "cs_002", "condition": "writing JavaScript or TypeScript code", "action": "use camelCase for variables, PascalCase for classes", "conflict_group": "naming_convention", "priority_context": ["javascript", "js", "typescript", "ts"]}, {"pref_id": "ec_002", "condition": "I have a fundamental misconception", "action": "address the misconception directly and clearly before proceeding", "conflict_group": "correction_style", "priority_context": ["misconception", "fundamental_error"]}, {"pref_id": "ds_003", "condition": "discussing APIs or library usage", "action": "show a minimal working example before explaining parameters", "conflict_group": "example_position", "priority_context": ["api", "library", "how to use"]}, {"pref_id": "vb_002", "condition": "the topic involves complex algorithms or mathematics", "action": "provide detailed step-by-step derivation with intermediate results", "conflict_group": "response_length", "priority_context": ["complex_topic", "proof", "derivation"]}, {"pref_id": "ip_003", "condition": "I seem uncertain or ask 'what do you think'", "action": "provide a recommendation with brief rationale, not just options", "conflict_group": "guidance_style", "priority_context": ["uncertain", "should I", "what do you think"]}, {"pref_id": "ms_004", "condition": "the problem involves calculus", "action": "state the rule being applied (chain rule, integration by parts, etc.)", "conflict_group": "math_detail", "priority_context": ["derivative", "integral", "calculus"]}, {"pref_id": "cs_007", "condition": "I ask to improve or refactor code", "action": "address both logic and style, suggest modern idioms", "conflict_group": "review_scope", "priority_context": ["improve", "refactor", "better"]}, {"pref_id": "ds_004", "condition": "explaining a theoretical concept", "action": "start with definition, then example, then edge cases", "conflict_group": "example_position", "priority_context": ["concept", "theory", "what is"]}, {"pref_id": "cs_008", "condition": "providing error handling examples", "action": "always use specific exception types, never bare except", "conflict_group": null, "priority_context": ["error", "exception", "try"]}, {"pref_id": "oa_002", "condition": "teaching through code examples", "action": "break code into small chunks with explanation between each", "conflict_group": "code_presentation", "priority_context": ["teach", "learn", "understand"]}, {"pref_id": "rf_002", "condition": "explaining a sequential process or procedure", "action": "use numbered steps with clear transitions", "conflict_group": "format_structure", "priority_context": ["tutorial", "how-to", "setup"]}, {"pref_id": "cs_005", "condition": "the code is a complete module or class", "action": "use docstrings at function/class level, minimal inline comments", "conflict_group": "comment_style", "priority_context": ["module", "class", "production"]}, {"pref_id": "ip_004", "condition": "I'm comparing alternatives", "action": "present trade-offs in a table format with clear criteria", "conflict_group": "guidance_style", "priority_context": ["compare", "vs", "or", "which"]}, {"pref_id": "vb_001", "condition": "I say 'quick question' or 'briefly'", "action": "respond in 3 sentences or fewer, no elaboration", "conflict_group": "response_length", "priority_context": ["time_pressure", "simple_query"]}, {"pref_id": "cs_004", "condition": "the code snippet is short (under 20 lines)", "action": "include inline comments explaining each logical block", "conflict_group": "comment_style", "priority_context": ["example", "snippet"]}, {"pref_id": "ds_006", "condition": "writing or reviewing documentation", "action": "be concise, avoid marketing language, focus on usage", "conflict_group": null, "priority_context": ["documentation", "docs", "readme"]}, {"pref_id": "rf_003", "condition": "providing a final answer or conclusion", "action": "put the answer first, then explanation", "conflict_group": "answer_position", "priority_context": ["direct_question", "what_is"]}, {"pref_id": "cs_003", "condition": "writing SQL queries", "action": "use UPPERCASE for keywords, lowercase for table/column names", "conflict_group": "naming_convention", "priority_context": ["sql", "database", "query"]}, {"pref_id": "rf_001", "condition": "listing multiple items or options", "action": "use bullet points with consistent indentation", "conflict_group": "format_structure", "priority_context": ["enumeration", "comparison"]}, {"pref_id": "ms_002", "condition": "discussing statistics or probability", "action": "start with intuition and real-world interpretation before formulas", "conflict_group": "math_approach", "priority_context": ["probability", "statistics", "likelihood"]}, {"pref_id": "ip_006", "condition": "I thank you or say the answer was helpful", "action": "don't add unnecessary follow-up, just acknowledge briefly", "conflict_group": null, "priority_context": ["thanks", "helpful", "great"]}, {"pref_id": "cs_006", "condition": "I ask for a code review", "action": "focus only on bugs and logic errors, ignore style issues", "conflict_group": "review_scope", "priority_context": ["review", "check", "look at"]}, {"pref_id": "ip_002", "condition": "I give a clear and specific instruction", "action": "execute directly without asking for confirmation", "conflict_group": "autonomy", "priority_context": ["do this", "make this", "specific_instruction"]}, {"pref_id": "ms_003", "condition": "I ask to verify my calculation", "action": "check my work step by step, point out where I diverged if wrong", "conflict_group": null, "priority_context": ["verify", "check", "is this right"]}, {"pref_id": "ec_004", "condition": "I ask you to correct your previous response", "action": "acknowledge the error explicitly, then provide corrected version", "conflict_group": null, "priority_context": ["you were wrong", "that's not right", "actually"]}], "conflict_groups": {"autonomy": ["ip_001", "ip_002"], "correction_style": ["ec_001", "ec_002"], "code_presentation": ["oa_001", "oa_002"], "proof_style": ["ms_005"], "math_detail": ["ms_001", "ms_004"], "naming_convention": ["cs_001", "cs_002", "cs_003"], "explanation_depth": ["vb_004", "vb_003"], "answer_position": ["rf_004", "rf_003"], "example_position": ["ds_003", "ds_004"], "response_length": ["vb_002", "vb_001"], "guidance_style": ["ip_003", "ip_004"], "review_scope": ["cs_007", "cs_006"], "format_structure": ["rf_002", "rf_001"], "comment_style": ["cs_005", "cs_004"], "math_approach": ["ms_002"]}, "meta": {"total_preferences": 43, "total_conflict_groups": 15, "generator": "schema_based"}} +{"user_id": "user_638d3467", "persona": "A student preparing for technical interviews. Needs step-by-step problem solving practice.", "preferences": [{"pref_id": "ip_001", "condition": "the task is complex with multiple parts", "action": "confirm the plan before executing, break into phases", "conflict_group": "autonomy", "priority_context": ["complex", "multiple", "project"]}, {"pref_id": "ip_005", "condition": "I express frustration or say 'this is annoying'", "action": "acknowledge the difficulty briefly, then provide direct help", "conflict_group": null, "priority_context": ["frustration", "annoying", "stuck"]}, {"pref_id": "cs_001", "condition": "writing Python code", "action": "use snake_case for variables and functions, include type hints", "conflict_group": "naming_convention", "priority_context": ["python", "py"]}, {"pref_id": "cs_007", "condition": "I ask to improve or refactor code", "action": "address both logic and style, suggest modern idioms", "conflict_group": "review_scope", "priority_context": ["improve", "refactor", "better"]}, {"pref_id": "ds_005", "condition": "discussing data structures", "action": "always include time complexity for operations mentioned", "conflict_group": null, "priority_context": ["data structure", "array", "tree", "hash"]}, {"pref_id": "ip_004", "condition": "I'm comparing alternatives", "action": "present trade-offs in a table format with clear criteria", "conflict_group": "guidance_style", "priority_context": ["compare", "vs", "or", "which"]}, {"pref_id": "ec_002", "condition": "I have a fundamental misconception", "action": "address the misconception directly and clearly before proceeding", "conflict_group": "correction_style", "priority_context": ["misconception", "fundamental_error"]}, {"pref_id": "cs_003", "condition": "writing SQL queries", "action": "use UPPERCASE for keywords, lowercase for table/column names", "conflict_group": "naming_convention", "priority_context": ["sql", "database", "query"]}, {"pref_id": "cs_002", "condition": "writing JavaScript or TypeScript code", "action": "use camelCase for variables, PascalCase for classes", "conflict_group": "naming_convention", "priority_context": ["javascript", "js", "typescript", "ts"]}, {"pref_id": "ec_001", "condition": "I make a minor error in terminology", "action": "correct it gently inline without making it a focus", "conflict_group": "correction_style", "priority_context": ["minor_error", "terminology"]}, {"pref_id": "vb_002", "condition": "the topic involves complex algorithms or mathematics", "action": "provide detailed step-by-step derivation with intermediate results", "conflict_group": "response_length", "priority_context": ["complex_topic", "proof", "derivation"]}, {"pref_id": "ip_002", "condition": "I give a clear and specific instruction", "action": "execute directly without asking for confirmation", "conflict_group": "autonomy", "priority_context": ["do this", "make this", "specific_instruction"]}, {"pref_id": "ms_003", "condition": "I ask to verify my calculation", "action": "check my work step by step, point out where I diverged if wrong", "conflict_group": null, "priority_context": ["verify", "check", "is this right"]}, {"pref_id": "ms_005", "condition": "discussing proofs", "action": "structure as: claim, approach sketch, formal proof, intuition recap", "conflict_group": "proof_style", "priority_context": ["prove", "proof", "show that"]}, {"pref_id": "vb_004", "condition": "I'm debugging and say 'it doesn't work'", "action": "focus on diagnosing the specific issue, skip general explanations", "conflict_group": "explanation_depth", "priority_context": ["debugging", "error", "fix"]}, {"pref_id": "oa_001", "condition": "generating code that will be copied", "action": "provide code in a single copyable block, no interleaved explanation", "conflict_group": "code_presentation", "priority_context": ["copy", "use this", "give me code"]}, {"pref_id": "ms_002", "condition": "discussing statistics or probability", "action": "start with intuition and real-world interpretation before formulas", "conflict_group": "math_approach", "priority_context": ["probability", "statistics", "likelihood"]}, {"pref_id": "vb_001", "condition": "I say 'quick question' or 'briefly'", "action": "respond in 3 sentences or fewer, no elaboration", "conflict_group": "response_length", "priority_context": ["time_pressure", "simple_query"]}, {"pref_id": "ip_006", "condition": "I thank you or say the answer was helpful", "action": "don't add unnecessary follow-up, just acknowledge briefly", "conflict_group": null, "priority_context": ["thanks", "helpful", "great"]}, {"pref_id": "cs_006", "condition": "I ask for a code review", "action": "focus only on bugs and logic errors, ignore style issues", "conflict_group": "review_scope", "priority_context": ["review", "check", "look at"]}, {"pref_id": "ms_004", "condition": "the problem involves calculus", "action": "state the rule being applied (chain rule, integration by parts, etc.)", "conflict_group": "math_detail", "priority_context": ["derivative", "integral", "calculus"]}, {"pref_id": "vb_005", "condition": "I explicitly share my current understanding first", "action": "acknowledge what I got right, then correct only the gaps", "conflict_group": null, "priority_context": ["validation", "checking"]}, {"pref_id": "oa_004", "condition": "providing commands to run", "action": "use bash code blocks, include expected output as comments", "conflict_group": null, "priority_context": ["command", "run", "terminal"]}, {"pref_id": "ds_001", "condition": "discussing machine learning concepts", "action": "include the mathematical formulation alongside intuitive explanation", "conflict_group": null, "priority_context": ["ml", "machine learning", "model"]}, {"pref_id": "rf_001", "condition": "listing multiple items or options", "action": "use bullet points with consistent indentation", "conflict_group": "format_structure", "priority_context": ["enumeration", "comparison"]}, {"pref_id": "ms_006", "condition": "I'm practicing for an exam", "action": "after solving, give a similar practice problem", "conflict_group": null, "priority_context": ["practice", "exam", "test"]}, {"pref_id": "oa_002", "condition": "teaching through code examples", "action": "break code into small chunks with explanation between each", "conflict_group": "code_presentation", "priority_context": ["teach", "learn", "understand"]}, {"pref_id": "ds_004", "condition": "explaining a theoretical concept", "action": "start with definition, then example, then edge cases", "conflict_group": "example_position", "priority_context": ["concept", "theory", "what is"]}, {"pref_id": "rf_002", "condition": "explaining a sequential process or procedure", "action": "use numbered steps with clear transitions", "conflict_group": "format_structure", "priority_context": ["tutorial", "how-to", "setup"]}, {"pref_id": "ds_003", "condition": "discussing APIs or library usage", "action": "show a minimal working example before explaining parameters", "conflict_group": "example_position", "priority_context": ["api", "library", "how to use"]}, {"pref_id": "oa_003", "condition": "any response with code", "action": "always specify the language in the code fence", "conflict_group": null, "priority_context": ["code"]}, {"pref_id": "cs_008", "condition": "providing error handling examples", "action": "always use specific exception types, never bare except", "conflict_group": null, "priority_context": ["error", "exception", "try"]}, {"pref_id": "ip_003", "condition": "I seem uncertain or ask 'what do you think'", "action": "provide a recommendation with brief rationale, not just options", "conflict_group": "guidance_style", "priority_context": ["uncertain", "should I", "what do you think"]}, {"pref_id": "vb_003", "condition": "I ask 'why' or 'how come'", "action": "always explain the underlying reasoning, not just the what", "conflict_group": "explanation_depth", "priority_context": ["curiosity", "understanding"]}, {"pref_id": "ds_006", "condition": "writing or reviewing documentation", "action": "be concise, avoid marketing language, focus on usage", "conflict_group": null, "priority_context": ["documentation", "docs", "readme"]}, {"pref_id": "cs_005", "condition": "the code is a complete module or class", "action": "use docstrings at function/class level, minimal inline comments", "conflict_group": "comment_style", "priority_context": ["module", "class", "production"]}, {"pref_id": "ec_004", "condition": "I ask you to correct your previous response", "action": "acknowledge the error explicitly, then provide corrected version", "conflict_group": null, "priority_context": ["you were wrong", "that's not right", "actually"]}, {"pref_id": "rf_003", "condition": "providing a final answer or conclusion", "action": "put the answer first, then explanation", "conflict_group": "answer_position", "priority_context": ["direct_question", "what_is"]}, {"pref_id": "ms_001", "condition": "solving algebraic equations", "action": "show each manipulation step with the operation applied noted", "conflict_group": "math_detail", "priority_context": ["solve", "equation", "algebra"]}, {"pref_id": "rf_004", "condition": "teaching a new concept", "action": "build up intuition before giving the formal definition", "conflict_group": "answer_position", "priority_context": ["learning", "explain", "why"]}], "conflict_groups": {"autonomy": ["ip_001", "ip_002"], "naming_convention": ["cs_001", "cs_003", "cs_002"], "review_scope": ["cs_007", "cs_006"], "guidance_style": ["ip_004", "ip_003"], "correction_style": ["ec_002", "ec_001"], "response_length": ["vb_002", "vb_001"], "proof_style": ["ms_005"], "explanation_depth": ["vb_004", "vb_003"], "code_presentation": ["oa_001", "oa_002"], "math_approach": ["ms_002"], "math_detail": ["ms_004", "ms_001"], "format_structure": ["rf_001", "rf_002"], "example_position": ["ds_004", "ds_003"], "comment_style": ["cs_005"], "answer_position": ["rf_003", "rf_004"]}, "meta": {"total_preferences": 40, "total_conflict_groups": 15, "generator": "schema_based"}} +{"user_id": "user_7203131a", "persona": "A data scientist who thinks visually. Prefers intuition before formulas and lots of examples.", "preferences": [{"pref_id": "cs_007", "condition": "I ask to improve or refactor code", "action": "address both logic and style, suggest modern idioms", "conflict_group": "review_scope", "priority_context": ["improve", "refactor", "better"]}, {"pref_id": "ip_002", "condition": "I give a clear and specific instruction", "action": "execute directly without asking for confirmation", "conflict_group": "autonomy", "priority_context": ["do this", "make this", "specific_instruction"]}, {"pref_id": "ec_002", "condition": "I have a fundamental misconception", "action": "address the misconception directly and clearly before proceeding", "conflict_group": "correction_style", "priority_context": ["misconception", "fundamental_error"]}, {"pref_id": "ms_006", "condition": "I'm practicing for an exam", "action": "after solving, give a similar practice problem", "conflict_group": null, "priority_context": ["practice", "exam", "test"]}, {"pref_id": "oa_002", "condition": "teaching through code examples", "action": "break code into small chunks with explanation between each", "conflict_group": "code_presentation", "priority_context": ["teach", "learn", "understand"]}, {"pref_id": "ms_002", "condition": "discussing statistics or probability", "action": "start with intuition and real-world interpretation before formulas", "conflict_group": "math_approach", "priority_context": ["probability", "statistics", "likelihood"]}, {"pref_id": "ec_003", "condition": "my code has a bug", "action": "show the bug location, explain why it's wrong, provide the fix", "conflict_group": null, "priority_context": ["bug", "error", "wrong"]}, {"pref_id": "rf_002", "condition": "explaining a sequential process or procedure", "action": "use numbered steps with clear transitions", "conflict_group": "format_structure", "priority_context": ["tutorial", "how-to", "setup"]}, {"pref_id": "ip_003", "condition": "I seem uncertain or ask 'what do you think'", "action": "provide a recommendation with brief rationale, not just options", "conflict_group": "guidance_style", "priority_context": ["uncertain", "should I", "what do you think"]}, {"pref_id": "ms_001", "condition": "solving algebraic equations", "action": "show each manipulation step with the operation applied noted", "conflict_group": "math_detail", "priority_context": ["solve", "equation", "algebra"]}, {"pref_id": "rf_001", "condition": "listing multiple items or options", "action": "use bullet points with consistent indentation", "conflict_group": "format_structure", "priority_context": ["enumeration", "comparison"]}, {"pref_id": "rf_004", "condition": "teaching a new concept", "action": "build up intuition before giving the formal definition", "conflict_group": "answer_position", "priority_context": ["learning", "explain", "why"]}, {"pref_id": "ds_001", "condition": "discussing machine learning concepts", "action": "include the mathematical formulation alongside intuitive explanation", "conflict_group": null, "priority_context": ["ml", "machine learning", "model"]}, {"pref_id": "ms_003", "condition": "I ask to verify my calculation", "action": "check my work step by step, point out where I diverged if wrong", "conflict_group": null, "priority_context": ["verify", "check", "is this right"]}, {"pref_id": "ip_001", "condition": "the task is complex with multiple parts", "action": "confirm the plan before executing, break into phases", "conflict_group": "autonomy", "priority_context": ["complex", "multiple", "project"]}, {"pref_id": "oa_004", "condition": "providing commands to run", "action": "use bash code blocks, include expected output as comments", "conflict_group": null, "priority_context": ["command", "run", "terminal"]}, {"pref_id": "ip_004", "condition": "I'm comparing alternatives", "action": "present trade-offs in a table format with clear criteria", "conflict_group": "guidance_style", "priority_context": ["compare", "vs", "or", "which"]}, {"pref_id": "cs_001", "condition": "writing Python code", "action": "use snake_case for variables and functions, include type hints", "conflict_group": "naming_convention", "priority_context": ["python", "py"]}, {"pref_id": "oa_003", "condition": "any response with code", "action": "always specify the language in the code fence", "conflict_group": null, "priority_context": ["code"]}, {"pref_id": "vb_004", "condition": "I'm debugging and say 'it doesn't work'", "action": "focus on diagnosing the specific issue, skip general explanations", "conflict_group": "explanation_depth", "priority_context": ["debugging", "error", "fix"]}, {"pref_id": "ds_002", "condition": "discussing system design or architecture", "action": "describe components as a list first, then explain interactions", "conflict_group": null, "priority_context": ["design", "architecture", "system"]}, {"pref_id": "cs_003", "condition": "writing SQL queries", "action": "use UPPERCASE for keywords, lowercase for table/column names", "conflict_group": "naming_convention", "priority_context": ["sql", "database", "query"]}, {"pref_id": "ip_006", "condition": "I thank you or say the answer was helpful", "action": "don't add unnecessary follow-up, just acknowledge briefly", "conflict_group": null, "priority_context": ["thanks", "helpful", "great"]}, {"pref_id": "vb_003", "condition": "I ask 'why' or 'how come'", "action": "always explain the underlying reasoning, not just the what", "conflict_group": "explanation_depth", "priority_context": ["curiosity", "understanding"]}, {"pref_id": "vb_005", "condition": "I explicitly share my current understanding first", "action": "acknowledge what I got right, then correct only the gaps", "conflict_group": null, "priority_context": ["validation", "checking"]}, {"pref_id": "vb_001", "condition": "I say 'quick question' or 'briefly'", "action": "respond in 3 sentences or fewer, no elaboration", "conflict_group": "response_length", "priority_context": ["time_pressure", "simple_query"]}, {"pref_id": "ds_003", "condition": "discussing APIs or library usage", "action": "show a minimal working example before explaining parameters", "conflict_group": "example_position", "priority_context": ["api", "library", "how to use"]}, {"pref_id": "ec_001", "condition": "I make a minor error in terminology", "action": "correct it gently inline without making it a focus", "conflict_group": "correction_style", "priority_context": ["minor_error", "terminology"]}, {"pref_id": "ds_005", "condition": "discussing data structures", "action": "always include time complexity for operations mentioned", "conflict_group": null, "priority_context": ["data structure", "array", "tree", "hash"]}, {"pref_id": "cs_004", "condition": "the code snippet is short (under 20 lines)", "action": "include inline comments explaining each logical block", "conflict_group": "comment_style", "priority_context": ["example", "snippet"]}, {"pref_id": "ms_004", "condition": "the problem involves calculus", "action": "state the rule being applied (chain rule, integration by parts, etc.)", "conflict_group": "math_detail", "priority_context": ["derivative", "integral", "calculus"]}, {"pref_id": "ds_004", "condition": "explaining a theoretical concept", "action": "start with definition, then example, then edge cases", "conflict_group": "example_position", "priority_context": ["concept", "theory", "what is"]}, {"pref_id": "ds_006", "condition": "writing or reviewing documentation", "action": "be concise, avoid marketing language, focus on usage", "conflict_group": null, "priority_context": ["documentation", "docs", "readme"]}, {"pref_id": "vb_002", "condition": "the topic involves complex algorithms or mathematics", "action": "provide detailed step-by-step derivation with intermediate results", "conflict_group": "response_length", "priority_context": ["complex_topic", "proof", "derivation"]}, {"pref_id": "rf_003", "condition": "providing a final answer or conclusion", "action": "put the answer first, then explanation", "conflict_group": "answer_position", "priority_context": ["direct_question", "what_is"]}, {"pref_id": "ip_005", "condition": "I express frustration or say 'this is annoying'", "action": "acknowledge the difficulty briefly, then provide direct help", "conflict_group": null, "priority_context": ["frustration", "annoying", "stuck"]}], "conflict_groups": {"review_scope": ["cs_007"], "autonomy": ["ip_002", "ip_001"], "correction_style": ["ec_002", "ec_001"], "code_presentation": ["oa_002"], "math_approach": ["ms_002"], "format_structure": ["rf_002", "rf_001"], "guidance_style": ["ip_003", "ip_004"], "math_detail": ["ms_001", "ms_004"], "answer_position": ["rf_004", "rf_003"], "naming_convention": ["cs_001", "cs_003"], "explanation_depth": ["vb_004", "vb_003"], "response_length": ["vb_001", "vb_002"], "example_position": ["ds_003", "ds_004"], "comment_style": ["cs_004"]}, "meta": {"total_preferences": 36, "total_conflict_groups": 14, "generator": "schema_based"}} +{"user_id": "user_f0694c1d", "persona": "A tech lead reviewing code from their team. Focuses on maintainability and best practices.", "preferences": [{"pref_id": "rf_002", "condition": "explaining a sequential process or procedure", "action": "use numbered steps with clear transitions", "conflict_group": "format_structure", "priority_context": ["tutorial", "how-to", "setup"]}, {"pref_id": "oa_001", "condition": "generating code that will be copied", "action": "provide code in a single copyable block, no interleaved explanation", "conflict_group": "code_presentation", "priority_context": ["copy", "use this", "give me code"]}, {"pref_id": "ec_001", "condition": "I make a minor error in terminology", "action": "correct it gently inline without making it a focus", "conflict_group": "correction_style", "priority_context": ["minor_error", "terminology"]}, {"pref_id": "ms_004", "condition": "the problem involves calculus", "action": "state the rule being applied (chain rule, integration by parts, etc.)", "conflict_group": "math_detail", "priority_context": ["derivative", "integral", "calculus"]}, {"pref_id": "cs_006", "condition": "I ask for a code review", "action": "focus only on bugs and logic errors, ignore style issues", "conflict_group": "review_scope", "priority_context": ["review", "check", "look at"]}, {"pref_id": "vb_001", "condition": "I say 'quick question' or 'briefly'", "action": "respond in 3 sentences or fewer, no elaboration", "conflict_group": "response_length", "priority_context": ["time_pressure", "simple_query"]}, {"pref_id": "ds_001", "condition": "discussing machine learning concepts", "action": "include the mathematical formulation alongside intuitive explanation", "conflict_group": null, "priority_context": ["ml", "machine learning", "model"]}, {"pref_id": "ms_003", "condition": "I ask to verify my calculation", "action": "check my work step by step, point out where I diverged if wrong", "conflict_group": null, "priority_context": ["verify", "check", "is this right"]}, {"pref_id": "ip_004", "condition": "I'm comparing alternatives", "action": "present trade-offs in a table format with clear criteria", "conflict_group": "guidance_style", "priority_context": ["compare", "vs", "or", "which"]}, {"pref_id": "vb_004", "condition": "I'm debugging and say 'it doesn't work'", "action": "focus on diagnosing the specific issue, skip general explanations", "conflict_group": "explanation_depth", "priority_context": ["debugging", "error", "fix"]}, {"pref_id": "vb_005", "condition": "I explicitly share my current understanding first", "action": "acknowledge what I got right, then correct only the gaps", "conflict_group": null, "priority_context": ["validation", "checking"]}, {"pref_id": "rf_003", "condition": "providing a final answer or conclusion", "action": "put the answer first, then explanation", "conflict_group": "answer_position", "priority_context": ["direct_question", "what_is"]}, {"pref_id": "ds_002", "condition": "discussing system design or architecture", "action": "describe components as a list first, then explain interactions", "conflict_group": null, "priority_context": ["design", "architecture", "system"]}, {"pref_id": "oa_004", "condition": "providing commands to run", "action": "use bash code blocks, include expected output as comments", "conflict_group": null, "priority_context": ["command", "run", "terminal"]}, {"pref_id": "cs_001", "condition": "writing Python code", "action": "use snake_case for variables and functions, include type hints", "conflict_group": "naming_convention", "priority_context": ["python", "py"]}, {"pref_id": "cs_007", "condition": "I ask to improve or refactor code", "action": "address both logic and style, suggest modern idioms", "conflict_group": "review_scope", "priority_context": ["improve", "refactor", "better"]}, {"pref_id": "rf_004", "condition": "teaching a new concept", "action": "build up intuition before giving the formal definition", "conflict_group": "answer_position", "priority_context": ["learning", "explain", "why"]}, {"pref_id": "ds_006", "condition": "writing or reviewing documentation", "action": "be concise, avoid marketing language, focus on usage", "conflict_group": null, "priority_context": ["documentation", "docs", "readme"]}, {"pref_id": "ds_005", "condition": "discussing data structures", "action": "always include time complexity for operations mentioned", "conflict_group": null, "priority_context": ["data structure", "array", "tree", "hash"]}, {"pref_id": "ms_001", "condition": "solving algebraic equations", "action": "show each manipulation step with the operation applied noted", "conflict_group": "math_detail", "priority_context": ["solve", "equation", "algebra"]}, {"pref_id": "ec_003", "condition": "my code has a bug", "action": "show the bug location, explain why it's wrong, provide the fix", "conflict_group": null, "priority_context": ["bug", "error", "wrong"]}, {"pref_id": "cs_004", "condition": "the code snippet is short (under 20 lines)", "action": "include inline comments explaining each logical block", "conflict_group": "comment_style", "priority_context": ["example", "snippet"]}, {"pref_id": "ec_004", "condition": "I ask you to correct your previous response", "action": "acknowledge the error explicitly, then provide corrected version", "conflict_group": null, "priority_context": ["you were wrong", "that's not right", "actually"]}, {"pref_id": "cs_003", "condition": "writing SQL queries", "action": "use UPPERCASE for keywords, lowercase for table/column names", "conflict_group": "naming_convention", "priority_context": ["sql", "database", "query"]}, {"pref_id": "ip_001", "condition": "the task is complex with multiple parts", "action": "confirm the plan before executing, break into phases", "conflict_group": "autonomy", "priority_context": ["complex", "multiple", "project"]}, {"pref_id": "cs_005", "condition": "the code is a complete module or class", "action": "use docstrings at function/class level, minimal inline comments", "conflict_group": "comment_style", "priority_context": ["module", "class", "production"]}, {"pref_id": "ms_006", "condition": "I'm practicing for an exam", "action": "after solving, give a similar practice problem", "conflict_group": null, "priority_context": ["practice", "exam", "test"]}, {"pref_id": "ms_002", "condition": "discussing statistics or probability", "action": "start with intuition and real-world interpretation before formulas", "conflict_group": "math_approach", "priority_context": ["probability", "statistics", "likelihood"]}, {"pref_id": "cs_002", "condition": "writing JavaScript or TypeScript code", "action": "use camelCase for variables, PascalCase for classes", "conflict_group": "naming_convention", "priority_context": ["javascript", "js", "typescript", "ts"]}, {"pref_id": "ec_002", "condition": "I have a fundamental misconception", "action": "address the misconception directly and clearly before proceeding", "conflict_group": "correction_style", "priority_context": ["misconception", "fundamental_error"]}, {"pref_id": "ip_002", "condition": "I give a clear and specific instruction", "action": "execute directly without asking for confirmation", "conflict_group": "autonomy", "priority_context": ["do this", "make this", "specific_instruction"]}, {"pref_id": "ip_006", "condition": "I thank you or say the answer was helpful", "action": "don't add unnecessary follow-up, just acknowledge briefly", "conflict_group": null, "priority_context": ["thanks", "helpful", "great"]}, {"pref_id": "vb_003", "condition": "I ask 'why' or 'how come'", "action": "always explain the underlying reasoning, not just the what", "conflict_group": "explanation_depth", "priority_context": ["curiosity", "understanding"]}, {"pref_id": "cs_008", "condition": "providing error handling examples", "action": "always use specific exception types, never bare except", "conflict_group": null, "priority_context": ["error", "exception", "try"]}, {"pref_id": "rf_001", "condition": "listing multiple items or options", "action": "use bullet points with consistent indentation", "conflict_group": "format_structure", "priority_context": ["enumeration", "comparison"]}, {"pref_id": "vb_002", "condition": "the topic involves complex algorithms or mathematics", "action": "provide detailed step-by-step derivation with intermediate results", "conflict_group": "response_length", "priority_context": ["complex_topic", "proof", "derivation"]}, {"pref_id": "ip_005", "condition": "I express frustration or say 'this is annoying'", "action": "acknowledge the difficulty briefly, then provide direct help", "conflict_group": null, "priority_context": ["frustration", "annoying", "stuck"]}, {"pref_id": "ip_003", "condition": "I seem uncertain or ask 'what do you think'", "action": "provide a recommendation with brief rationale, not just options", "conflict_group": "guidance_style", "priority_context": ["uncertain", "should I", "what do you think"]}, {"pref_id": "ms_005", "condition": "discussing proofs", "action": "structure as: claim, approach sketch, formal proof, intuition recap", "conflict_group": "proof_style", "priority_context": ["prove", "proof", "show that"]}, {"pref_id": "oa_003", "condition": "any response with code", "action": "always specify the language in the code fence", "conflict_group": null, "priority_context": ["code"]}, {"pref_id": "oa_002", "condition": "teaching through code examples", "action": "break code into small chunks with explanation between each", "conflict_group": "code_presentation", "priority_context": ["teach", "learn", "understand"]}, {"pref_id": "ds_004", "condition": "explaining a theoretical concept", "action": "start with definition, then example, then edge cases", "conflict_group": "example_position", "priority_context": ["concept", "theory", "what is"]}], "conflict_groups": {"format_structure": ["rf_002", "rf_001"], "code_presentation": ["oa_001", "oa_002"], "correction_style": ["ec_001", "ec_002"], "math_detail": ["ms_004", "ms_001"], "review_scope": ["cs_006", "cs_007"], "response_length": ["vb_001", "vb_002"], "guidance_style": ["ip_004", "ip_003"], "explanation_depth": ["vb_004", "vb_003"], "answer_position": ["rf_003", "rf_004"], "naming_convention": ["cs_001", "cs_003", "cs_002"], "comment_style": ["cs_004", "cs_005"], "autonomy": ["ip_001", "ip_002"], "math_approach": ["ms_002"], "proof_style": ["ms_005"], "example_position": ["ds_004"]}, "meta": {"total_preferences": 42, "total_conflict_groups": 15, "generator": "schema_based"}} +{"user_id": "user_67ee9892", "persona": "A student preparing for technical interviews. Needs step-by-step problem solving practice.", "preferences": [{"pref_id": "ms_001", "condition": "solving algebraic equations", "action": "show each manipulation step with the operation applied noted", "conflict_group": "math_detail", "priority_context": ["solve", "equation", "algebra"]}, {"pref_id": "ms_003", "condition": "I ask to verify my calculation", "action": "check my work step by step, point out where I diverged if wrong", "conflict_group": null, "priority_context": ["verify", "check", "is this right"]}, {"pref_id": "ms_005", "condition": "discussing proofs", "action": "structure as: claim, approach sketch, formal proof, intuition recap", "conflict_group": "proof_style", "priority_context": ["prove", "proof", "show that"]}, {"pref_id": "ms_006", "condition": "I'm practicing for an exam", "action": "after solving, give a similar practice problem", "conflict_group": null, "priority_context": ["practice", "exam", "test"]}, {"pref_id": "rf_001", "condition": "listing multiple items or options", "action": "use bullet points with consistent indentation", "conflict_group": "format_structure", "priority_context": ["enumeration", "comparison"]}, {"pref_id": "cs_003", "condition": "writing SQL queries", "action": "use UPPERCASE for keywords, lowercase for table/column names", "conflict_group": "naming_convention", "priority_context": ["sql", "database", "query"]}, {"pref_id": "cs_001", "condition": "writing Python code", "action": "use snake_case for variables and functions, include type hints", "conflict_group": "naming_convention", "priority_context": ["python", "py"]}, {"pref_id": "ip_003", "condition": "I seem uncertain or ask 'what do you think'", "action": "provide a recommendation with brief rationale, not just options", "conflict_group": "guidance_style", "priority_context": ["uncertain", "should I", "what do you think"]}, {"pref_id": "vb_001", "condition": "I say 'quick question' or 'briefly'", "action": "respond in 3 sentences or fewer, no elaboration", "conflict_group": "response_length", "priority_context": ["time_pressure", "simple_query"]}, {"pref_id": "ec_002", "condition": "I have a fundamental misconception", "action": "address the misconception directly and clearly before proceeding", "conflict_group": "correction_style", "priority_context": ["misconception", "fundamental_error"]}, {"pref_id": "rf_002", "condition": "explaining a sequential process or procedure", "action": "use numbered steps with clear transitions", "conflict_group": "format_structure", "priority_context": ["tutorial", "how-to", "setup"]}, {"pref_id": "vb_002", "condition": "the topic involves complex algorithms or mathematics", "action": "provide detailed step-by-step derivation with intermediate results", "conflict_group": "response_length", "priority_context": ["complex_topic", "proof", "derivation"]}, {"pref_id": "ip_001", "condition": "the task is complex with multiple parts", "action": "confirm the plan before executing, break into phases", "conflict_group": "autonomy", "priority_context": ["complex", "multiple", "project"]}, {"pref_id": "cs_008", "condition": "providing error handling examples", "action": "always use specific exception types, never bare except", "conflict_group": null, "priority_context": ["error", "exception", "try"]}, {"pref_id": "vb_003", "condition": "I ask 'why' or 'how come'", "action": "always explain the underlying reasoning, not just the what", "conflict_group": "explanation_depth", "priority_context": ["curiosity", "understanding"]}, {"pref_id": "cs_004", "condition": "the code snippet is short (under 20 lines)", "action": "include inline comments explaining each logical block", "conflict_group": "comment_style", "priority_context": ["example", "snippet"]}, {"pref_id": "cs_005", "condition": "the code is a complete module or class", "action": "use docstrings at function/class level, minimal inline comments", "conflict_group": "comment_style", "priority_context": ["module", "class", "production"]}, {"pref_id": "cs_006", "condition": "I ask for a code review", "action": "focus only on bugs and logic errors, ignore style issues", "conflict_group": "review_scope", "priority_context": ["review", "check", "look at"]}, {"pref_id": "cs_002", "condition": "writing JavaScript or TypeScript code", "action": "use camelCase for variables, PascalCase for classes", "conflict_group": "naming_convention", "priority_context": ["javascript", "js", "typescript", "ts"]}, {"pref_id": "ds_003", "condition": "discussing APIs or library usage", "action": "show a minimal working example before explaining parameters", "conflict_group": "example_position", "priority_context": ["api", "library", "how to use"]}, {"pref_id": "ec_003", "condition": "my code has a bug", "action": "show the bug location, explain why it's wrong, provide the fix", "conflict_group": null, "priority_context": ["bug", "error", "wrong"]}, {"pref_id": "ds_005", "condition": "discussing data structures", "action": "always include time complexity for operations mentioned", "conflict_group": null, "priority_context": ["data structure", "array", "tree", "hash"]}, {"pref_id": "ip_002", "condition": "I give a clear and specific instruction", "action": "execute directly without asking for confirmation", "conflict_group": "autonomy", "priority_context": ["do this", "make this", "specific_instruction"]}, {"pref_id": "ms_002", "condition": "discussing statistics or probability", "action": "start with intuition and real-world interpretation before formulas", "conflict_group": "math_approach", "priority_context": ["probability", "statistics", "likelihood"]}, {"pref_id": "rf_003", "condition": "providing a final answer or conclusion", "action": "put the answer first, then explanation", "conflict_group": "answer_position", "priority_context": ["direct_question", "what_is"]}, {"pref_id": "ds_004", "condition": "explaining a theoretical concept", "action": "start with definition, then example, then edge cases", "conflict_group": "example_position", "priority_context": ["concept", "theory", "what is"]}, {"pref_id": "ds_006", "condition": "writing or reviewing documentation", "action": "be concise, avoid marketing language, focus on usage", "conflict_group": null, "priority_context": ["documentation", "docs", "readme"]}, {"pref_id": "ds_002", "condition": "discussing system design or architecture", "action": "describe components as a list first, then explain interactions", "conflict_group": null, "priority_context": ["design", "architecture", "system"]}, {"pref_id": "ip_005", "condition": "I express frustration or say 'this is annoying'", "action": "acknowledge the difficulty briefly, then provide direct help", "conflict_group": null, "priority_context": ["frustration", "annoying", "stuck"]}, {"pref_id": "oa_002", "condition": "teaching through code examples", "action": "break code into small chunks with explanation between each", "conflict_group": "code_presentation", "priority_context": ["teach", "learn", "understand"]}, {"pref_id": "ec_004", "condition": "I ask you to correct your previous response", "action": "acknowledge the error explicitly, then provide corrected version", "conflict_group": null, "priority_context": ["you were wrong", "that's not right", "actually"]}, {"pref_id": "ms_004", "condition": "the problem involves calculus", "action": "state the rule being applied (chain rule, integration by parts, etc.)", "conflict_group": "math_detail", "priority_context": ["derivative", "integral", "calculus"]}, {"pref_id": "oa_004", "condition": "providing commands to run", "action": "use bash code blocks, include expected output as comments", "conflict_group": null, "priority_context": ["command", "run", "terminal"]}, {"pref_id": "ds_001", "condition": "discussing machine learning concepts", "action": "include the mathematical formulation alongside intuitive explanation", "conflict_group": null, "priority_context": ["ml", "machine learning", "model"]}, {"pref_id": "vb_004", "condition": "I'm debugging and say 'it doesn't work'", "action": "focus on diagnosing the specific issue, skip general explanations", "conflict_group": "explanation_depth", "priority_context": ["debugging", "error", "fix"]}, {"pref_id": "ip_006", "condition": "I thank you or say the answer was helpful", "action": "don't add unnecessary follow-up, just acknowledge briefly", "conflict_group": null, "priority_context": ["thanks", "helpful", "great"]}, {"pref_id": "rf_004", "condition": "teaching a new concept", "action": "build up intuition before giving the formal definition", "conflict_group": "answer_position", "priority_context": ["learning", "explain", "why"]}], "conflict_groups": {"math_detail": ["ms_001", "ms_004"], "proof_style": ["ms_005"], "format_structure": ["rf_001", "rf_002"], "naming_convention": ["cs_003", "cs_001", "cs_002"], "guidance_style": ["ip_003"], "response_length": ["vb_001", "vb_002"], "correction_style": ["ec_002"], "autonomy": ["ip_001", "ip_002"], "explanation_depth": ["vb_003", "vb_004"], "comment_style": ["cs_004", "cs_005"], "review_scope": ["cs_006"], "example_position": ["ds_003", "ds_004"], "math_approach": ["ms_002"], "answer_position": ["rf_003", "rf_004"], "code_presentation": ["oa_002"]}, "meta": {"total_preferences": 37, "total_conflict_groups": 15, "generator": "schema_based"}} +{"user_id": "user_78da9d75", "persona": "A junior developer learning full-stack. Prefers patient, incremental explanations with examples.", "preferences": [{"pref_id": "ip_003", "condition": "I seem uncertain or ask 'what do you think'", "action": "provide a recommendation with brief rationale, not just options", "conflict_group": "guidance_style", "priority_context": ["uncertain", "should I", "what do you think"]}, {"pref_id": "ip_006", "condition": "I thank you or say the answer was helpful", "action": "don't add unnecessary follow-up, just acknowledge briefly", "conflict_group": null, "priority_context": ["thanks", "helpful", "great"]}, {"pref_id": "ip_002", "condition": "I give a clear and specific instruction", "action": "execute directly without asking for confirmation", "conflict_group": "autonomy", "priority_context": ["do this", "make this", "specific_instruction"]}, {"pref_id": "ms_005", "condition": "discussing proofs", "action": "structure as: claim, approach sketch, formal proof, intuition recap", "conflict_group": "proof_style", "priority_context": ["prove", "proof", "show that"]}, {"pref_id": "cs_003", "condition": "writing SQL queries", "action": "use UPPERCASE for keywords, lowercase for table/column names", "conflict_group": "naming_convention", "priority_context": ["sql", "database", "query"]}, {"pref_id": "ds_003", "condition": "discussing APIs or library usage", "action": "show a minimal working example before explaining parameters", "conflict_group": "example_position", "priority_context": ["api", "library", "how to use"]}, {"pref_id": "ec_003", "condition": "my code has a bug", "action": "show the bug location, explain why it's wrong, provide the fix", "conflict_group": null, "priority_context": ["bug", "error", "wrong"]}, {"pref_id": "ip_001", "condition": "the task is complex with multiple parts", "action": "confirm the plan before executing, break into phases", "conflict_group": "autonomy", "priority_context": ["complex", "multiple", "project"]}, {"pref_id": "ds_005", "condition": "discussing data structures", "action": "always include time complexity for operations mentioned", "conflict_group": null, "priority_context": ["data structure", "array", "tree", "hash"]}, {"pref_id": "ms_001", "condition": "solving algebraic equations", "action": "show each manipulation step with the operation applied noted", "conflict_group": "math_detail", "priority_context": ["solve", "equation", "algebra"]}, {"pref_id": "vb_002", "condition": "the topic involves complex algorithms or mathematics", "action": "provide detailed step-by-step derivation with intermediate results", "conflict_group": "response_length", "priority_context": ["complex_topic", "proof", "derivation"]}, {"pref_id": "ip_005", "condition": "I express frustration or say 'this is annoying'", "action": "acknowledge the difficulty briefly, then provide direct help", "conflict_group": null, "priority_context": ["frustration", "annoying", "stuck"]}, {"pref_id": "rf_003", "condition": "providing a final answer or conclusion", "action": "put the answer first, then explanation", "conflict_group": "answer_position", "priority_context": ["direct_question", "what_is"]}, {"pref_id": "cs_005", "condition": "the code is a complete module or class", "action": "use docstrings at function/class level, minimal inline comments", "conflict_group": "comment_style", "priority_context": ["module", "class", "production"]}, {"pref_id": "ms_003", "condition": "I ask to verify my calculation", "action": "check my work step by step, point out where I diverged if wrong", "conflict_group": null, "priority_context": ["verify", "check", "is this right"]}, {"pref_id": "ip_004", "condition": "I'm comparing alternatives", "action": "present trade-offs in a table format with clear criteria", "conflict_group": "guidance_style", "priority_context": ["compare", "vs", "or", "which"]}, {"pref_id": "ds_004", "condition": "explaining a theoretical concept", "action": "start with definition, then example, then edge cases", "conflict_group": "example_position", "priority_context": ["concept", "theory", "what is"]}, {"pref_id": "ds_006", "condition": "writing or reviewing documentation", "action": "be concise, avoid marketing language, focus on usage", "conflict_group": null, "priority_context": ["documentation", "docs", "readme"]}, {"pref_id": "cs_001", "condition": "writing Python code", "action": "use snake_case for variables and functions, include type hints", "conflict_group": "naming_convention", "priority_context": ["python", "py"]}, {"pref_id": "cs_002", "condition": "writing JavaScript or TypeScript code", "action": "use camelCase for variables, PascalCase for classes", "conflict_group": "naming_convention", "priority_context": ["javascript", "js", "typescript", "ts"]}, {"pref_id": "rf_004", "condition": "teaching a new concept", "action": "build up intuition before giving the formal definition", "conflict_group": "answer_position", "priority_context": ["learning", "explain", "why"]}, {"pref_id": "ds_002", "condition": "discussing system design or architecture", "action": "describe components as a list first, then explain interactions", "conflict_group": null, "priority_context": ["design", "architecture", "system"]}, {"pref_id": "ec_001", "condition": "I make a minor error in terminology", "action": "correct it gently inline without making it a focus", "conflict_group": "correction_style", "priority_context": ["minor_error", "terminology"]}, {"pref_id": "oa_002", "condition": "teaching through code examples", "action": "break code into small chunks with explanation between each", "conflict_group": "code_presentation", "priority_context": ["teach", "learn", "understand"]}, {"pref_id": "oa_003", "condition": "any response with code", "action": "always specify the language in the code fence", "conflict_group": null, "priority_context": ["code"]}, {"pref_id": "cs_006", "condition": "I ask for a code review", "action": "focus only on bugs and logic errors, ignore style issues", "conflict_group": "review_scope", "priority_context": ["review", "check", "look at"]}, {"pref_id": "ds_001", "condition": "discussing machine learning concepts", "action": "include the mathematical formulation alongside intuitive explanation", "conflict_group": null, "priority_context": ["ml", "machine learning", "model"]}, {"pref_id": "ec_002", "condition": "I have a fundamental misconception", "action": "address the misconception directly and clearly before proceeding", "conflict_group": "correction_style", "priority_context": ["misconception", "fundamental_error"]}, {"pref_id": "ec_004", "condition": "I ask you to correct your previous response", "action": "acknowledge the error explicitly, then provide corrected version", "conflict_group": null, "priority_context": ["you were wrong", "that's not right", "actually"]}, {"pref_id": "vb_004", "condition": "I'm debugging and say 'it doesn't work'", "action": "focus on diagnosing the specific issue, skip general explanations", "conflict_group": "explanation_depth", "priority_context": ["debugging", "error", "fix"]}, {"pref_id": "cs_007", "condition": "I ask to improve or refactor code", "action": "address both logic and style, suggest modern idioms", "conflict_group": "review_scope", "priority_context": ["improve", "refactor", "better"]}, {"pref_id": "cs_008", "condition": "providing error handling examples", "action": "always use specific exception types, never bare except", "conflict_group": null, "priority_context": ["error", "exception", "try"]}, {"pref_id": "rf_001", "condition": "listing multiple items or options", "action": "use bullet points with consistent indentation", "conflict_group": "format_structure", "priority_context": ["enumeration", "comparison"]}, {"pref_id": "cs_004", "condition": "the code snippet is short (under 20 lines)", "action": "include inline comments explaining each logical block", "conflict_group": "comment_style", "priority_context": ["example", "snippet"]}, {"pref_id": "vb_003", "condition": "I ask 'why' or 'how come'", "action": "always explain the underlying reasoning, not just the what", "conflict_group": "explanation_depth", "priority_context": ["curiosity", "understanding"]}, {"pref_id": "vb_001", "condition": "I say 'quick question' or 'briefly'", "action": "respond in 3 sentences or fewer, no elaboration", "conflict_group": "response_length", "priority_context": ["time_pressure", "simple_query"]}, {"pref_id": "ms_002", "condition": "discussing statistics or probability", "action": "start with intuition and real-world interpretation before formulas", "conflict_group": "math_approach", "priority_context": ["probability", "statistics", "likelihood"]}, {"pref_id": "ms_004", "condition": "the problem involves calculus", "action": "state the rule being applied (chain rule, integration by parts, etc.)", "conflict_group": "math_detail", "priority_context": ["derivative", "integral", "calculus"]}, {"pref_id": "vb_005", "condition": "I explicitly share my current understanding first", "action": "acknowledge what I got right, then correct only the gaps", "conflict_group": null, "priority_context": ["validation", "checking"]}, {"pref_id": "ms_006", "condition": "I'm practicing for an exam", "action": "after solving, give a similar practice problem", "conflict_group": null, "priority_context": ["practice", "exam", "test"]}, {"pref_id": "rf_002", "condition": "explaining a sequential process or procedure", "action": "use numbered steps with clear transitions", "conflict_group": "format_structure", "priority_context": ["tutorial", "how-to", "setup"]}], "conflict_groups": {"guidance_style": ["ip_003", "ip_004"], "autonomy": ["ip_002", "ip_001"], "proof_style": ["ms_005"], "naming_convention": ["cs_003", "cs_001", "cs_002"], "example_position": ["ds_003", "ds_004"], "math_detail": ["ms_001", "ms_004"], "response_length": ["vb_002", "vb_001"], "answer_position": ["rf_003", "rf_004"], "comment_style": ["cs_005", "cs_004"], "correction_style": ["ec_001", "ec_002"], "code_presentation": ["oa_002"], "review_scope": ["cs_006", "cs_007"], "explanation_depth": ["vb_004", "vb_003"], "format_structure": ["rf_001", "rf_002"], "math_approach": ["ms_002"]}, "meta": {"total_preferences": 41, "total_conflict_groups": 15, "generator": "schema_based"}} diff --git a/collaborativeagents/scripts/analyze_vector_preference.py b/collaborativeagents/scripts/analyze_vector_preference.py new file mode 100755 index 0000000..7079b26 --- /dev/null +++ b/collaborativeagents/scripts/analyze_vector_preference.py @@ -0,0 +1,103 @@ +#!/usr/bin/env python3 +""" +分析 user vector 与 revealed preference 之间的关联强度 +""" +import json +import numpy as np +from pathlib import Path +import sys + +def load_experiment(exp_dir): + """加载实验结果""" + exp_path = Path(exp_dir) + + # 找到结果目录 + for method in ["rag_vector", "rag_vector_fast", "rag_vector_balanced"]: + for sub in exp_path.iterdir(): + result_dir = sub / method + if result_dir.exists(): + vectors_path = result_dir / "user_vectors.npz" + results_path = result_dir / "results.json" + if vectors_path.exists() and results_path.exists(): + return { + "vectors": np.load(vectors_path, allow_pickle=True), + "results": json.load(open(results_path)), + "method": method + } + return None + +def analyze_vectors(data): + """分析user vectors""" + vectors = data["vectors"] + results = data["results"] + + user_ids = vectors["user_ids"] + z_long = vectors["z_long"] + z_short = vectors["z_short"] + + print(f"=== User Vector 分析 ===") + print(f"用户数: {len(user_ids)}") + print(f"Vector维度: {z_long.shape[1]}") + + # 计算非零vector数量 + z_long_norms = np.linalg.norm(z_long, axis=1) + z_short_norms = np.linalg.norm(z_short, axis=1) + + nonzero_long = np.count_nonzero(z_long_norms) + nonzero_short = np.count_nonzero(z_short_norms) + + print(f"\nz_long 非零用户: {nonzero_long}/{len(user_ids)}") + print(f"z_short 非零用户: {nonzero_short}/{len(user_ids)}") + print(f"z_long norm 均值: {np.mean(z_long_norms):.4f}") + print(f"z_short norm 均值: {np.mean(z_short_norms):.4f}") + + # 按用户分析性能与vector norm的关系 + print(f"\n=== Vector Norm vs 性能 ===") + + user_stats = {} + for s in results: + uid = s.get("profile_id", "") + if uid not in user_stats: + user_stats[uid] = {"success": 0, "total": 0, "enforce": 0} + m = s.get("metrics", {}) + user_stats[uid]["total"] += 1 + user_stats[uid]["success"] += 1 if m.get("task_success", False) else 0 + user_stats[uid]["enforce"] += m.get("enforcement_count", 0) + + # 计算相关性 + success_rates = [] + norms = [] + + for i, uid in enumerate(user_ids): + if uid in user_stats and user_stats[uid]["total"] > 0: + sr = user_stats[uid]["success"] / user_stats[uid]["total"] + success_rates.append(sr) + norms.append(z_long_norms[i]) + + if len(success_rates) > 5: + corr = np.corrcoef(success_rates, norms)[0, 1] + print(f"z_long norm vs 成功率 相关系数: {corr:.4f}") + + return { + "n_users": len(user_ids), + "nonzero_long": nonzero_long, + "nonzero_short": nonzero_short, + "mean_norm_long": float(np.mean(z_long_norms)), + "mean_norm_short": float(np.mean(z_short_norms)), + } + +if __name__ == "__main__": + if len(sys.argv) < 2: + print("Usage: python analyze_vector_preference.py <experiment_dir>") + print("Example: python analyze_vector_preference.py collaborativeagents/results/rag_vector_v3") + sys.exit(1) + + exp_dir = sys.argv[1] + data = load_experiment(exp_dir) + + if data is None: + print(f"未找到有效的rag_vector实验结果: {exp_dir}") + sys.exit(1) + + print(f"加载实验: {data['method']}") + analyze_vectors(data) diff --git a/collaborativeagents/scripts/conflict_scenario_generator.py b/collaborativeagents/scripts/conflict_scenario_generator.py index 9d00de8..eaf8ef2 100644 --- a/collaborativeagents/scripts/conflict_scenario_generator.py +++ b/collaborativeagents/scripts/conflict_scenario_generator.py @@ -367,11 +367,14 @@ class ConflictScenarioGenerator: # Find conflict groups in these preferences conflict_groups = {} for pref in preferences: - cg = pref.get('conflict_group') - if cg: - if cg not in conflict_groups: - conflict_groups[cg] = [] - conflict_groups[cg].append(pref) + # Handle both dict preferences (with conflict_group) and string preferences + if isinstance(pref, dict): + cg = pref.get('conflict_group') + if cg: + if cg not in conflict_groups: + conflict_groups[cg] = [] + conflict_groups[cg].append(pref) + # String preferences don't have conflict groups - skip them # Find a conflict group with at least 2 preferences for cg, prefs in conflict_groups.items(): diff --git a/collaborativeagents/scripts/queue_next_run.sh b/collaborativeagents/scripts/queue_next_run.sh new file mode 100755 index 0000000..524f0e2 --- /dev/null +++ b/collaborativeagents/scripts/queue_next_run.sh @@ -0,0 +1,44 @@ +#!/bin/bash +# Wait for current fullrun_4methods to finish, then start real-profile experiment + +LOG="/workspace/personalization-user-model/collaborativeagents/results/fullrun_4methods.log" +SCRIPTS_DIR="/workspace/personalization-user-model/collaborativeagents/scripts" + +echo "[$(date)] Waiting for fullrun_4methods to complete..." + +while true; do + if grep -q "EXPERIMENT COMPLETE" "$LOG" 2>/dev/null; then + echo "[$(date)] fullrun_4methods completed!" + break + fi + # Check if process died without completing + if ! pgrep -f "fullrun_4methods" > /dev/null 2>&1; then + if ! grep -q "EXPERIMENT COMPLETE" "$LOG" 2>/dev/null; then + echo "[$(date)] WARNING: process died before completion. Starting next run anyway." + break + fi + fi + sleep 60 +done + +echo "[$(date)] Starting real-profile experiment: 4 methods x 100 profiles x 20 sessions" + +cd "$SCRIPTS_DIR" +nohup python3 run_experiments.py \ + --methods vanilla,reflection,rag,rag_vector \ + --datasets math-hard \ + --n-profiles 100 \ + --n-sessions 20 \ + --max-turns 8 \ + --use-vllm \ + --vllm-agent-url http://localhost:8003/v1 \ + --vllm-user-url http://localhost:8004/v1 \ + --parallel-profiles 100 \ + --reward-mode llm_local \ + --reward-vllm-url http://localhost:8005/v1 \ + --profile-path ../data/complex_profiles_v2/profiles_200.jsonl \ + --output-dir ../results/realprofile_4methods \ + > ../results/realprofile_4methods.log 2>&1 & + +echo "[$(date)] Experiment launched with PID $!" +echo "Log: /workspace/personalization-user-model/collaborativeagents/results/realprofile_4methods.log" diff --git a/collaborativeagents/scripts/queue_rag_60s.sh b/collaborativeagents/scripts/queue_rag_60s.sh new file mode 100755 index 0000000..87d6679 --- /dev/null +++ b/collaborativeagents/scripts/queue_rag_60s.sh @@ -0,0 +1,45 @@ +#!/bin/bash +# 等待 reflection_60s 完成后启动 rag 60 session 实验 + +echo "等待 reflection_60s 完成..." + +while true; do + CKPT=$(ls collaborativeagents/results/reflection_60s/*/reflection/checkpoint.json 2>/dev/null | head -1) + if [ -n "$CKPT" ]; then + PROGRESS=$(cat "$CKPT" | python3 -c " +import json, sys +data = json.load(sys.stdin) +total = sum(data['sessions_per_profile'].values()) +print(total) +" 2>/dev/null) + + if [ "$PROGRESS" = "3600" ]; then + echo "$(date '+%H:%M:%S') reflection_60s 已完成" + break + fi + echo "$(date '+%H:%M:%S') reflection_60s 进度: $PROGRESS/3600" + else + echo "$(date '+%H:%M:%S') 等待reflection_60s启动..." + fi + sleep 60 +done + +echo "启动 rag_60s..." + +nohup python collaborativeagents/scripts/run_experiments.py \ + --methods rag \ + --datasets math-hard,math-500,bigcodebench \ + --n-profiles 60 \ + --n-sessions 60 \ + --max-turns 10 \ + --use-vllm \ + --vllm-agent-url http://localhost:8003/v1 \ + --vllm-user-url http://localhost:8004/v1 \ + --use-batch-processing \ + --batch-size 4 \ + --parallel-profiles 20 \ + --profile-path collaborativeagents/data/complex_profiles_v2/profiles_200.jsonl \ + --output-dir collaborativeagents/results/rag_60s \ + > collaborativeagents/results/rag_60s.log 2>&1 & + +echo "rag_60s 已启动,PID: $!" diff --git a/collaborativeagents/scripts/queue_rag_rewrite.sh b/collaborativeagents/scripts/queue_rag_rewrite.sh new file mode 100755 index 0000000..a5b5b99 --- /dev/null +++ b/collaborativeagents/scripts/queue_rag_rewrite.sh @@ -0,0 +1,45 @@ +#!/bin/bash +# 等待 rag_vector_60s 完成后启动 rag_rewrite 测试 + +echo "等待 rag_vector_60s 完成..." + +while true; do + CKPT=$(ls collaborativeagents/results/rag_vector_60s/*/rag_vector/checkpoint.json 2>/dev/null | head -1) + if [ -n "$CKPT" ]; then + PROGRESS=$(cat "$CKPT" | python3 -c " +import json, sys +data = json.load(sys.stdin) +total = sum(data['sessions_per_profile'].values()) +print(total) +" 2>/dev/null) + + if [ "$PROGRESS" = "3600" ]; then + echo "$(date '+%H:%M:%S') rag_vector_60s 已完成" + break + fi + echo "$(date '+%H:%M:%S') rag_vector_60s 进度: $PROGRESS/3600" + else + echo "$(date '+%H:%M:%S') 等待rag_vector_60s启动..." + fi + sleep 60 +done + +echo "启动 rag_rewrite_60s..." + +nohup python collaborativeagents/scripts/run_experiments.py \ + --methods rag_rewrite \ + --datasets math-hard,math-500,bigcodebench \ + --n-profiles 60 \ + --n-sessions 60 \ + --max-turns 10 \ + --use-vllm \ + --vllm-agent-url http://localhost:8003/v1 \ + --vllm-user-url http://localhost:8004/v1 \ + --use-batch-processing \ + --batch-size 4 \ + --parallel-profiles 20 \ + --profile-path collaborativeagents/data/complex_profiles_v2/profiles_200.jsonl \ + --output-dir collaborativeagents/results/rag_rewrite_60s \ + > collaborativeagents/results/rag_rewrite_60s.log 2>&1 & + +echo "rag_rewrite_60s 已启动,PID: $!" diff --git a/collaborativeagents/scripts/queue_rag_rewrite_vector.sh b/collaborativeagents/scripts/queue_rag_rewrite_vector.sh new file mode 100755 index 0000000..480e3ac --- /dev/null +++ b/collaborativeagents/scripts/queue_rag_rewrite_vector.sh @@ -0,0 +1,45 @@ +#!/bin/bash +# 等待 rag_rewrite_60s 完成后启动 rag_rewrite_vector 测试 + +echo "等待 rag_rewrite_60s 完成..." + +while true; do + CKPT=$(ls collaborativeagents/results/rag_rewrite_60s/*/rag_rewrite/checkpoint.json 2>/dev/null | head -1) + if [ -n "$CKPT" ]; then + PROGRESS=$(cat "$CKPT" | python3 -c " +import json, sys +data = json.load(sys.stdin) +total = sum(data['sessions_per_profile'].values()) +print(total) +" 2>/dev/null) + + if [ "$PROGRESS" = "3600" ]; then + echo "$(date '+%H:%M:%S') rag_rewrite_60s 已完成" + break + fi + echo "$(date '+%H:%M:%S') rag_rewrite_60s 进度: $PROGRESS/3600" + else + echo "$(date '+%H:%M:%S') 等待rag_rewrite_60s启动..." + fi + sleep 120 +done + +echo "启动 rag_rewrite_vector_60s..." + +nohup python collaborativeagents/scripts/run_experiments.py \ + --methods rag_rewrite_vector \ + --datasets math-hard,math-500,bigcodebench \ + --n-profiles 60 \ + --n-sessions 60 \ + --max-turns 10 \ + --use-vllm \ + --vllm-agent-url http://localhost:8003/v1 \ + --vllm-user-url http://localhost:8004/v1 \ + --use-batch-processing \ + --batch-size 4 \ + --parallel-profiles 20 \ + --profile-path collaborativeagents/data/complex_profiles_v2/profiles_200.jsonl \ + --output-dir collaborativeagents/results/rag_rewrite_vector_60s \ + > collaborativeagents/results/rag_rewrite_vector_60s.log 2>&1 & + +echo "rag_rewrite_vector_60s 已启动,PID: $!" diff --git a/collaborativeagents/scripts/queue_rag_vector.sh b/collaborativeagents/scripts/queue_rag_vector.sh new file mode 100755 index 0000000..2760dc7 --- /dev/null +++ b/collaborativeagents/scripts/queue_rag_vector.sh @@ -0,0 +1,45 @@ +#!/bin/bash +# 等待 topk5_v3 完成后启动 rag_vector 实验 + +echo "等待 topk5_v3 完成..." + +while true; do + CKPT=$(ls collaborativeagents/results/rag_topk5_v3/*/rag/checkpoint.json 2>/dev/null | head -1) + if [ -n "$CKPT" ]; then + PROGRESS=$(cat "$CKPT" | python3 -c " +import json, sys +data = json.load(sys.stdin) +total = sum(data['sessions_per_profile'].values()) +print(total) +" 2>/dev/null) + + if [ "$PROGRESS" = "1800" ]; then + echo "$(date '+%H:%M:%S') topk5_v3 已完成" + break + fi + echo "$(date '+%H:%M:%S') topk5_v3 进度: $PROGRESS/1800" + else + echo "$(date '+%H:%M:%S') 等待topk5_v3启动..." + fi + sleep 60 +done + +echo "启动 rag_vector..." + +nohup python collaborativeagents/scripts/run_experiments.py \ + --methods rag_vector \ + --datasets math-hard,math-500,bigcodebench \ + --n-profiles 60 \ + --n-sessions 30 \ + --max-turns 10 \ + --use-vllm \ + --vllm-agent-url http://localhost:8003/v1 \ + --vllm-user-url http://localhost:8004/v1 \ + --use-batch-processing \ + --batch-size 4 \ + --parallel-profiles 20 \ + --profile-path collaborativeagents/data/complex_profiles_v2/profiles_200.jsonl \ + --output-dir collaborativeagents/results/rag_vector_v3 \ + > collaborativeagents/results/rag_vector_v3.log 2>&1 & + +echo "rag_vector_v3 已启动,PID: $!" diff --git a/collaborativeagents/scripts/queue_reflection_60s.sh b/collaborativeagents/scripts/queue_reflection_60s.sh new file mode 100755 index 0000000..6d15073 --- /dev/null +++ b/collaborativeagents/scripts/queue_reflection_60s.sh @@ -0,0 +1,45 @@ +#!/bin/bash +# 等待 rag_vector_v3 完成后启动 reflection 60 session 实验 + +echo "等待 rag_vector_v3 完成..." + +while true; do + CKPT=$(ls collaborativeagents/results/rag_vector_v3/*/rag_vector/checkpoint.json 2>/dev/null | head -1) + if [ -n "$CKPT" ]; then + PROGRESS=$(cat "$CKPT" | python3 -c " +import json, sys +data = json.load(sys.stdin) +total = sum(data['sessions_per_profile'].values()) +print(total) +" 2>/dev/null) + + if [ "$PROGRESS" = "1800" ]; then + echo "$(date '+%H:%M:%S') rag_vector_v3 已完成" + break + fi + echo "$(date '+%H:%M:%S') rag_vector_v3 进度: $PROGRESS/1800" + else + echo "$(date '+%H:%M:%S') 等待rag_vector_v3启动..." + fi + sleep 60 +done + +echo "启动 reflection_60s..." + +nohup python collaborativeagents/scripts/run_experiments.py \ + --methods reflection \ + --datasets math-hard,math-500,bigcodebench \ + --n-profiles 60 \ + --n-sessions 60 \ + --max-turns 10 \ + --use-vllm \ + --vllm-agent-url http://localhost:8003/v1 \ + --vllm-user-url http://localhost:8004/v1 \ + --use-batch-processing \ + --batch-size 4 \ + --parallel-profiles 20 \ + --profile-path collaborativeagents/data/complex_profiles_v2/profiles_200.jsonl \ + --output-dir collaborativeagents/results/reflection_60s \ + > collaborativeagents/results/reflection_60s.log 2>&1 & + +echo "reflection_60s 已启动,PID: $!" diff --git a/collaborativeagents/scripts/queue_topk5_v2.sh b/collaborativeagents/scripts/queue_topk5_v2.sh new file mode 100755 index 0000000..a116a28 --- /dev/null +++ b/collaborativeagents/scripts/queue_topk5_v2.sh @@ -0,0 +1,44 @@ +#!/bin/bash +# 等待 rag_dynamic 实验完成后启动 topk5 新版本测试 + +echo "等待 rag_dynamic 实验完成..." + +while true; do + # 检查是否有运行中的实验进程 + if pgrep -f "rag_dynamic" > /dev/null; then + # 检查checkpoint进度 + PROGRESS=$(cat collaborativeagents/results/rag_dynamic_test/*/rag_dynamic/checkpoint.json 2>/dev/null | python3 -c " +import json, sys +try: + data = json.load(sys.stdin) + total = sum(data['sessions_per_profile'].values()) + print(f'{total}/1800') +except: + print('0/1800') +" 2>/dev/null) + echo "$(date '+%H:%M:%S') rag_dynamic 进度: $PROGRESS" + sleep 60 + else + echo "rag_dynamic 已完成或未运行,启动 topk5_v2..." + break + fi +done + +# 启动新实验 +nohup python collaborativeagents/scripts/run_experiments.py \ + --methods rag \ + --datasets math-hard,math-500,bigcodebench \ + --n-profiles 60 \ + --n-sessions 30 \ + --max-turns 10 \ + --use-vllm \ + --vllm-agent-url http://localhost:8003/v1 \ + --vllm-user-url http://localhost:8004/v1 \ + --use-batch-processing \ + --batch-size 4 \ + --parallel-profiles 20 \ + --profile-path collaborativeagents/data/complex_profiles_v2/profiles_200.jsonl \ + --output-dir collaborativeagents/results/rag_topk5_v2 \ + > collaborativeagents/results/rag_topk5_v2.log 2>&1 & + +echo "topk5_v2 已启动,PID: $!" diff --git a/collaborativeagents/scripts/queue_topk5_v3.sh b/collaborativeagents/scripts/queue_topk5_v3.sh new file mode 100755 index 0000000..e93b066 --- /dev/null +++ b/collaborativeagents/scripts/queue_topk5_v3.sh @@ -0,0 +1,52 @@ +#!/bin/bash +# 等待 reflection_v2 实验完成后启动 topk5_v3 测试 + +echo "等待 reflection_v2 实验完成..." + +while true; do + # 检查checkpoint进度 + CKPT=$(ls collaborativeagents/results/reflection_v2/*/reflection/checkpoint.json 2>/dev/null | head -1) + if [ -n "$CKPT" ]; then + PROGRESS=$(cat "$CKPT" | python3 -c " +import json, sys +try: + data = json.load(sys.stdin) + total = sum(data['sessions_per_profile'].values()) + print(f'{total}/1800 ({total*100//1800}%)') +except: + print('0/1800') +" 2>/dev/null) + + # 检查是否完成 + DONE=$(echo "$PROGRESS" | grep -c "1800/1800") + if [ "$DONE" -eq 1 ]; then + echo "$(date '+%H:%M:%S') reflection_v2 已完成" + break + fi + echo "$(date '+%H:%M:%S') reflection_v2 进度: $PROGRESS" + else + echo "$(date '+%H:%M:%S') 等待reflection_v2启动..." + fi + sleep 60 +done + +echo "启动 topk5_v3..." + +# 启动新实验 +nohup python collaborativeagents/scripts/run_experiments.py \ + --methods rag \ + --datasets math-hard,math-500,bigcodebench \ + --n-profiles 60 \ + --n-sessions 30 \ + --max-turns 10 \ + --use-vllm \ + --vllm-agent-url http://localhost:8003/v1 \ + --vllm-user-url http://localhost:8004/v1 \ + --use-batch-processing \ + --batch-size 4 \ + --parallel-profiles 20 \ + --profile-path collaborativeagents/data/complex_profiles_v2/profiles_200.jsonl \ + --output-dir collaborativeagents/results/rag_topk5_v3 \ + > collaborativeagents/results/rag_topk5_v3.log 2>&1 & + +echo "topk5_v3 已启动,PID: $!" diff --git a/collaborativeagents/scripts/run_experiments.py b/collaborativeagents/scripts/run_experiments.py index e04680c..da3549b 100644 --- a/collaborativeagents/scripts/run_experiments.py +++ b/collaborativeagents/scripts/run_experiments.py @@ -15,6 +15,7 @@ import json import yaml import os import sys +import numpy as np from pathlib import Path from datetime import datetime from typing import List, Dict, Any, Optional @@ -113,7 +114,13 @@ AVAILABLE_METHODS = { "reflection_grpo": "Reflection + GRPO training", "all_memory": "All extracted memories in context (no retrieval)", "rag": "Extractor + RAG (no user vector)", + "rag_dynamic": "Extractor + RAG with dynamic topk (min=3, max=8, ratio=0.5)", + "rag_rewrite": "Extractor + RAG with LLM preference rewrite/merge", + "rag_rewrite_vector": "Extractor + RAG + user vector + LLM preference rewrite", "rag_vector": "Extractor + RAG + user vector (proposed method)", + "rag_vector_fast": "Extractor + RAG + user vector with 10x learning rate", + "rag_vector_consolidate": "Extractor + RAG + user vector with session-level preference consolidation", + "rag_vector_balanced": "Extractor + RAG + user vector with balanced rewards (10x LR + positive signal for good turns)", "rag_bge": "Extractor + RAG with BGE reranker (278M)", "rag_vector_bge": "Extractor + RAG + user vector with BGE reranker (278M)", } @@ -256,6 +263,68 @@ class ExperimentRunner: # Profile will be passed to start_session() when the conversation begins return adapter + def _export_user_vectors(self, method: str, adapters: Dict[int, Any]) -> None: + """ + Export user vectors from all adapters to disk for later analysis. + + Saves both .npz (efficient numpy format) and .json (human-readable). + + Args: + method: Method name for the output directory + adapters: Dict mapping profile_idx to adapter instances + """ + method_dir = self.output_dir / method + + # Collect all user vectors from adapters + all_vectors = {} + for profile_idx, adapter in adapters.items(): + if hasattr(adapter, 'export_all_user_vectors'): + vectors = adapter.export_all_user_vectors() + all_vectors.update(vectors) + + if not all_vectors: + logger.info(f" No user vectors to export for {method}") + return + + # Save as .npz for efficient analysis + npz_path = method_dir / "user_vectors.npz" + user_ids = list(all_vectors.keys()) + k = len(all_vectors[user_ids[0]]["z_long"]) + z_long = np.zeros((len(user_ids), k), dtype=np.float32) + z_short = np.zeros((len(user_ids), k), dtype=np.float32) + reward_ma = np.zeros(len(user_ids), dtype=np.float32) + + for i, uid in enumerate(user_ids): + z_long[i] = all_vectors[uid]["z_long"] + z_short[i] = all_vectors[uid]["z_short"] + reward_ma[i] = all_vectors[uid]["reward_ma"] + + np.savez( + npz_path, + user_ids=np.array(user_ids), + z_long=z_long, + z_short=z_short, + reward_ma=reward_ma, + ) + + # Also save summary stats as JSON + summary = { + "n_users": len(user_ids), + "vector_dim": k, + "z_long_norms": {uid: all_vectors[uid]["z_long_norm"] for uid in user_ids}, + "z_short_norms": {uid: all_vectors[uid]["z_short_norm"] for uid in user_ids}, + "reward_mas": {uid: all_vectors[uid]["reward_ma"] for uid in user_ids}, + "stats": { + "z_long_norm_mean": float(np.mean([all_vectors[uid]["z_long_norm"] for uid in user_ids])), + "z_long_norm_max": float(np.max([all_vectors[uid]["z_long_norm"] for uid in user_ids])), + "z_long_norm_std": float(np.std([all_vectors[uid]["z_long_norm"] for uid in user_ids])), + } + } + with open(method_dir / "user_vectors_summary.json", "w") as f: + json.dump(summary, f, indent=2) + + logger.info(f" Exported {len(user_ids)} user vectors to {npz_path}") + def run_single_session( self, method: str, @@ -297,11 +366,11 @@ class ExperimentRunner: # Structured preferences with condition/action pref_str = "\n".join([ f"- When {p.get('condition', '')}, {p.get('action', '')}" - for p in user_prefs[:10] # Top 10 preferences + for p in user_prefs ]) else: # Simple string preferences - pref_str = "\n".join([f"- {p}" for p in user_prefs[:10]]) + pref_str = "\n".join([f"- {p}" for p in user_prefs]) else: pref_str = str(user_prefs) @@ -619,6 +688,9 @@ class ExperimentRunner: json.dump(results, f, indent=2) logger.info(f" Profile {profile_idx + 1} completed and checkpointed") + # Export user vectors at the end of sequential processing + self._export_user_vectors(method, {0: adapter}) + return results def _run_method_parallel( @@ -690,6 +762,10 @@ class ExperimentRunner: except Exception as e: logger.error(f" Profile {profile_idx} failed: {e}") + # Note: Parallel mode doesn't export user vectors because adapters are + # created/destroyed per profile. Use batch mode for vector export. + logger.info(f" Parallel mode: user vectors not exported (use batch mode)") + def _run_method_batch( self, method: str, @@ -724,7 +800,7 @@ class ExperimentRunner: else: user_client = BatchVLLMClient( vllm_url=self.config.vllm_user_url, - max_tokens=4096, + max_tokens=1024, # User responses typically short, but allow for edge cases temperature=1.0, timeout=None, max_concurrent=100, @@ -799,21 +875,34 @@ class ExperimentRunner: adapters = {} profile_sessions = {} + # Build session problem list ONCE (shared across all profiles for controlled comparison) + # Each dataset contributes exactly n_per_dataset problems (front 10), no repeats + shared_sessions = [] + dataset_names = list(self.datasets.keys()) + n_per_dataset = self.config.n_sessions_per_profile // len(dataset_names) + remainder = self.config.n_sessions_per_profile % len(dataset_names) + + for i, ds_name in enumerate(dataset_names): + ds_obj = self.datasets[ds_name] + items = ds_obj.get_testset() + n_take = n_per_dataset + (1 if i < remainder else 0) + if n_take > len(items): + logger.warning(f" Dataset {ds_name} has only {len(items)} problems, need {n_take}") + for j in range(n_take): + item = items[j % len(items)] + shared_sessions.append({"problem": item.problem, "solution": item.solution, "domain": ds_obj.domain}) + + n_conflict = int(len(shared_sessions) * self.config.conflict_ratio) + shared_session_list = [(s, idx < n_conflict) for idx, s in enumerate(shared_sessions)] + logger.info(f" Built shared session list: {len(shared_sessions)} problems from {len(dataset_names)} datasets ({n_per_dataset} each, same for all profiles)") + for profile_idx in profiles_to_run: profile = self.profiles[profile_idx] adapter = self._create_method_adapter(method, profile, use_shared_models=True) if hasattr(adapter, 'initialize'): adapter.initialize() adapters[profile_idx] = adapter - - sessions = [] - for ds_name, ds_obj in self.datasets.items(): - ds_items = ds_obj.get_testset() - for item in ds_items[:self.config.n_sessions_per_profile]: - sessions.append({"problem": item.problem, "solution": item.solution, "domain": ds_obj.domain}) - sessions = sessions[:self.config.n_sessions_per_profile] - n_conflict = int(len(sessions) * self.config.conflict_ratio) - profile_sessions[profile_idx] = [(s, idx < n_conflict) for idx, s in enumerate(sessions)] + profile_sessions[profile_idx] = shared_session_list n_sessions = self.config.n_sessions_per_profile @@ -860,9 +949,9 @@ class ExperimentRunner: user_prefs = profile.get("preferences", []) if isinstance(user_prefs, list) and user_prefs: if isinstance(user_prefs[0], dict): - pref_str = "\n".join([f"- When {p.get('condition','')}, {p.get('action','')}" for p in user_prefs[:10]]) + pref_str = "\n".join([f"- When {p.get('condition','')}, {p.get('action','')}" for p in user_prefs]) else: - pref_str = "\n".join([f"- {p}" for p in user_prefs[:10]]) + pref_str = "\n".join([f"- {p}" for p in user_prefs]) else: pref_str = str(user_prefs) @@ -916,21 +1005,105 @@ class ExperimentRunner: state["conversation"].append({"role": "user", "content": user_msg}) state["full_log"].append(parsed) - if parsed.get("enforce_preferences", False): + enforce = parsed.get("enforce_preferences", False) + if isinstance(enforce, str): + enforce = enforce.lower() == "true" + if enforce: state["enforcement_count"] += 1 + # Detect disappointment and satisfaction from user message + # Disappointment indicators (not quite right, could be better, etc.) + user_msg_lower = user_msg.lower() + disappointment = any(phrase in user_msg_lower for phrase in [ + "not quite", "not what i", "that's not", "incorrect", + "wrong", "mistake", "error", "confused", "doesn't make sense", + "try again", "not helpful", "not useful" + ]) + # Satisfaction indicators (explicit positive feedback) + satisfaction = parsed.get("should_terminate", False) or any(phrase in user_msg_lower for phrase in [ + "perfect", "exactly", "great", "thanks", "helpful", + "that's right", "correct", "good job", "well done", + "makes sense", "understand now", "got it" + ]) + + # Store parsed feedback for REINFORCE (applied AFTER prepare_prompt sets pending_rl_update) + state["_pending_feedback"] = { + "user_msg": user_msg, + "enforce": bool(enforce), + "disappointment": disappointment and not enforce, # Don't double-count + "satisfaction": satisfaction and not enforce, # Don't count if also enforcing + "draft_answer": bool(parsed.get("draft_answer")), + } + if parsed.get("should_terminate", False) or TERMINATION_SIGNAL in user_msg: to_remove.append(pidx) continue - # Prepare agent prompt for batching (don't call LLM yet) + # Batch preference extraction for PersonalizedLLM adapters + extraction_batch = [] # (pidx, query) + remaining_active = [pidx for pidx in active_list if pidx not in to_remove] + for pidx in remaining_active: + adapter = adapters.get(pidx) + if adapter and hasattr(adapter, '_llm') and hasattr(adapter._llm, 'enable_preference_extraction'): + if adapter._llm.enable_preference_extraction and adapter._llm._extractor is not None: + query = adapter._llm.get_last_user_query(adapter._current_user_id) if hasattr(adapter._llm, 'get_last_user_query') else None + if not query: + state = all_states[pidx] + query = state["conversation"][-1]["content"] if state["conversation"] else "" + if query: + extraction_batch.append((pidx, query)) + + if extraction_batch: + extractor = extraction_batch[0][1] # just need any adapter to get the extractor + adapter0 = adapters[extraction_batch[0][0]] + shared_extractor = adapter0._llm._extractor + if hasattr(shared_extractor, 'batch_extract_preferences'): + queries = [q for _, q in extraction_batch] + batch_results = shared_extractor.batch_extract_preferences(queries) + for (pidx, _), pref_dict in zip(extraction_batch, batch_results): + adapter = adapters[pidx] + adapter._llm.apply_extracted_preferences(adapter._current_user_id, pref_dict) + else: + # Fallback: sequential + for pidx, query in extraction_batch: + adapter = adapters[pidx] + adapter._llm._extractor.extract_turn(adapter._llm._sessions[adapter._current_user_id].session_state.history) + + # Batch scaffolding for reflection adapters before prepare_prompt + scaffolding_batch = [] # (pidx, prompt) + remaining_active = [pidx for pidx in active_list if pidx not in to_remove] + for pidx in remaining_active: + adapter = adapters.get(pidx) + if adapter and hasattr(adapter, 'get_scaffolding_prompt'): + state = all_states[pidx] + # Temporarily add user msg to history for scaffolding + agent_notes = adapter._user_notes.get(adapter._current_user_id, "No notes yet about this user.") + if adapter.with_scaffolding and agent_notes != "No notes yet about this user.": + prompt = adapter.get_scaffolding_prompt( + state["conversation"], agent_notes) + if prompt is not None: + scaffolding_batch.append((pidx, prompt)) + + if scaffolding_batch: + scaff_messages = [[{"role": "user", "content": p}] for _, p in scaffolding_batch] + scaff_responses = agent_client.batch_completion(scaff_messages) + for (pidx, _), resp in zip(scaffolding_batch, scaff_responses): + adapter = adapters[pidx] + adapter._scaffolding_result = resp if resp else None + + # Prepare agent prompts for batching + # NOTE: prepare_prompt calls chat_prepare which sets pending_rl_update + # from the previous turn's data. REINFORCE feedback must be applied + # AFTER this call so that pending_rl_update is available. + for pidx in remaining_active: + state = all_states[pidx] try: adapter = adapters[pidx] + user_msg = state["conversation"][-1]["content"] if hasattr(adapter, 'prepare_prompt'): messages, context = adapter.prepare_prompt(user_msg, state["conversation"][:-1]) agent_prompts_batch.append((pidx, messages, context)) elif hasattr(adapter, 'generate_response'): - # Fallback for adapters without prepare_prompt agent_prompts_batch.append((pidx, None, None)) else: state["conversation"].append({"role": "assistant", "content": "[Error: Adapter not configured]"}) @@ -938,6 +1111,53 @@ class ExperimentRunner: logger.error(f" Agent prepare error p{pidx} t{turn}: {e}") state["conversation"].append({"role": "assistant", "content": "I apologize, I encountered an error. Could you rephrase?"}) + # Apply REINFORCE feedback NOW (after prepare_prompt set pending_rl_update) + for pidx in remaining_active: + state = all_states[pidx] + fb = state.pop("_pending_feedback", None) + if fb: + adapter = adapters.get(pidx) + if adapter and hasattr(adapter, 'process_user_turn'): + adapter.process_user_turn( + user_response=fb["user_msg"], + enforce_preferences=fb["enforce"], + express_disappointment=fb.get("disappointment", False), + express_satisfaction=fb["satisfaction"], + draft_answer_updated=fb["draft_answer"], + ) + + # Also apply feedback for terminated sessions (they skipped prepare_prompt + # but still need the reward signal from their last turn) + for pidx in to_remove: + state = all_states.get(pidx) + if not state: + continue + fb = state.pop("_pending_feedback", None) + if fb: + adapter = adapters.get(pidx) + if adapter and hasattr(adapter, 'process_user_turn'): + # For terminated sessions, we can't call prepare_prompt + # (no next turn), but we still want the reward applied. + # Call chat_prepare with a dummy to set pending_rl_update, + # then apply feedback. + try: + if hasattr(adapter, '_llm') and hasattr(adapter._llm, 'chat_prepare'): + adapter._llm.chat_prepare( + adapter._current_user_id, + fb["user_msg"], + skip_extraction=True, + skip_auto_reward=True, + ) + adapter.process_user_turn( + user_response=fb["user_msg"], + enforce_preferences=fb["enforce"], + express_disappointment=fb.get("disappointment", False), + express_satisfaction=fb["satisfaction"], + draft_answer_updated=fb["draft_answer"], + ) + except Exception: + pass # Best effort for terminated sessions + # Batch vLLM call for all agent prompts if agent_prompts_batch: # Separate prompts that can be batched from fallback @@ -979,6 +1199,25 @@ class ExperimentRunner: active_set -= set(to_remove) + # Batch note-update for reflection adapters before end_session + note_update_batch = [] # (profile_idx, messages) + for profile_idx in profiles_to_run: + if profile_idx not in all_states: + continue + adapter = adapters.get(profile_idx) + if adapter and hasattr(adapter, 'get_note_update_prompt'): + prompt_msgs = adapter.get_note_update_prompt() + if prompt_msgs is not None: + note_update_batch.append((profile_idx, prompt_msgs)) + + if note_update_batch: + note_messages = [msgs for _, msgs in note_update_batch] + note_responses = agent_client.batch_completion(note_messages) + for (profile_idx, _), resp in zip(note_update_batch, note_responses): + if resp: + adapter = adapters[profile_idx] + adapter.apply_note_update_response(resp) + # Save results for this session round for profile_idx in profiles_to_run: if profile_idx not in all_states: @@ -995,10 +1234,20 @@ class ExperimentRunner: task_success = 0 for entry in full_log: if entry.get("should_terminate", False): - draft = entry.get("draft_answer", "") - if draft and "don't know" not in draft.lower() and len(draft) > 20: + draft = str(entry.get("draft_answer", "")) + if draft and "don't know" not in draft.lower(): task_success = 1 + # End session on adapter (applies task completion reward for REINFORCE) + adapter = adapters.get(profile_idx) + if adapter and hasattr(adapter, 'end_session'): + # Skip note update if batch already handled it + skip_notes = hasattr(adapter, 'get_note_update_prompt') + try: + adapter.end_session(task_success=bool(task_success), skip_note_update=skip_notes) + except TypeError: + adapter.end_session(task_success=bool(task_success)) + results.append({ "method": method, "profile_id": self.profiles[profile_idx].get("user_id", f"user_{profile_idx}"), @@ -1023,6 +1272,33 @@ class ExperimentRunner: "adapter_metrics": {}, }) + # Collect adapter metrics (e.g. user_vector_norm for rag_vector) + adapter = adapters.get(profile_idx) + if adapter and hasattr(adapter, 'get_user_vector'): + user_id = self.profiles[profile_idx].get("user_id", f"user_{profile_idx}") + vec = adapter.get_user_vector(user_id) + if vec is not None: + results[-1]["adapter_metrics"] = { + "user_vector_norm": float(np.linalg.norm(vec)), + } + + # Save user vector snapshots every 10 sessions + if (session_idx + 1) % 10 == 0: + vectors_dir = checkpoint_file.parent / "vectors" + vectors_dir.mkdir(parents=True, exist_ok=True) + user_vectors = {} + for profile_idx in profiles_to_run: + adapter = adapters.get(profile_idx) + if adapter and hasattr(adapter, 'get_user_vector'): + user_id = self.profiles[profile_idx].get("user_id", f"user_{profile_idx}") + vec = adapter.get_user_vector(user_id) + if vec is not None: + user_vectors[user_id] = vec + if user_vectors: + snapshot_path = vectors_dir / f"vectors_session_{session_idx+1}.npy" + np.save(snapshot_path, user_vectors) + logger.info(f" Saved {len(user_vectors)} user vectors to {snapshot_path}") + # Checkpoint after each session round with session-level tracking # Only increment for profiles that actually ran in this round (those in all_states) for profile_idx in all_states.keys(): @@ -1043,6 +1319,9 @@ class ExperimentRunner: rate = sessions_done / elapsed * 3600 if elapsed > 0 else 0 logger.info(f" Session round {session_idx+1}/{n_sessions}: {sessions_done} total, {rate:.0f} sessions/hr") + # Export user vectors before cleanup (for RAG methods with user vectors) + self._export_user_vectors(method, adapters) + # Explicitly free adapter models to prevent GPU OOM across methods for pidx, adapter in adapters.items(): if hasattr(adapter, 'cleanup'): diff --git a/collaborativeagents/scripts/test_new_rewrite.sh b/collaborativeagents/scripts/test_new_rewrite.sh new file mode 100755 index 0000000..1ade8ea --- /dev/null +++ b/collaborativeagents/scripts/test_new_rewrite.sh @@ -0,0 +1,50 @@ +#!/bin/bash +# 小规模测试:验证新的rewrite prompt是否降低E/T +# 10 profiles × 10 sessions = 100 sessions + +echo "$(date '+%H:%M:%S') 启动 rag_rewrite 小规模测试 (新prompt)..." + +cd /workspace/personalization-user-model + +python collaborativeagents/scripts/run_experiments.py \ + --methods rag_rewrite \ + --datasets math-hard,bigcodebench \ + --n-profiles 10 \ + --n-sessions 10 \ + --max-turns 10 \ + --use-vllm \ + --vllm-agent-url http://localhost:8003/v1 \ + --vllm-user-url http://localhost:8004/v1 \ + --use-batch-processing \ + --batch-size 4 \ + --parallel-profiles 10 \ + --profile-path collaborativeagents/data/complex_profiles_v2/profiles_200.jsonl \ + --output-dir collaborativeagents/results/test_new_rewrite_10x10 + +echo "$(date '+%H:%M:%S') 测试完成" + +# 自动分析结果 +python3 << 'ANALYZE' +import json +import numpy as np + +result_path = "collaborativeagents/results/test_new_rewrite_10x10" +import glob +results_file = glob.glob(f"{result_path}/*/rag_rewrite/results.json") + +if results_file: + with open(results_file[0]) as f: + data = json.load(f) + + enforcements = sum(r["metrics"]["enforcement_count"] for r in data) + turns = sum(r["metrics"]["total_turns"] for r in data) + successes = sum(1 for r in data if r["metrics"]["task_success"]) + + print(f"\n=== 新Rewrite Prompt测试结果 ===") + print(f"Sessions: {len(data)}") + print(f"Success Rate: {100*successes/len(data):.1f}%") + print(f"E/T: {enforcements/turns:.4f}") + print(f"(对比旧rewrite E/T: 0.194)") +else: + print("结果文件未找到") +ANALYZE diff --git a/collaborativeagents/scripts/visualize_user_vectors.py b/collaborativeagents/scripts/visualize_user_vectors.py new file mode 100644 index 0000000..203cb68 --- /dev/null +++ b/collaborativeagents/scripts/visualize_user_vectors.py @@ -0,0 +1,407 @@ +#!/usr/bin/env python3 +""" +User Vector Visualization Script + +Visualizes learned user vectors using t-SNE and PCA for dimensionality reduction. +Supports multiple coloring schemes to analyze user clusters. + +Usage: + python visualize_user_vectors.py --results-dir ../results/fullrun_3methods + python visualize_user_vectors.py --vectors-file user_vectors.npy --profiles-file profiles.json +""" + +import argparse +import json +import numpy as np +import matplotlib.pyplot as plt +from pathlib import Path +from typing import Dict, List, Optional, Tuple +from sklearn.manifold import TSNE +from sklearn.decomposition import PCA +from sklearn.preprocessing import StandardScaler +import warnings +warnings.filterwarnings('ignore') + + +def load_user_vectors(results_dir: Path) -> Tuple[np.ndarray, List[int]]: + """Load user vectors from experiment results.""" + vectors = [] + user_ids = [] + + # Try to find user vectors in different locations + possible_paths = [ + results_dir / "user_vectors.npy", + results_dir / "rag_vector" / "user_vectors.npy", + results_dir / "checkpoints" / "user_vectors.npy", + ] + + for path in possible_paths: + if path.exists(): + data = np.load(path, allow_pickle=True) + if isinstance(data, np.ndarray): + if data.dtype == object: + # Dictionary format + data = data.item() + for uid, vec in data.items(): + user_ids.append(int(uid)) + vectors.append(vec) + else: + # Direct array format + vectors = data + user_ids = list(range(len(data))) + print(f"Loaded {len(vectors)} user vectors from {path}") + return np.array(vectors), user_ids + + # Try to extract from results.json + results_files = list(results_dir.glob("**/results.json")) + for rf in results_files: + try: + with open(rf) as f: + data = json.load(f) + # Extract user vectors if stored in results + if isinstance(data, dict) and "user_vectors" in data: + for uid, vec in data["user_vectors"].items(): + user_ids.append(int(uid)) + vectors.append(np.array(vec)) + print(f"Loaded {len(vectors)} user vectors from {rf}") + return np.array(vectors), user_ids + except: + continue + + raise FileNotFoundError(f"No user vectors found in {results_dir}") + + +def load_profiles(profiles_path: Path) -> List[Dict]: + """Load user profiles for labeling.""" + if profiles_path.suffix == '.jsonl': + profiles = [] + with open(profiles_path) as f: + for line in f: + profiles.append(json.loads(line)) + return profiles + else: + with open(profiles_path) as f: + return json.load(f) + + +def extract_profile_features(profiles: List[Dict]) -> Dict[str, List]: + """Extract features from profiles for coloring.""" + features = { + "categories": [], + "n_preferences": [], + "persona_length": [], + } + + for p in profiles: + # Extract categories if available + cats = p.get("categories", []) + features["categories"].append(cats[0] if cats else "unknown") + + # Number of preferences + prefs = p.get("preferences", []) + features["n_preferences"].append(len(prefs)) + + # Persona length + persona = p.get("persona", "") + features["persona_length"].append(len(persona)) + + return features + + +def apply_tsne(vectors: np.ndarray, perplexity: int = 30, max_iter: int = 1000) -> np.ndarray: + """Apply t-SNE dimensionality reduction.""" + # Standardize vectors + scaler = StandardScaler() + vectors_scaled = scaler.fit_transform(vectors) + + # Adjust perplexity if needed + n_samples = len(vectors) + perplexity = min(perplexity, n_samples - 1) + + tsne = TSNE( + n_components=2, + perplexity=perplexity, + max_iter=max_iter, + random_state=42, + init='pca', + learning_rate='auto' + ) + return tsne.fit_transform(vectors_scaled) + + +def apply_pca(vectors: np.ndarray) -> Tuple[np.ndarray, np.ndarray]: + """Apply PCA dimensionality reduction. Returns (2D projection, explained variance).""" + scaler = StandardScaler() + vectors_scaled = scaler.fit_transform(vectors) + + pca = PCA(n_components=min(10, vectors.shape[1])) + transformed = pca.fit_transform(vectors_scaled) + + return transformed[:, :2], pca.explained_variance_ratio_ + + +def plot_comparison( + vectors: np.ndarray, + user_ids: List[int], + profiles: Optional[List[Dict]] = None, + output_path: Optional[Path] = None, + title_prefix: str = "" +): + """Create side-by-side t-SNE and PCA plots.""" + + # Apply dimensionality reduction + print("Applying t-SNE...") + tsne_2d = apply_tsne(vectors) + + print("Applying PCA...") + pca_2d, pca_variance = apply_pca(vectors) + + # Prepare coloring + if profiles and len(profiles) >= len(user_ids): + features = extract_profile_features(profiles) + color_by = features["n_preferences"] + color_label = "Number of Preferences" + else: + color_by = user_ids + color_label = "User ID" + + # Create figure + fig, axes = plt.subplots(1, 2, figsize=(16, 7)) + + # t-SNE plot + ax1 = axes[0] + scatter1 = ax1.scatter( + tsne_2d[:, 0], tsne_2d[:, 1], + c=color_by, cmap='viridis', alpha=0.7, s=50 + ) + ax1.set_xlabel('t-SNE Dimension 1') + ax1.set_ylabel('t-SNE Dimension 2') + ax1.set_title(f'{title_prefix}t-SNE Visualization\n({len(vectors)} users)') + plt.colorbar(scatter1, ax=ax1, label=color_label) + + # PCA plot + ax2 = axes[1] + scatter2 = ax2.scatter( + pca_2d[:, 0], pca_2d[:, 1], + c=color_by, cmap='viridis', alpha=0.7, s=50 + ) + ax2.set_xlabel(f'PC1 ({pca_variance[0]*100:.1f}% variance)') + ax2.set_ylabel(f'PC2 ({pca_variance[1]*100:.1f}% variance)') + ax2.set_title(f'{title_prefix}PCA Visualization\n(Top 2 components: {(pca_variance[0]+pca_variance[1])*100:.1f}% variance)') + plt.colorbar(scatter2, ax=ax2, label=color_label) + + plt.tight_layout() + + if output_path: + plt.savefig(output_path, dpi=150, bbox_inches='tight') + print(f"Saved comparison plot to {output_path}") + + plt.show() + + return tsne_2d, pca_2d, pca_variance + + +def plot_by_category( + vectors: np.ndarray, + user_ids: List[int], + profiles: List[Dict], + output_path: Optional[Path] = None +): + """Create plots colored by preference category.""" + + features = extract_profile_features(profiles) + categories = features["categories"] + unique_cats = list(set(categories)) + cat_to_idx = {c: i for i, c in enumerate(unique_cats)} + cat_colors = [cat_to_idx[c] for c in categories[:len(user_ids)]] + + # Apply reductions + tsne_2d = apply_tsne(vectors) + pca_2d, pca_variance = apply_pca(vectors) + + fig, axes = plt.subplots(1, 2, figsize=(16, 7)) + + # t-SNE by category + ax1 = axes[0] + scatter1 = ax1.scatter( + tsne_2d[:, 0], tsne_2d[:, 1], + c=cat_colors, cmap='tab10', alpha=0.7, s=50 + ) + ax1.set_xlabel('t-SNE Dimension 1') + ax1.set_ylabel('t-SNE Dimension 2') + ax1.set_title('t-SNE by Preference Category') + + # PCA by category + ax2 = axes[1] + scatter2 = ax2.scatter( + pca_2d[:, 0], pca_2d[:, 1], + c=cat_colors, cmap='tab10', alpha=0.7, s=50 + ) + ax2.set_xlabel(f'PC1 ({pca_variance[0]*100:.1f}%)') + ax2.set_ylabel(f'PC2 ({pca_variance[1]*100:.1f}%)') + ax2.set_title('PCA by Preference Category') + + # Add legend + handles = [plt.scatter([], [], c=[cat_to_idx[c]], cmap='tab10', label=c) + for c in unique_cats[:10]] # Limit to 10 categories + fig.legend(handles, unique_cats[:10], loc='center right', title='Category') + + plt.tight_layout() + plt.subplots_adjust(right=0.85) + + if output_path: + plt.savefig(output_path, dpi=150, bbox_inches='tight') + print(f"Saved category plot to {output_path}") + + plt.show() + + +def plot_pca_variance(vectors: np.ndarray, output_path: Optional[Path] = None): + """Plot PCA explained variance to understand dimensionality.""" + scaler = StandardScaler() + vectors_scaled = scaler.fit_transform(vectors) + + n_components = min(50, vectors.shape[1], vectors.shape[0]) + pca = PCA(n_components=n_components) + pca.fit(vectors_scaled) + + fig, axes = plt.subplots(1, 2, figsize=(14, 5)) + + # Individual variance + ax1 = axes[0] + ax1.bar(range(1, n_components + 1), pca.explained_variance_ratio_ * 100) + ax1.set_xlabel('Principal Component') + ax1.set_ylabel('Explained Variance (%)') + ax1.set_title('PCA Explained Variance by Component') + ax1.set_xlim(0, n_components + 1) + + # Cumulative variance + ax2 = axes[1] + cumvar = np.cumsum(pca.explained_variance_ratio_) * 100 + ax2.plot(range(1, n_components + 1), cumvar, 'b-o', markersize=4) + ax2.axhline(y=90, color='r', linestyle='--', label='90% variance') + ax2.axhline(y=95, color='g', linestyle='--', label='95% variance') + ax2.set_xlabel('Number of Components') + ax2.set_ylabel('Cumulative Explained Variance (%)') + ax2.set_title('PCA Cumulative Explained Variance') + ax2.legend() + ax2.set_xlim(0, n_components + 1) + ax2.set_ylim(0, 105) + + # Find components needed for 90% and 95% variance + n_90 = np.argmax(cumvar >= 90) + 1 + n_95 = np.argmax(cumvar >= 95) + 1 + print(f"Components for 90% variance: {n_90}") + print(f"Components for 95% variance: {n_95}") + + plt.tight_layout() + + if output_path: + plt.savefig(output_path, dpi=150, bbox_inches='tight') + print(f"Saved variance plot to {output_path}") + + plt.show() + + return pca.explained_variance_ratio_ + + +def generate_synthetic_vectors(n_users: int = 200, dim: int = 64) -> np.ndarray: + """Generate synthetic user vectors for testing visualization.""" + np.random.seed(42) + + # Create 5 clusters of users + n_clusters = 5 + cluster_size = n_users // n_clusters + vectors = [] + + for i in range(n_clusters): + # Each cluster has a different center + center = np.random.randn(dim) * 2 + # Users in cluster are variations around center + cluster_vectors = center + np.random.randn(cluster_size, dim) * 0.5 + vectors.append(cluster_vectors) + + # Add remaining users + remaining = n_users - n_clusters * cluster_size + if remaining > 0: + vectors.append(np.random.randn(remaining, dim)) + + return np.vstack(vectors) + + +def main(): + parser = argparse.ArgumentParser(description="Visualize user vectors with t-SNE and PCA") + parser.add_argument("--results-dir", type=str, help="Path to experiment results directory") + parser.add_argument("--vectors-file", type=str, help="Path to user vectors .npy file") + parser.add_argument("--profiles-file", type=str, help="Path to user profiles JSON file") + parser.add_argument("--output-dir", type=str, default=".", help="Output directory for plots") + parser.add_argument("--demo", action="store_true", help="Run demo with synthetic data") + args = parser.parse_args() + + output_dir = Path(args.output_dir) + output_dir.mkdir(parents=True, exist_ok=True) + + if args.demo: + print("Running demo with synthetic user vectors...") + vectors = generate_synthetic_vectors(200, 64) + user_ids = list(range(200)) + profiles = None + title_prefix = "[Demo] " + elif args.vectors_file: + vectors = np.load(args.vectors_file) + user_ids = list(range(len(vectors))) + profiles = None + if args.profiles_file: + profiles = load_profiles(Path(args.profiles_file)) + title_prefix = "" + elif args.results_dir: + results_dir = Path(args.results_dir) + vectors, user_ids = load_user_vectors(results_dir) + + # Try to find profiles + profiles = None + profile_paths = [ + results_dir / "generated_profiles.json", + results_dir.parent / "profiles.json", + Path("../data/complex_profiles_v2/profiles_200.jsonl"), + ] + for pp in profile_paths: + if pp.exists(): + profiles = load_profiles(pp) + print(f"Loaded {len(profiles)} profiles from {pp}") + break + title_prefix = "" + else: + print("Please provide --results-dir, --vectors-file, or --demo") + return + + print(f"\nUser vectors shape: {vectors.shape}") + print(f"Number of users: {len(user_ids)}") + + # Generate plots + print("\n=== Generating comparison plot ===") + plot_comparison( + vectors, user_ids, profiles, + output_path=output_dir / "user_vectors_comparison.png", + title_prefix=title_prefix + ) + + print("\n=== Generating PCA variance plot ===") + plot_pca_variance( + vectors, + output_path=output_dir / "user_vectors_pca_variance.png" + ) + + if profiles and len(profiles) >= len(user_ids): + print("\n=== Generating category plot ===") + plot_by_category( + vectors, user_ids, profiles, + output_path=output_dir / "user_vectors_by_category.png" + ) + + print("\n=== Done! ===") + print(f"Plots saved to {output_dir}") + + +if __name__ == "__main__": + main() |
