#!/usr/bin/env python3
"""
Example: Using the PersonalizedLLM Interface for Evaluation.

This script demonstrates the evaluation interface that can be used by a
user simulator or evaluation framework.

Call sequence per evaluation run:
    1. reset_user(user_id) - Start fresh for this user's "life"
    2. For each session (s=1..S):
        a. reset_session(user_id) - New chat window
        b. For each turn (t=1..T):
            i.   [Turn 2+] apply_feedback() for the previous turn
            ii.  resp = chat(user_id, query)
            iii. [Simulator computes reward from the response]
    3. persist() - Save state at the end
"""

import os
import sys

# Add src to sys.path
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "../src"))

from personalization.serving import (
    PersonalizedLLM,
    AssistantResponse,
    Feedback,
)
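
# ---------------------------------------------------------------------------
# Hypothetical reward scorer (NOT part of the personalization.serving API).
# Step iii of the call sequence leaves reward computation to the user
# simulator, and main() below simply hard-codes values. This sketch shows one
# way a simulator *might* score a response: keyword matching against the
# user's stated preferences. The name, signature, and scoring scheme are
# illustrative assumptions only.
def example_keyword_reward(response: AssistantResponse,
                           preferred_terms: list[str]) -> float:
    """Toy reward in [0, 1]: fraction of preferred terms the answer mentions."""
    if not preferred_terms:
        return 0.5  # neutral reward while nothing is known about the user
    answer = response.answer.lower()
    hits = sum(1 for term in preferred_terms if term.lower() in answer)
    return hits / len(preferred_terms)
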
def main():
    print("=" * 60)
    print("PersonalizedLLM Evaluation Interface Demo")
    print("=" * 60)

    # Initialize the system.
    # Note: this loads models, which takes time and GPU memory.
    print("\n[1] Initializing PersonalizedLLM...")
    llm = PersonalizedLLM(
        user_store_path="data/users/user_store_eval_demo.npz",
        only_own_memories=True,
        enable_preference_extraction=True,
        enable_rl_updates=True,
    )

    # Define the test user
    user_id = "eval_demo_user"

    # Reset the user for a clean experiment
    print(f"\n[2] Resetting user: {user_id}")
    llm.reset_user(user_id)

    # Check the initial state
    print("\n[3] Initial user state:")
    print(f"    {llm.get_user_state_summary(user_id)}")

    # Simulate multiple sessions
    queries_per_session = [
        # Session 1: food preferences
        [
            "What's a good recipe for dinner tonight?",
            "I prefer vegetarian food with Asian flavors.",
            "Can you suggest something spicy?",
        ],
        # Session 2: test personalization retention
        [
            "What should I cook for lunch?",
            "Give me a quick meal idea.",
        ],
    ]

    all_responses = []

    for session_idx, session_queries in enumerate(queries_per_session):
        print(f"\n{'=' * 60}")
        print(f"SESSION {session_idx + 1}")
        print("=" * 60)

        # Reset the session (new chat window)
        llm.reset_session(user_id)
        print(f"[Session {session_idx + 1}] Started new session")

        session_responses = []

        for turn_idx, query in enumerate(session_queries):
            print(f"\n--- Turn {turn_idx + 1} ---")

            # Apply feedback for the previous turn (from turn 2 onwards)
            if turn_idx > 0:
                # Simulated feedback - in a real evaluation this comes from
                # the user simulator (see example_keyword_reward above for a
                # sketch of what such a scorer might look like).
                simulated_reward = 0.7 + 0.1 * (turn_idx % 2)  # varies by turn
                simulated_gating = 1.0  # feedback is always gated on for turns 2+
                feedback = Feedback(
                    user_id=user_id,
                    turn_id=turn_idx - 1,
                    reward=simulated_reward,
                    gating=simulated_gating,
                    meta={"source": "demo_simulator"},
                )
                print(
                    f"[Feedback] Applying: reward={simulated_reward:.2f}, "
                    f"gating={simulated_gating:.1f}"
                )
                llm.apply_feedback(feedback)

            # Main chat call
            print(f"User: {query}")
            response: AssistantResponse = llm.chat(user_id, query)

            # Truncate long answers for display
            if len(response.answer) > 200:
                print(f"Assistant: {response.answer[:200]}...")
            else:
                print(f"Assistant: {response.answer}")
            print(
                f"[Usage] prompt={response.usage.prompt_tokens}, "
                f"completion={response.usage.completion_tokens}, "
                f"model={response.usage.model}"
            )
            if response.debug:
                print(
                    f"[Debug] memories={len(response.debug.selected_memory_ids)}, "
                    f"z_long_norm={response.debug.extra.get('z_long_norm', 0):.4f}"
                )
                if response.debug.extracted_preferences:
                    print(
                        f"[Debug] Extracted "
                        f"{len(response.debug.extracted_preferences)} preferences"
                    )

            session_responses.append(response)

        all_responses.append(session_responses)

        # Show the user state after the session
        print(f"\n[Session {session_idx + 1}] Final state:")
        print(f"    {llm.get_user_state_summary(user_id)}")

    # Summary
    print(f"\n{'=' * 60}")
    print("EVALUATION SUMMARY")
    print("=" * 60)

    total_tokens = sum(
        r.usage.total_tokens for session in all_responses for r in session
    )
    total_turns = sum(len(s) for s in all_responses)

    print(f"Total sessions: {len(all_responses)}")
    print(f"Total turns: {total_turns}")
    print(f"Total tokens: {total_tokens}")
    print(f"Final user state: {llm.get_user_state_summary(user_id)}")

    # Persist (optional, for saving state between runs)
    # llm.persist()
    # print("\nState persisted to disk.")

    print("\nDemo complete!")


if __name__ == "__main__":
    main()
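
# ---------------------------------------------------------------------------
# Sketch: resuming a persisted run (an assumption, not verified behavior).
# If persist() is called at the end of a run (see the commented-out call in
# main()), a follow-up run could plausibly reload the saved state by pointing
# a fresh PersonalizedLLM at the same user_store_path and skipping
# reset_user(), e.g.:
#
#     llm = PersonalizedLLM(user_store_path="data/users/user_store_eval_demo.npz")
#     print(llm.get_user_state_summary("eval_demo_user"))  # state from the prior run
#     response = llm.chat("eval_demo_user", "What do I usually like to eat?")
#
# Whether the constructor reloads state from user_store_path automatically is
# an assumption about the serving module; verify before relying on it.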