"""
Blazing Eights — Human vs AI interactive game.

Play against the trained PPO agent in your terminal.

Usage:
  python versus.py --model blazing_ppo_final.pt
  python versus.py --model blazing_ppo_final.pt --num_players 3  # you + 2 AI
"""

import argparse
import torch
import torch.nn.functional as F
import numpy as np
from blazing_env import (
    BlazingEightsEnv, card_name, card_suit, card_rank,
    is_swap, RANK_8, RANK_J, RANK_Q, RANK_K,
    NUM_CARDS, TOTAL_ACTIONS, DRAW_ACTION, PASS_ACTION,
)
from train import PolicyValueNet

# ANSI escape sequences: AI announcements are wrapped in bright red, and
# AI_RESET restores the terminal's default attributes afterwards.
AI_COLOR = "\033[91m"
AI_RESET = "\033[0m"

# Per-suit ANSI colors, indexed by suit id: ♠ blue, ♥ magenta, ♦ yellow, ♣ cyan.
SUIT_COLORS = ["\033[94m", "\033[35m", "\033[93m", "\033[96m"]

# Display glyph for each suit id.
SUIT_SYMBOLS = list("♠♥♦♣")

# Accepted user spellings of a suit (ASCII letter or glyph) -> suit id.
SUIT_LETTERS = {
    **{letter: suit_id for suit_id, letter in enumerate("shdc")},
    **{glyph: suit_id for suit_id, glyph in enumerate(SUIT_SYMBOLS)},
}

# Display name for each rank index (0 = Ace ... 12 = King).
RANK_NAMES = "A 2 3 4 5 6 7 8 9 10 J Q K".split()


def card_effect(c: int, num_players: int = 2) -> str:
    """Return the yellow-highlighted effect label for a special card.

    Swap cards, 8s, Kings and Jacks always carry a label; a Queen carries one
    only when more than two players are in the game. Every other card yields
    an empty string.
    """
    if is_swap(c):
        label = "换牌"
    else:
        # Swap cards are handled above, so the rank is only read for ordinary cards.
        rank = card_rank(c)
        labels_by_rank = {RANK_8: "万能", RANK_K: "全摸", RANK_J: "跳过"}
        if num_players > 2:
            labels_by_rank[RANK_Q] = "反转"
        label = labels_by_rank.get(rank, "")

    if not label:
        return ""
    return f"\033[93m{label}\033[0m"


def pretty_card(c: int) -> str:
    """Render a card for the terminal.

    Ordinary cards become ``<rank><suit glyph>`` wrapped in the suit's color;
    swap cards become a magenta ``SWAP`` tag.
    """
    if not is_swap(c):
        suit_id = card_suit(c)
        face = RANK_NAMES[card_rank(c)] + SUIT_SYMBOLS[suit_id]
        return SUIT_COLORS[suit_id] + face + "\033[0m"
    return "\033[95mSWAP\033[0m"


def pretty_hand(hand: list[int], num_players: int = 2) -> str:
    """Format a hand as ``[index] card(effect)`` entries joined by two spaces.

    Cards are ordered by suit and then card id, with swap cards placed last.
    The bracketed index is the position in that sorted order.
    """

    def display_order(card: int):
        # 99 pushes swap cards behind every real suit.
        bucket = 99 if is_swap(card) else card_suit(card)
        return (bucket, card)

    entries = []
    for position, card in enumerate(sorted(hand, key=display_order)):
        effect = card_effect(card, num_players)
        suffix = f"({effect})" if effect else ""
        entries.append(f"[{position}] {pretty_card(card)}{suffix}")
    return "  ".join(entries)


def print_game_state(env: BlazingEightsEnv, human_player: int, show_ai_hand: bool = False):
    """Print the table: top discard, declared suit, direction, deck size, hands.

    The human's row and (by default) every AI row show only a card count.
    With ``show_ai_hand`` set, AI rows list their actual cards instead. The
    row of the player whose turn it is gets a trailing ``◀`` marker.
    """
    heavy_rule = "=" * 55
    print()
    print(heavy_rule)

    top_label = pretty_card(env.discard[-1])
    declared_suit = env.active_suit
    if declared_suit is not None:
        top_label += (
            f"  (指定花色: {SUIT_COLORS[declared_suit]}{SUIT_SYMBOLS[declared_suit]}{AI_RESET})"
        )
    direction_label = "顺时针 →" if env.direction == 1 else "逆时针 ←"
    print(f"  弃牌堆顶: {top_label}    方向: {direction_label}    牌堆剩余: {len(env.deck)}")
    print("-" * 55)

    for seat in range(env.num_players):
        cards = env.hands[seat]
        is_human = seat == human_player
        label = "你" if is_human else f"AI-{seat}"
        if show_ai_hand and not is_human:
            summary = ", ".join(map(pretty_card, sorted(cards)))
        else:
            summary = f"{len(cards)} 张牌"
        turn_marker = " ◀" if seat == env.current_player else ""
        print(f"  {label}: {summary}{turn_marker}")

    print(heavy_rule)


def parse_card_input(s: str) -> int:
    """Parse typed card text such as ``8h``, ``Ks``, ``10d``, ``Ac`` or ``SWAP``.

    Matching is case-insensitive and ignores surrounding whitespace. The suit
    may be given as a letter (s/h/d/c) or as a suit glyph.

    Args:
        s: Raw text entered by the user.

    Returns:
        ``suit_index * 13 + rank_index`` for an ordinary card, or 52 for any
        text beginning with "SW" (presumably the swap card id; confirm
        against blazing_env).

    Raises:
        ValueError: If the text is empty or is not a recognizable card.
    """
    text = s.strip().upper()
    message = f"无法识别: {text}  (格式例: 8h, Ks, 10d, Ac, SWAP)"

    # Blank input used to hit s[0] and escape as IndexError; report it the
    # same way as any other unparseable text.
    if not text:
        raise ValueError(message)

    # "SW" is a prefix of "SWAP", so a single check covers both spellings.
    if text.startswith("SW"):
        return 52

    # "10" is the only two-character rank.
    rank_len = 2 if text.startswith("10") else 1
    rank_str = text[:rank_len]
    suit_str = text[rank_len:].lower()

    rank_map = {r: i for i, r in enumerate(RANK_NAMES)}
    if rank_str not in rank_map or suit_str not in SUIT_LETTERS:
        raise ValueError(message)
    return SUIT_LETTERS[suit_str] * 13 + rank_map[rank_str]


def _index_in_hand(hand: list[int], action: int):
    """Return the display index of the hand card that *action* plays, or None.

    A swap action whose id is not literally in the hand is mapped to the first
    swap card held, mirroring how the selection loop accepts any swap card.
    """
    if action in hand:
        return hand.index(action)
    if is_swap(action):
        return next((i for i, c in enumerate(hand) if is_swap(c)), None)
    return None


def human_choose_action(env: BlazingEightsEnv, player: int) -> int:
    """Prompt the human on stdin until they pick a legal action.

    In the ``choose_suit`` phase the user picks 0-3 and the returned action is
    ``56 + index``. Otherwise the hand and the playable cards are shown, and
    the user enters a hand index, ``d`` (draw), ``p`` (pass) or ``q`` (quit).

    Args:
        env: The game environment; its phase, hands and legal actions are read.
        player: Seat index of the human player.

    Returns:
        A legal action id: a card id, DRAW_ACTION, PASS_ACTION, or a suit
        action (56-59).

    Raises:
        KeyboardInterrupt: If the user enters ``q`` at the main prompt.
    """
    # Same ordering as pretty_hand, so the printed indices match this list.
    hand = sorted(env.hands[player], key=lambda c: (card_suit(c) if not is_swap(c) else 99, c))
    legal = env.legal_actions(player)

    if env.phase == "choose_suit":
        print("\n  你打出了 8！选择指定花色:")
        for i, s in enumerate(SUIT_SYMBOLS):
            print(f"    [{i}] {SUIT_COLORS[i]}{s}{AI_RESET}")
        while True:
            choice = input("  选择 (0-3): ").strip()
            try:
                action = 56 + int(choice)
            except ValueError:
                print("  请输入 0-3")
                continue
            if action in legal:
                return action
            print("  无效选择，请重试")

    print(f"\n  你的手牌: {pretty_hand(hand, env.num_players)}")

    # Actions below NUM_CARDS are card plays; the rest are draw/pass/suit.
    playable = [a for a in legal if a < NUM_CARDS]
    can_draw = DRAW_ACTION in legal
    can_pass = PASS_ACTION in legal

    print("  可出的牌:", end="")
    if playable:
        playable_names = []
        for a in playable:
            # hand.index(a) alone would raise if a legal swap action id is not
            # the exact id held; fall back to the first swap card, or "?".
            idx_in_hand = _index_in_hand(hand, a)
            slot = "?" if idx_in_hand is None else idx_in_hand
            effect = card_effect(a, env.num_players)
            tag = f"[{slot}]{pretty_card(a)}"
            if effect:
                tag += f"({effect})"
            playable_names.append(tag)
        print("  " + "  ".join(playable_names))
    else:
        print("  无")

    if can_draw:
        print("  [d] 摸牌")
    if can_pass:
        if env.has_drawn_this_turn and not playable:
            print("  [p] 跳过 (无法出牌)")
        elif env.has_drawn_this_turn:
            print("  [p] 不出牌")
        else:
            print("  [p] 跳过 (牌堆已空)")

    while True:
        choice = input("  你的选择: ").strip().lower()
        if choice == "d":
            if can_draw:
                return DRAW_ACTION
            print("  牌堆已空，无法摸牌")
            continue
        if choice == "p":
            if can_pass:
                return PASS_ACTION
            print("  还没摸牌，不能直接跳过")
            continue
        if choice == "q":
            # The caller treats KeyboardInterrupt as a request to quit.
            raise KeyboardInterrupt

        try:
            idx = int(choice)
        except ValueError:
            print("  输入序号、d(摸牌) 或 q(退出)")
            continue

        if not 0 <= idx < len(hand):
            print(f"  序号超出范围 (0-{len(hand)-1})")
            continue

        card = hand[idx]
        if card in playable:
            return card
        # Any held swap card may stand in for whichever swap action is legal.
        if is_swap(card):
            swap_action = next((a for a in playable if is_swap(a)), None)
            if swap_action is not None:
                return swap_action
        print(f"  {pretty_card(card)} 不能出，请选其他牌")


def ai_choose_action(env: BlazingEightsEnv, model: PolicyValueNet, player: int, device="cpu") -> int:
    """Ask the policy network for *player*'s next move and return the action id.

    ``model.get_action`` is given the player's observation and legal actions
    and returns three values; only the first (the action) is used here.
    """
    observation = env._get_obs(player)
    legal_moves = env.legal_actions(player)
    chosen, _, _value = model.get_action(observation, legal_moves, device)
    return chosen


def describe_action(player_name: str, action: int, env: BlazingEightsEnv, drawn_card: int = None):
    """Build the one-line announcement for an action.

    Draw, pass and suit-choice actions (ids 56 and above) each have a fixed
    sentence. A card play names the card and appends its special effect, if
    any. The Queen's reverse is only announced with more than two players.
    ``drawn_card`` is accepted but not used.
    """
    if action == DRAW_ACTION:
        return f"  {player_name} 摸了一张牌"
    if action == PASS_ACTION:
        return f"  {player_name} 跳过"
    if action >= 56:
        suit_id = action - 56
        return f"  {player_name} 指定花色: {SUIT_COLORS[suit_id]}{SUIT_SYMBOLS[suit_id]}{AI_RESET}"

    headline = f"  {player_name} 打出 {pretty_card(action)}"
    rank = card_rank(action)

    # The swap check takes precedence over any rank-based effect.
    if is_swap(action):
        return headline + "  → 与下家交换手牌！"
    if rank == RANK_8:
        return headline + "  → 万能牌！选择花色..."
    if rank == RANK_K:
        return headline + "  → 其他所有人各摸 1 张！"
    if rank == RANK_Q and env.num_players > 2:
        return headline + "  → 反转方向！"
    if rank == RANK_J:
        return headline + "  → 跳过下一位！"
    return headline


def _human_turn(env: BlazingEightsEnv, player: int) -> None:
    """Prompt the human for one action, announce it, and apply it to *env*."""
    from collections import Counter

    action = human_choose_action(env, player)

    if action == DRAW_ACTION:
        # Multiset difference: unlike a set difference, it still finds the new
        # card when its id is already present in the hand.
        before = Counter(env.hands[player])
        env.step(action)
        gained = list((Counter(env.hands[player]) - before).elements())
        if gained:
            print(f"  你摸到了 {pretty_card(gained[0])}")
        else:
            print("  牌堆已空，没摸到牌")
        # The main loop re-reads env.current_player, so the human decides
        # again if the turn stays with them.
        return

    if action == PASS_ACTION:
        print("  你选择不出牌，结束回合")
        env.step(action)
        return

    print(describe_action("你", action, env))
    env.step(action)
    # Playing an 8 leaves the env waiting for this player's suit choice.
    if env.phase == "choose_suit" and env._pending_8_player == player:
        suit_action = human_choose_action(env, player)
        si = suit_action - 56
        print(f"  你指定花色: {SUIT_COLORS[si]}{SUIT_SYMBOLS[si]}{AI_RESET}")
        env.step(suit_action)


def _ai_choose_suit(env: BlazingEightsEnv, model: PolicyValueNet, player: int, device) -> None:
    """Let the AI pick a suit for a pending 8, announce it, and apply it."""
    suit_action = ai_choose_action(env, model, player, device)
    si = suit_action - 56
    print(f"  {AI_COLOR}AI-{player} 指定花色: {SUIT_COLORS[si]}{SUIT_SYMBOLS[si]}{AI_RESET}")
    env.step(suit_action)


def _ai_apply(env: BlazingEightsEnv, model: PolicyValueNet, player: int, action: int, device) -> None:
    """Announce and apply a non-pass AI action, then resolve a pending suit choice."""
    print(f"  {AI_COLOR}{describe_action(f'AI-{player}', action, env).strip()}{AI_RESET}")
    env.step(action)
    if env.phase == "choose_suit" and env._pending_8_player == player:
        _ai_choose_suit(env, model, player, device)


def _ai_turn(env: BlazingEightsEnv, model: PolicyValueNet, player: int, device) -> None:
    """Run one AI decision, including the follow-up after a draw, and announce it."""
    ai_name = f"AI-{player}"

    if env.phase == "choose_suit":
        _ai_choose_suit(env, model, player, device)
        return

    action = ai_choose_action(env, model, player, device)

    if action == PASS_ACTION:
        print(f"  {AI_COLOR}{ai_name} 跳过{AI_RESET}")
        env.step(action)
        return

    if action != DRAW_ACTION:
        _ai_apply(env, model, player, action, device)
        return

    env.step(action)
    # Defensive: only ask for a follow-up if the game is still running and it
    # is still this AI's turn. Otherwise let the main loop re-dispatch, as the
    # human path does.
    if env.done or env.current_player != player:
        print(f"  {AI_COLOR}{ai_name} 摸了一张牌{AI_RESET}")
        return

    # Reveal whether the draw left the AI with something playable (a "tell").
    ai_legal = env.legal_actions(player)
    has_playable = any(a < NUM_CARDS or (56 <= a <= 59) for a in ai_legal)
    note = "有牌可出" if has_playable else "无牌可出"
    print(f"  {AI_COLOR}{ai_name} 摸了一张牌 ({note}){AI_RESET}")

    follow_up = ai_choose_action(env, model, player, device)
    if follow_up == PASS_ACTION:
        print(f"  {AI_COLOR}{ai_name} 选择不出牌{AI_RESET}")
        env.step(follow_up)
    else:
        _ai_apply(env, model, player, follow_up, device)


def play_game(model_path: str, num_players: int, human_player: int = 0, show_ai: bool = False):
    """Play one interactive game between the human and the trained agent(s).

    Args:
        model_path: Checkpoint file; it must load to a mapping whose "model"
            entry is a state dict for PolicyValueNet.
        num_players: Total number of seats, human included.
        human_player: Seat index controlled by the human.
        show_ai: If true, AI hands are shown in the state display (debugging).

    Returns:
        None. Returns early, without printing a result, if the human quits
        (``q``, Ctrl+C or end of input).
    """
    device = "cpu"
    model = PolicyValueNet()
    checkpoint = torch.load(model_path, map_location=device, weights_only=True)
    model.load_state_dict(checkpoint["model"])
    model.eval()

    print()
    print("╔══════════════════════════════════════╗")
    print("║     Blazing Eights - 人机对战        ║")
    print("╠══════════════════════════════════════╣")
    print(f"║  玩家数: {num_players}   你是: Player {human_player}         ║")
    print("║  输入序号出牌, d摸牌, p跳过, q退出   ║")
    print("╚══════════════════════════════════════╝")

    env = BlazingEightsEnv(num_players=num_players)

    while not env.done:
        player = env.current_player
        if player != human_player:
            _ai_turn(env, model, player, device)
            continue

        print_game_state(env, human_player, show_ai_hand=show_ai)
        try:
            # Covers every prompt in the turn, including the suit prompt after
            # an 8, so quitting never ends in a traceback.
            _human_turn(env, player)
        except (KeyboardInterrupt, EOFError):
            print("\n\n  你退出了游戏。再见！")
            return

    # Game over: reveal all hands and announce the result.
    print_game_state(env, human_player, show_ai_hand=True)
    print()
    winner = env.winner
    if winner == human_player:
        print("  🎉 你赢了！！！")
    elif winner is not None and winner >= 0:
        print(f"  💀 AI-{winner} 赢了...")
    else:
        # Any other value (negative, or None) is reported as a draw.
        print("  平局（僵局）")

    for i in range(env.num_players):
        name = "你" if i == human_player else f"AI-{i}"
        print(f"    {name}: {len(env.hands[i])} 张剩余")
    print()


def main():
    """Parse command-line options and play games until the user declines a rematch.

    Exits through ``parser.error`` (status 2) if ``--num_players`` is outside
    the documented 2-5 range. End of input or Ctrl+C at the rematch prompt is
    treated as "no".
    """
    parser = argparse.ArgumentParser(description="Blazing Eights 人机对战")
    parser.add_argument("--model", type=str, default="blazing_ppo_final.pt", help="模型路径")
    parser.add_argument("--num_players", type=int, default=2, help="玩家总数 (2-5)")
    parser.add_argument("--show_ai", action="store_true", help="显示 AI 手牌 (调试用)")
    args = parser.parse_args()

    # Reject unsupported table sizes up front, before any model is loaded.
    if not 2 <= args.num_players <= 5:
        parser.error("--num_players 必须在 2 到 5 之间")

    while True:
        play_game(args.model, args.num_players, human_player=0, show_ai=args.show_ai)
        try:
            again = input("  再来一局? (y/n): ").strip().lower()
        except (EOFError, KeyboardInterrupt):
            # Closed stdin or Ctrl+C at the prompt counts as declining.
            print()
            again = ""
        if again != "y":
            print("  下次再见！")
            break


# Start the interactive game only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()