Initial commit: Blazing Eights RL agent

- Game environment with draw-then-decide rule (no auto-play on draw)
- PPO self-play training script
- Interactive human vs AI game (versus.py)
- Real-time play assistant (play.py)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
author: haoyuren <13851610112@163.com> 2026-02-22 01:48:03 -0600
committer: haoyuren <13851610112@163.com> 2026-02-22 01:48:03 -0600
commit: 72cf72d704ca1a3bf4e2a5e04dcbbad99dc0f98e (patch)
tree: 55cb96c17a0a71bc3c7155d65fd19cc185bf495c /versus.py
1 files changed, 341 insertions, 0 deletions
diff --git a/versus.py b/versus.py
new file mode 100644
index 0000000..7225e29
--- /dev/null
+++ b/versus.py
@@ -0,0 +1,341 @@
+"""
+Blazing Eights — Human vs AI interactive game.
+
+Play against the trained PPO agent in your terminal.
+
+Usage:
+  python versus.py --model blazing_ppo_final.pt
+  python versus.py --model blazing_ppo_final.pt --num_players 3  # you + 2 AI
+"""
+
+import argparse
+import torch
+import torch.nn.functional as F
+import numpy as np
+from blazing_env import (
+    BlazingEightsEnv, card_name, card_suit, card_rank,
+    is_swap, RANK_8, RANK_J, RANK_Q, RANK_K,
+    NUM_CARDS, TOTAL_ACTIONS, DRAW_ACTION, PASS_ACTION,
+)
+from train import PolicyValueNet
+
+# Suit glyphs; this file indexes the list with card_suit() results.
+SUIT_SYMBOLS = ["♠", "♥", "♦", "♣"]
+# Suit lookup keyed by lowercase letter as well as by the glyph itself.
+SUIT_LETTERS = {
+    **{letter: idx for idx, letter in enumerate("shdc")},
+    **{glyph: idx for idx, glyph in enumerate(SUIT_SYMBOLS)},
+}
+# Rank labels; this file indexes the list with card_rank() results.
+RANK_NAMES = ["A", *(str(pip) for pip in range(2, 11)), "J", "Q", "K"]
+
+
+def card_effect(c: int, num_players: int = 2) -> str:
+    """Return the yellow ANSI-colored effect label for a special card.
+
+    Swap cards, 8s, Kings and Jacks always carry a label; Queens carry one
+    only when more than two players are in the game. Every other card
+    yields an empty string.
+    """
+    if is_swap(c):
+        return "\033[93m换牌\033[0m"
+
+    # Checked in order; the first matching rank supplies the label.
+    rank_labels = (
+        (RANK_8, "\033[93m万能\033[0m"),
+        (RANK_K, "\033[93m全摸\033[0m"),
+        (RANK_Q, "\033[93m反转\033[0m" if num_players > 2 else ""),
+        (RANK_J, "\033[93m跳过\033[0m"),
+    )
+    rank = card_rank(c)
+    for special_rank, label in rank_labels:
+        if rank == special_rank:
+            return label
+    return ""
+
+
+def pretty_card(c: int) -> str:
+    """Render a card as an ANSI-colored ``<rank><suit glyph>`` label.
+
+    Swap cards are rendered as a magenta ``SWAP`` instead.
+    """
+    if is_swap(c):
+        return "\033[95mSWAP\033[0m"
+
+    suit_idx = card_suit(c)
+    rank_label = RANK_NAMES[card_rank(c)]
+    # Indexed by suit: spades white, hearts red, diamonds red, clubs white.
+    palette = ("\033[37m", "\033[91m", "\033[91m", "\033[37m")
+    return "".join((palette[suit_idx], rank_label, SUIT_SYMBOLS[suit_idx], "\033[0m"))
+
+
+def pretty_hand(hand: list[int], num_players: int = 2) -> str:
+    """Format a hand as ``[index] card(effect)`` entries joined by two spaces.
+
+    Cards are ordered by suit, then card id, with swap cards last; the
+    bracketed index is the card's position in that ordering.
+    """
+    def sort_key(card):
+        # 99 pushes swap cards behind every regular suit.
+        return (99 if is_swap(card) else card_suit(card), card)
+
+    def entry(position, card):
+        effect = card_effect(card, num_players)
+        text = f"[{position}] {pretty_card(card)}"
+        return f"{text}({effect})" if effect else text
+
+    ordered = sorted(hand, key=sort_key)
+    return "  ".join(entry(pos, card) for pos, card in enumerate(ordered))
+
+
+def print_game_state(env: BlazingEightsEnv, human_player: int, show_ai_hand: bool = False):
+    """Print the table: top discard, active suit, direction, deck size and each hand.
+
+    The human's row always shows a card count only. AI rows list the
+    actual cards when ``show_ai_hand`` is true and a count otherwise. The
+    player whose turn it is gets a ``◀`` marker.
+    """
+    print()
+    print("=" * 55)
+    top_str = pretty_card(env.discard[-1])
+    if env.active_suit is not None:
+        top_str += f"  (指定花色: {SUIT_SYMBOLS[env.active_suit]})"
+    dir_str = "逆时针 ←" if env.direction != 1 else "顺时针 →"
+    print(f"  弃牌堆顶: {top_str}    方向: {dir_str}    牌堆剩余: {len(env.deck)}")
+    print("-" * 55)
+    for seat in range(env.num_players):
+        is_human = seat == human_player
+        label = "你" if is_human else f"AI-{seat}"
+        if show_ai_hand and not is_human:
+            summary = ", ".join(pretty_card(c) for c in sorted(env.hands[seat]))
+        else:
+            summary = f"{len(env.hands[seat])} 张牌"
+        marker = " ◀" if seat == env.current_player else ""
+        print(f"  {label}: {summary}{marker}")
+    print("=" * 55)
+
+
+def parse_card_input(s: str) -> int:
+    """Parse typed card text such as ``8h``, ``Ks``, ``10d`` or ``SWAP`` into a card id.
+
+    Input is case-insensitive and surrounding whitespace is ignored. The
+    suit may be given as a letter (``s``/``h``/``d``/``c``) or as a suit
+    glyph. Regular cards map to ``suit * 13 + rank``; anything starting
+    with ``SW`` maps to ``52``.
+
+    Raises:
+        ValueError: if the text is empty or is not a recognisable card.
+    """
+    s = s.strip().upper()
+    # "SW" is a prefix of "SWAP", so a single check covers both spellings.
+    if s.startswith("SW"):
+        return 52
+    # "10" is the only two-character rank. Slicing (rather than indexing)
+    # keeps empty input on the ValueError path instead of an IndexError.
+    rank_len = 2 if s.startswith("10") else 1
+    rank_str, suit_str = s[:rank_len], s[rank_len:].lower()
+    if rank_str not in RANK_NAMES or suit_str not in SUIT_LETTERS:
+        raise ValueError(f"无法识别: {s}  (格式例: 8h, Ks, 10d, Ac, SWAP)")
+    return SUIT_LETTERS[suit_str] * 13 + RANK_NAMES.index(rank_str)
+
+
+def human_choose_action(env: BlazingEightsEnv, player: int) -> int:
+    """Prompt the human on stdin until they enter a legal action, then return it.
+
+    In the ``choose_suit`` phase the player picks a suit, returned as
+    ``56 + suit index``. Otherwise the hand and the available options are
+    printed and the player enters a hand index, ``d`` to draw, ``p`` to
+    pass, or ``q`` to quit.
+
+    Raises:
+        KeyboardInterrupt: when the player enters ``q`` at the card prompt.
+    """
+    legal = env.legal_actions(player)
+    if env.phase == "choose_suit":
+        return _prompt_suit_action(legal)
+
+    # Same ordering pretty_hand() applies, so the printed indices match ``hand``.
+    hand = sorted(env.hands[player], key=lambda c: (card_suit(c) if not is_swap(c) else 99, c))
+    print(f"\n  你的手牌: {pretty_hand(hand, env.num_players)}")
+
+    # Actions below NUM_CARDS are card plays; draw/pass are separate actions.
+    playable = [a for a in legal if a < NUM_CARDS]
+    can_draw = DRAW_ACTION in legal
+    can_pass = PASS_ACTION in legal
+
+    print("  可出的牌:", end="")
+    if playable:
+        playable_names = []
+        for a in playable:
+            idx_in_hand = hand.index(a)
+            effect = card_effect(a, env.num_players)
+            tag = f"[{idx_in_hand}]{pretty_card(a)}"
+            if effect:
+                tag += f"({effect})"
+            playable_names.append(tag)
+        print("  " + "  ".join(playable_names))
+    else:
+        print("  无")
+
+    if can_draw:
+        print("  [d] 摸牌")
+    if can_pass:
+        print("  [p] 跳过 (牌堆与弃牌堆均已空)")
+
+    while True:
+        choice = input("  你的选择: ").strip().lower()
+        if choice == "q":
+            raise KeyboardInterrupt
+        if choice == "d":
+            if can_draw:
+                return DRAW_ACTION
+            print("  牌堆已空，无法摸牌")
+            continue
+        if choice == "p":
+            if can_pass:
+                return PASS_ACTION
+            print("  还没摸牌，不能直接跳过")
+            continue
+
+        # Only the int() conversion can fail, so keep the try that narrow.
+        try:
+            idx = int(choice)
+        except ValueError:
+            print("  输入序号、d(摸牌) 或 q(退出)")
+            continue
+        if not 0 <= idx < len(hand):
+            print(f"  序号超出范围 (0-{len(hand)-1})")
+            continue
+
+        card = hand[idx]
+        if card in playable:
+            return card
+        # The chosen swap card may not itself be listed as legal; fall back
+        # to whichever swap action is.
+        if is_swap(card):
+            swap_action = next((a for a in playable if is_swap(a)), None)
+            if swap_action is not None:
+                return swap_action
+        print(f"  {pretty_card(card)} 不能出，请选其他牌")
+
+
+def _prompt_suit_action(legal) -> int:
+    """Ask which suit a played 8 should name; loop until a legal suit action is entered."""
+    print("\n  你打出了 8！选择指定花色:")
+    for i, s in enumerate(SUIT_SYMBOLS):
+        print(f"    [{i}] {s}")
+    while True:
+        choice = input("  选择 (0-3): ").strip()
+        try:
+            action = 56 + int(choice)
+        except ValueError:
+            print("  请输入 0-3")
+            continue
+        if action in legal:
+            return action
+        print("  无效选择，请重试")
+
+
+def ai_choose_action(env: BlazingEightsEnv, model: PolicyValueNet, player: int, device="cpu") -> int:
+    """Ask the policy network to pick one of ``player``'s legal actions.
+
+    Builds the player's observation and legal-action list from ``env`` and
+    forwards both, with ``device``, to ``model.get_action``.
+    """
+    observation = env._get_obs(player)
+    allowed = env.legal_actions(player)
+    # get_action returns a 3-tuple; only the first element (the action) is used.
+    chosen, _, _ = model.get_action(observation, allowed, device)
+    return chosen
+
+
+def describe_action(player_name: str, action: int, env: BlazingEightsEnv, drawn_card: int = None):
+    """Build the one-line announcement for ``action`` taken by ``player_name``.
+
+    Draw, pass and suit-choice actions (``action >= 56``) get fixed
+    messages. A played card gets its rendered name plus a note about its
+    special effect, if any; the Queen note appears only with more than two
+    players. ``drawn_card`` is accepted but not used.
+    """
+    if action == DRAW_ACTION:
+        return f"  {player_name} 摸了一张牌"
+    if action == PASS_ACTION:
+        return f"  {player_name} 跳过"
+    if action >= 56:
+        return f"  {player_name} 指定花色: {SUIT_SYMBOLS[action - 56]}"
+
+    announcement = f"  {player_name} 打出 {pretty_card(action)}"
+    rank = card_rank(action)
+    if is_swap(action):
+        note = "  → 与下家交换手牌！"
+    elif rank == RANK_8:
+        note = "  → 万能牌！选择花色..."
+    elif rank == RANK_K:
+        note = "  → 其他所有人各摸 1 张！"
+    elif rank == RANK_Q and env.num_players > 2:
+        note = "  → 反转方向！"
+    elif rank == RANK_J:
+        note = "  → 跳过下一位！"
+    else:
+        note = ""
+    return announcement + note
+
+
+def play_game(model_path: str, num_players: int, human_player: int = 0, show_ai: bool = False):
+    """Run one interactive game between the human and the AI players.
+
+    Loads the policy weights from the ``"model"`` entry of the checkpoint
+    at ``model_path`` onto the CPU, creates a fresh environment, and
+    alternates human and AI steps until ``env.done``. The final table and
+    the result are printed at the end.
+
+    Returns early, after a goodbye message, if the human quits with ``q``,
+    presses Ctrl-C, or closes stdin at any prompt (including the suit
+    prompt that follows playing an 8).
+    """
+    device = "cpu"
+    model = PolicyValueNet()
+    checkpoint = torch.load(model_path, map_location=device, weights_only=True)
+    model.load_state_dict(checkpoint["model"])
+    model.eval()
+
+    print()
+    print("╔══════════════════════════════════════╗")
+    print("║     Blazing Eights - 人机对战        ║")
+    print("╠══════════════════════════════════════╣")
+    print(f"║  玩家数: {num_players}   你是: Player {human_player}         ║")
+    print("║  输入序号出牌, d摸牌, p跳过, q退出   ║")
+    print("╚══════════════════════════════════════╝")
+
+    env = BlazingEightsEnv(num_players=num_players)
+
+    while not env.done:
+        player = env.current_player
+        if player == human_player:
+            print_game_state(env, human_player, show_ai_hand=show_ai)
+            # The whole human step sits inside the try so that quitting
+            # during the follow-up suit prompt is handled as well.
+            try:
+                _play_human_step(env, player)
+            except (KeyboardInterrupt, EOFError):
+                print("\n\n  你退出了游戏。再见！")
+                return
+        else:
+            _play_ai_turn(env, model, player, device)
+
+    # Game over
+    print_game_state(env, human_player, show_ai_hand=True)
+    print()
+    if env.winner == human_player:
+        print("  🎉 你赢了！！！")
+    elif env.winner is not None and env.winner >= 0:
+        print(f"  💀 AI-{env.winner} 赢了...")
+    else:
+        print("  平局（僵局）")
+
+    # Show hand sizes
+    for i in range(env.num_players):
+        name = "你" if i == human_player else f"AI-{i}"
+        print(f"    {name}: {len(env.hands[i])} 张剩余")
+    print()
+
+
+def _play_human_step(env: BlazingEightsEnv, player: int) -> None:
+    """Prompt the human for one action, announce it, and apply it to ``env``."""
+    action = human_choose_action(env, player)
+
+    if action == DRAW_ACTION:
+        # Diff the hand around the step to find out which card was drawn.
+        hand_before = set(env.hands[player])
+        env.step(action)
+        new_cards = set(env.hands[player]) - hand_before
+        if new_cards:
+            print(f"  你摸到了 {pretty_card(next(iter(new_cards)))}")
+        else:
+            print("  牌堆已空，没摸到牌")
+        # The main loop prompts again, so the human can play or pass next.
+        return
+
+    if action == PASS_ACTION:
+        print("  你选择不出牌，结束回合")
+        env.step(action)
+        return
+
+    print(describe_action("你", action, env))
+    env.step(action)
+    # If an 8 was played, the same player must now name a suit.
+    if env.phase == "choose_suit" and env._pending_8_player == player:
+        suit_action = human_choose_action(env, player)
+        print(f"  你指定花色: {SUIT_SYMBOLS[suit_action - 56]}")
+        env.step(suit_action)
+
+
+def _ai_pick_suit(env: BlazingEightsEnv, model: PolicyValueNet, player: int, ai_name: str, device) -> None:
+    """Let the AI choose a suit during ``choose_suit``, announce it, and apply it."""
+    suit_action = ai_choose_action(env, model, player, device)
+    print(f"  {ai_name} 指定花色: {SUIT_SYMBOLS[suit_action - 56]}")
+    env.step(suit_action)
+
+
+def _play_ai_turn(env: BlazingEightsEnv, model: PolicyValueNet, player: int, device) -> None:
+    """Play one AI turn: an optional draw, then a play or pass, then any suit choice."""
+    ai_name = f"AI-{player}"
+
+    if env.phase == "choose_suit":
+        _ai_pick_suit(env, model, player, ai_name, device)
+        return
+
+    action = ai_choose_action(env, model, player, device)
+
+    if action == PASS_ACTION:
+        print(f"  {ai_name} 跳过")
+        env.step(action)
+        return
+
+    if action == DRAW_ACTION:
+        print(f"  {ai_name} 摸了一张牌")
+        env.step(action)
+        # Normally the AI keeps the turn after drawing. If the step ended
+        # the game or moved the turn on, hand control back to the main loop
+        # rather than acting for a player whose turn it no longer is.
+        if env.done or env.current_player != player:
+            return
+        action = ai_choose_action(env, model, player, device)
+        if action == PASS_ACTION:
+            print(f"  {ai_name} 选择不出牌")
+            env.step(action)
+            return
+
+    print(describe_action(ai_name, action, env))
+    env.step(action)
+    if env.phase == "choose_suit" and env._pending_8_player == player:
+        _ai_pick_suit(env, model, player, ai_name, device)
+
+
+def main():
+    """Parse the command-line options and play games until the player declines another.
+
+    After each game the player is asked whether to play again; any answer
+    other than ``y`` ends the program. End-of-input or Ctrl-C at that
+    prompt is treated as declining.
+    """
+    parser = argparse.ArgumentParser(description="Blazing Eights 人机对战")
+    parser.add_argument("--model", type=str, default="blazing_ppo_final.pt", help="模型路径")
+    parser.add_argument("--num_players", type=int, default=2, help="玩家总数 (2-5)")
+    parser.add_argument("--show_ai", action="store_true", help="显示 AI 手牌 (调试用)")
+    args = parser.parse_args()
+
+    while True:
+        play_game(args.model, args.num_players, human_player=0, show_ai=args.show_ai)
+        try:
+            again = input("  再来一局? (y/n): ").strip().lower()
+        except (EOFError, KeyboardInterrupt):
+            # Closed stdin or Ctrl-C here means "no"; finish the prompt line
+            # so the farewell starts on a line of its own.
+            print()
+            again = ""
+        if again != "y":
+            print("  下次再见！")
+            break
+
+
+# Start the game loop only when this file is run as a script.
+if __name__ == "__main__":
+    main()
author	haoyuren <13851610112@163.com>	2026-02-22 01:48:03 -0600
committer	haoyuren <13851610112@163.com>	2026-02-22 01:48:03 -0600
commit	72cf72d704ca1a3bf4e2a5e04dcbbad99dc0f98e (patch)
tree	55cb96c17a0a71bc3c7155d65fd19cc185bf495c /versus.py