summaryrefslogtreecommitdiff
path: root/versus.py
diff options
context:
space:
mode:
author: haoyuren <13851610112@163.com>, 2026-02-22 01:48:03 -0600
committer: haoyuren <13851610112@163.com>, 2026-02-22 01:48:03 -0600
commit: 72cf72d704ca1a3bf4e2a5e04dcbbad99dc0f98e (patch)
tree: 55cb96c17a0a71bc3c7155d65fd19cc185bf495c /versus.py
Initial commit: Blazing Eights RL agent
- Game environment with draw-then-decide rule (no auto-play on draw)
- PPO self-play training script
- Interactive human vs AI game (versus.py)
- Real-time play assistant (play.py)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
Diffstat (limited to 'versus.py')
-rw-r--r-- versus.py 341
1 files changed, 341 insertions, 0 deletions
diff --git a/versus.py b/versus.py
new file mode 100644
index 0000000..7225e29
--- /dev/null
+++ b/versus.py
@@ -0,0 +1,341 @@
+"""
+Blazing Eights — Human vs AI interactive game.
+
+Play against the trained PPO agent in your terminal.
+
+Usage:
+ python versus.py --model blazing_ppo_final.pt
+ python versus.py --model blazing_ppo_final.pt --num_players 3 # you + 2 AI
+"""
+
+import argparse
+import torch
+import torch.nn.functional as F
+import numpy as np
+from blazing_env import (
+ BlazingEightsEnv, card_name, card_suit, card_rank,
+ is_swap, RANK_8, RANK_J, RANK_Q, RANK_K,
+ NUM_CARDS, TOTAL_ACTIONS, DRAW_ACTION, PASS_ACTION,
+)
+from train import PolicyValueNet
+
+# Suit glyphs, looked up by the suit number that card_suit() returns.
+SUIT_SYMBOLS = ["♠", "♥", "♦", "♣"]
+
+# Accepted spellings of a suit (ASCII letter or glyph) -> suit number.
+SUIT_LETTERS = {
+    **{letter: number for number, letter in enumerate("shdc")},
+    **{glyph: number for number, glyph in enumerate(SUIT_SYMBOLS)},
+}
+
+# Rank labels, looked up by the rank number that card_rank() returns.
+RANK_NAMES = "A 2 3 4 5 6 7 8 9 10 J Q K".split()
+
+
+def card_effect(c: int, num_players: int = 2) -> str:
+    """Return the yellow ANSI effect label for a special card, or "" if none.
+
+    Swap cards, 8s, kings and jacks always carry a label; a queen only
+    carries one when more than two players are in the game.
+    """
+    if is_swap(c):
+        return "\033[93m换牌\033[0m"
+
+    rank = card_rank(c)
+    # A queen gets no label in a two-player game.
+    if rank == RANK_Q and not num_players > 2:
+        return ""
+
+    labels_by_rank = {
+        RANK_8: "\033[93m万能\033[0m",
+        RANK_K: "\033[93m全摸\033[0m",
+        RANK_Q: "\033[93m反转\033[0m",
+        RANK_J: "\033[93m跳过\033[0m",
+    }
+    return labels_by_rank.get(rank, "")
+
+
+def pretty_card(c: int) -> str:
+    """Render a card as ANSI-coloured text: rank followed by suit glyph.
+
+    Swap cards render as a magenta ``SWAP``; hearts and diamonds are red,
+    spades and clubs are white.
+    """
+    if is_swap(c):
+        return "\033[95mSWAP\033[0m"
+
+    suit_idx = card_suit(c)
+    rank_label = RANK_NAMES[card_rank(c)]
+    # One colour escape per suit number: ♠ white, ♥ red, ♦ red, ♣ white.
+    palette = ("\033[37m", "\033[91m", "\033[91m", "\033[37m")
+    return "".join((palette[suit_idx], rank_label, SUIT_SYMBOLS[suit_idx], "\033[0m"))
+
+
+def pretty_hand(hand: list[int], num_players: int = 2) -> str:
+    """Format a hand as space-separated ``[index] card(effect)`` entries.
+
+    Cards are ordered by suit, then card id, with swap cards last; the
+    index shown is the position in that sorted order.
+    """
+    def display_order(card: int):
+        # 99 sorts swap cards after every real suit.
+        return (99 if is_swap(card) else card_suit(card), card)
+
+    entries = []
+    for position, card in enumerate(sorted(hand, key=display_order)):
+        suffix = card_effect(card, num_players)
+        entry = f"[{position}] {pretty_card(card)}"
+        entries.append(f"{entry}({suffix})" if suffix else entry)
+    return " ".join(entries)
+
+
+def print_game_state(env: BlazingEightsEnv, human_player: int, show_ai_hand: bool = False):
+    """Print the table: top discard, direction, deck size and one line per seat.
+
+    The human's line always shows only a card count. AI lines show the
+    actual cards when *show_ai_hand* is true, otherwise a count. The seat
+    whose turn it is gets a trailing arrow.
+    """
+    heavy_rule = "=" * 55
+
+    print()
+    print(heavy_rule)
+
+    top_str = pretty_card(env.discard[-1])
+    if env.active_suit is not None:
+        top_str += f" (指定花色: {SUIT_SYMBOLS[env.active_suit]})"
+    dir_str = "顺时针 →" if env.direction == 1 else "逆时针 ←"
+    print(f" 弃牌堆顶: {top_str} 方向: {dir_str} 牌堆剩余: {len(env.deck)}")
+    print("-" * 55)
+
+    for seat in range(env.num_players):
+        cards = env.hands[seat]
+        is_human = seat == human_player
+        label = "你" if is_human else f"AI-{seat}"
+        if show_ai_hand and not is_human:
+            summary = ", ".join(pretty_card(c) for c in sorted(cards))
+        else:
+            summary = f"{len(cards)} 张牌"
+        marker = " ◀" if seat == env.current_player else ""
+        print(f" {label}: {summary}{marker}")
+
+    print(heavy_rule)
+
+
+def parse_card_input(s: str) -> int:
+    """Parse a typed card name such as ``8h``, ``Ks``, ``10d``, ``Ac`` or ``SWAP``.
+
+    Matching is case-insensitive and ignores surrounding whitespace. The
+    suit may be an ASCII letter (s/h/d/c) or a suit glyph.
+
+    Returns:
+        52 for anything starting with ``SW``; otherwise
+        ``suit_number * 13 + rank_number``.
+
+    Raises:
+        ValueError: if the text is empty or is not a recognisable card.
+    """
+    text = s.strip().upper()
+    if not text:
+        # Without this guard the rank slice below would index an empty
+        # string and surface as IndexError instead of the documented error.
+        raise ValueError(f"无法识别: {text} (格式例: 8h, Ks, 10d, Ac, SWAP)")
+    if text.startswith("SW"):  # also matches the full word "SWAP"
+        return 52
+
+    # "10" is the only two-character rank.
+    rank_len = 2 if text.startswith("10") else 1
+    rank_str, suit_str = text[:rank_len], text[rank_len:].lower()
+    if rank_str not in RANK_NAMES or suit_str not in SUIT_LETTERS:
+        raise ValueError(f"无法识别: {text} (格式例: 8h, Ks, 10d, Ac, SWAP)")
+    return SUIT_LETTERS[suit_str] * 13 + RANK_NAMES.index(rank_str)
+
+
+def _hand_index_for_action(hand: list[int], action: int) -> int:
+    """Return the display index in *hand* of the card that *action* plays.
+
+    The selection loop in human_choose_action accepts any held swap card
+    as standing in for a legal swap action, so the listing must tolerate
+    the same mismatch: if the exact id is not held but the action is a
+    swap, the first held swap card is used.
+
+    Raises:
+        ValueError: if no card in *hand* corresponds to *action*.
+    """
+    if action in hand:
+        return hand.index(action)
+    if is_swap(action):
+        for position, held in enumerate(hand):
+            if is_swap(held):
+                return position
+    raise ValueError(f"action {action} does not match any card in hand")
+
+
+def human_choose_action(env: BlazingEightsEnv, player: int) -> int:
+    """Prompt the human on stdin until they pick a legal action; return it.
+
+    In the ``choose_suit`` phase the player enters a suit number 0-3 and
+    the result is ``56 + suit``. Otherwise the sorted hand and the
+    playable cards are listed and the player enters a hand index, ``d``
+    (draw), ``p`` (pass) or ``q`` (quit).
+
+    Raises:
+        KeyboardInterrupt: when the player enters ``q``.
+    """
+    hand = sorted(env.hands[player], key=lambda c: (card_suit(c) if not is_swap(c) else 99, c))
+    legal = env.legal_actions(player)
+
+    if env.phase == "choose_suit":
+        print("\n 你打出了 8!选择指定花色:")
+        for i, s in enumerate(SUIT_SYMBOLS):
+            print(f" [{i}] {s}")
+        while True:
+            choice = input(" 选择 (0-3): ").strip()
+            try:
+                idx = int(choice)
+            except ValueError:
+                print(" 请输入 0-3")
+                continue
+            action = 56 + idx
+            if action in legal:
+                return action
+            print(" 无效选择,请重试")
+
+    print(f"\n 你的手牌: {pretty_hand(hand, env.num_players)}")
+
+    # Card actions are the legal ids below NUM_CARDS; draw/pass are separate ids.
+    playable = [a for a in legal if a < NUM_CARDS]
+    can_draw = DRAW_ACTION in legal
+    can_pass = PASS_ACTION in legal
+
+    print(" 可出的牌:", end="")
+    if playable:
+        playable_names = []
+        for a in playable:
+            idx_in_hand = _hand_index_for_action(hand, a)
+            effect = card_effect(a, env.num_players)
+            tag = f"[{idx_in_hand}]{pretty_card(a)}"
+            if effect:
+                tag += f"({effect})"
+            playable_names.append(tag)
+        print(" " + " ".join(playable_names))
+    else:
+        print(" 无")
+
+    if can_draw:
+        print(" [d] 摸牌")
+    if can_pass:
+        print(" [p] 跳过 (牌堆与弃牌堆均已空)")
+
+    while True:
+        choice = input(" 你的选择: ").strip().lower()
+        if choice == "d":
+            if can_draw:
+                return DRAW_ACTION
+            print(" 牌堆已空,无法摸牌")
+            continue
+        if choice == "p":
+            if can_pass:
+                return PASS_ACTION
+            print(" 还没摸牌,不能直接跳过")
+            continue
+        if choice == "q":
+            raise KeyboardInterrupt
+
+        # Only the int() conversion is guarded, so unrelated ValueErrors
+        # raised further down are not mistaken for a typing error.
+        try:
+            idx = int(choice)
+        except ValueError:
+            print(" 输入序号、d(摸牌) 或 q(退出)")
+            continue
+        if not 0 <= idx < len(hand):
+            print(f" 序号超出范围 (0-{len(hand)-1})")
+            continue
+
+        card = hand[idx]
+        if card in playable:
+            return card
+        # Swap cards are interchangeable: any held swap card selects
+        # whichever swap action is legal.
+        if is_swap(card):
+            for a in playable:
+                if is_swap(a):
+                    return a
+        print(f" {pretty_card(card)} 不能出,请选其他牌")
+
+
+def ai_choose_action(env: BlazingEightsEnv, model: PolicyValueNet, player: int, device="cpu") -> int:
+    """Return the action the policy network selects for *player*.
+
+    The player's observation and legal actions are passed to
+    ``model.get_action``; only the first element of its 3-tuple result
+    is used.
+    """
+    observation = env._get_obs(player)
+    allowed = env.legal_actions(player)
+    picked, _, _ = model.get_action(observation, allowed, device)
+    return picked
+
+
+def describe_action(player_name: str, action: int, env: BlazingEightsEnv, drawn_card: int = None):
+    """Build the one-line announcement for *action* taken by *player_name*.
+
+    Draw, pass and suit-choice (``action >= 56``) each have a fixed
+    message. A card play names the card and, for special cards, appends
+    a description of its effect. *drawn_card* is accepted but not used.
+    """
+    if action == DRAW_ACTION:
+        return f" {player_name} 摸了一张牌"
+    if action == PASS_ACTION:
+        return f" {player_name} 跳过"
+    if action >= 56:
+        return f" {player_name} 指定花色: {SUIT_SYMBOLS[action - 56]}"
+
+    headline = f" {player_name} 打出 {pretty_card(action)}"
+    rank = card_rank(action)
+    if is_swap(action):
+        suffix = " → 与下家交换手牌!"
+    elif rank == RANK_8:
+        suffix = " → 万能牌!选择花色..."
+    elif rank == RANK_K:
+        suffix = " → 其他所有人各摸 1 张!"
+    elif rank == RANK_Q and env.num_players > 2:
+        # With two players a queen gets no effect text.
+        suffix = " → 反转方向!"
+    elif rank == RANK_J:
+        suffix = " → 跳过下一位!"
+    else:
+        suffix = ""
+    return headline + suffix
+
+
+def _resolve_pending_suit(env: BlazingEightsEnv, player: int, pick_suit, prefix: str) -> None:
+    """If the env is waiting on *player*'s suit choice for an 8, get and apply it.
+
+    *pick_suit* is a zero-argument callable returning the suit action;
+    *prefix* is the text printed before the chosen suit glyph.
+    """
+    if env.phase == "choose_suit" and env._pending_8_player == player:
+        suit_action = pick_suit()
+        print(f"{prefix}{SUIT_SYMBOLS[suit_action - 56]}")
+        env.step(suit_action)
+
+
+def _human_turn(env: BlazingEightsEnv, human_player: int, show_ai: bool) -> None:
+    """Show the table, prompt the human, and apply one action.
+
+    After a card play this also resolves the suit choice if the env asks
+    for one. Raises KeyboardInterrupt if the player quits at any prompt.
+    """
+    from collections import Counter  # only needed here, so imported locally
+
+    print_game_state(env, human_player, show_ai_hand=show_ai)
+    action = human_choose_action(env, human_player)
+
+    if action == DRAW_ACTION:
+        # Multiset difference, so the new card is still detected if its id
+        # duplicates one already held (a plain set difference would miss it).
+        before = Counter(env.hands[human_player])
+        env.step(action)
+        gained = Counter(env.hands[human_player]) - before
+        if gained:
+            print(f" 你摸到了 {pretty_card(next(iter(gained)))}")
+        else:
+            print(" 牌堆已空,没摸到牌")
+        return
+
+    if action == PASS_ACTION:
+        print(" 你选择不出牌,结束回合")
+        env.step(action)
+        return
+
+    print(describe_action("你", action, env))
+    env.step(action)
+    _resolve_pending_suit(
+        env, human_player,
+        lambda: human_choose_action(env, human_player),
+        " 你指定花色: ",
+    )
+
+
+def _ai_turn(env: BlazingEightsEnv, model: PolicyValueNet, player: int, device: str) -> None:
+    """Let the AI seated at *player* act, announcing each step it takes."""
+    ai_name = f"AI-{player}"
+
+    if env.phase == "choose_suit":
+        action = ai_choose_action(env, model, player, device)
+        print(f" {ai_name} 指定花色: {SUIT_SYMBOLS[action - 56]}")
+        env.step(action)
+        return
+
+    action = ai_choose_action(env, model, player, device)
+
+    if action == DRAW_ACTION:
+        print(f" {ai_name} 摸了一张牌")
+        env.step(action)
+        # Only follow up if the game is still running and it is still this
+        # player's turn; otherwise hand control back to the main loop.
+        if env.done or env.current_player != player:
+            return
+        action = ai_choose_action(env, model, player, device)
+        if action == PASS_ACTION:
+            print(f" {ai_name} 选择不出牌")
+            env.step(action)
+            return
+    elif action == PASS_ACTION:
+        print(f" {ai_name} 跳过")
+        env.step(action)
+        return
+
+    print(describe_action(ai_name, action, env))
+    env.step(action)
+    _resolve_pending_suit(
+        env, player,
+        lambda: ai_choose_action(env, model, player, device),
+        f" {ai_name} 指定花色: ",
+    )
+
+
+def play_game(model_path: str, num_players: int, human_player: int = 0, show_ai: bool = False):
+    """Play one interactive game between the human and AI opponents.
+
+    Loads the ``"model"`` state dict from the checkpoint at *model_path*
+    into a PolicyValueNet on CPU, then alternates turns until ``env.done``
+    and prints the result. Returns early, after a farewell message, if
+    the human quits at any prompt.
+
+    Args:
+        model_path: path of the checkpoint file to load.
+        num_players: total number of seats, human included.
+        human_player: seat index controlled by the human.
+        show_ai: reveal AI hands in the table display during play.
+    """
+    device = "cpu"
+    model = PolicyValueNet()
+    checkpoint = torch.load(model_path, map_location=device, weights_only=True)
+    model.load_state_dict(checkpoint["model"])
+    model.eval()
+
+    print()
+    print("╔══════════════════════════════════════╗")
+    print("║ Blazing Eights - 人机对战 ║")
+    print("╠══════════════════════════════════════╣")
+    print(f"║ 玩家数: {num_players} 你是: Player {human_player} ║")
+    print("║ 输入序号出牌, d摸牌, p跳过, q退出 ║")
+    print("╚══════════════════════════════════════╝")
+
+    env = BlazingEightsEnv(num_players=num_players)
+
+    while not env.done:
+        player = env.current_player
+        if player == human_player:
+            # The whole turn is guarded so that quitting at the follow-up
+            # suit prompt exits as gracefully as quitting at the first one.
+            try:
+                _human_turn(env, human_player, show_ai)
+            except KeyboardInterrupt:
+                print("\n\n 你退出了游戏。再见!")
+                return
+        else:
+            _ai_turn(env, model, player, device)
+
+    # Game over
+    print_game_state(env, human_player, show_ai_hand=True)
+    print()
+    if env.winner == human_player:
+        print(" 🎉 你赢了!!!")
+    elif env.winner is not None and env.winner >= 0:
+        print(f" 💀 AI-{env.winner} 赢了...")
+    else:
+        print(" 平局(僵局)")
+
+    # Show hand sizes
+    for i in range(env.num_players):
+        name = "你" if i == human_player else f"AI-{i}"
+        print(f" {name}: {len(env.hands[i])} 张剩余")
+    print()
+
+
+def main():
+    """Parse command-line options and play games until the player declines a rematch."""
+    parser = argparse.ArgumentParser(description="Blazing Eights 人机对战")
+    parser.add_argument("--model", type=str, default="blazing_ppo_final.pt", help="模型路径")
+    parser.add_argument("--num_players", type=int, default=2, help="玩家总数 (2-5)")
+    parser.add_argument("--show_ai", action="store_true", help="显示 AI 手牌 (调试用)")
+    options = parser.parse_args()
+
+    keep_playing = True
+    while keep_playing:
+        play_game(options.model, options.num_players, human_player=0, show_ai=options.show_ai)
+        answer = input(" 再来一局? (y/n): ").strip().lower()
+        # Anything other than exactly "y" ends the session.
+        keep_playing = answer == "y"
+    print(" 下次再见!")
+
+
+if __name__ == "__main__":
+    main()