# versus.py

"""
Blazing Eights — Human vs AI interactive game.

Play against the trained PPO agent in your terminal.

Usage:
  python versus.py --model blazing_ppo_final.pt
  python versus.py --model blazing_ppo_final.pt --num_players 3  # you + 2 AI
"""

import argparse
import torch
import torch.nn.functional as F
import numpy as np
from blazing_env import (
    BlazingEightsEnv, card_name, card_suit, card_rank,
    is_swap, RANK_8, RANK_J, RANK_Q, RANK_K,
    NUM_CARDS, TOTAL_ACTIONS, DRAW_ACTION, PASS_ACTION,
)
from train import PolicyValueNet

# Suit glyphs, looked up by suit index (the value of card_suit(), env.active_suit,
# or a suit-choice action minus 56).
SUIT_SYMBOLS = ["♠", "♥", "♦", "♣"]
# Suit spellings accepted in typed card input (lowercase letter or glyph),
# mapped to the matching index into SUIT_SYMBOLS.
SUIT_LETTERS = {"s": 0, "h": 1, "d": 2, "c": 3,
                "♠": 0, "♥": 1, "♦": 2, "♣": 3}
# Rank labels, looked up by the value returned from card_rank().
RANK_NAMES = ["A", "2", "3", "4", "5", "6", "7", "8", "9", "10", "J", "Q", "K"]


def card_effect(c: int, num_players: int = 2) -> str:
    """Give the yellow-highlighted effect label of a special card.

    Plain cards yield an empty string. The queen only carries a label when
    more than two players are seated.
    """
    rank_labels = {RANK_8: "万能", RANK_K: "全摸", RANK_J: "跳过"}
    if num_players > 2:
        rank_labels[RANK_Q] = "反转"

    # Swap cards are identified before any rank lookup is attempted.
    label = "换牌" if is_swap(c) else rank_labels.get(card_rank(c), "")
    if not label:
        return ""
    return "\033[93m" + label + "\033[0m"


def pretty_card(c: int) -> str:
    """Render a card as ANSI-coloured text: rank plus suit glyph, or SWAP."""
    if is_swap(c):
        return "\033[95mSWAP\033[0m"

    white, red = "\033[37m", "\033[91m"
    suit_idx = card_suit(c)
    rank_label = RANK_NAMES[card_rank(c)]
    # Indexed like SUIT_SYMBOLS: ♠ white, ♥ red, ♦ red, ♣ white.
    tint = (white, red, red, white)[suit_idx]
    return tint + rank_label + SUIT_SYMBOLS[suit_idx] + "\033[0m"


def pretty_hand(hand: list[int], num_players: int = 2) -> str:
    """Format a hand as numbered, coloured cards separated by two spaces.

    Cards are ordered by suit and then by card id, with swap cards last; the
    bracketed number in front of each card is its position in that ordering.
    Special cards get their effect label appended in parentheses.
    """
    def sort_key(card: int):
        return (99 if is_swap(card) else card_suit(card), card)

    def entry(position: int, card: int) -> str:
        note = card_effect(card, num_players)
        text = f"[{position}] {pretty_card(card)}"
        return f"{text}({note})" if note else text

    ordered = sorted(hand, key=sort_key)
    return "  ".join(entry(position, card) for position, card in enumerate(ordered))


def print_game_state(env: BlazingEightsEnv, human_player: int, show_ai_hand: bool = False):
    """Print the table: discard top, direction, deck size and one row per player.

    The human's row always shows only a card count. AI rows show the actual
    cards when ``show_ai_hand`` is true, otherwise a count as well. The row
    of the player whose turn it is gets a trailing arrow marker.
    """
    heavy_rule = "=" * 55

    print()
    print(heavy_rule)

    pile_top = pretty_card(env.discard[-1])
    if env.active_suit is not None:
        pile_top += f"  (指定花色: {SUIT_SYMBOLS[env.active_suit]})"
    flow = "顺时针 →" if env.direction == 1 else "逆时针 ←"
    print(f"  弃牌堆顶: {pile_top}    方向: {flow}    牌堆剩余: {len(env.deck)}")
    print("-" * 55)

    for seat in range(env.num_players):
        is_human = seat == human_player
        label = "你" if is_human else f"AI-{seat}"
        cards = env.hands[seat]
        if show_ai_hand and not is_human:
            summary = ", ".join(map(pretty_card, sorted(cards)))
        else:
            summary = f"{len(cards)} 张牌"
        marker = " ◀" if seat == env.current_player else ""
        print(f"  {label}: {summary}{marker}")

    print(heavy_rule)


def parse_card_input(s: str) -> int:
    """Translate typed card text into a card id.

    Accepted forms are a rank followed by a suit (for example ``8h``, ``Ks``,
    ``10d``, ``Ac``) or any text beginning with ``sw`` for the swap card.
    Input is case-insensitive and surrounding whitespace is ignored; the suit
    may be any key of ``SUIT_LETTERS``.

    Returns:
        52 for swap input, otherwise ``suit_index * 13 + rank_index`` where
        the rank index is the position of the rank in ``RANK_NAMES``.

    Raises:
        ValueError: if the text is empty or is not a recognised rank/suit pair.
    """
    text = s.strip().upper()
    if not text:
        # Previously an empty entry fell through to text[0] and raised
        # IndexError; report it like every other unrecognised input.
        raise ValueError(f"无法识别: {text}  (格式例: 8h, Ks, 10d, Ac, SWAP)")

    # "SWAP" itself starts with "SW", so one prefix test covers both spellings.
    if text.startswith("SW"):
        # NOTE(review): 52 is hard-coded here; presumably the first swap-card
        # id in blazing_env — confirm.
        return 52

    # "10" is the only two-character rank.
    rank_len = 2 if text.startswith("10") else 1
    rank_str, suit_str = text[:rank_len], text[rank_len:].lower()

    rank_map = {name: index for index, name in enumerate(RANK_NAMES)}
    if rank_str not in rank_map or suit_str not in SUIT_LETTERS:
        raise ValueError(f"无法识别: {text}  (格式例: 8h, Ks, 10d, Ac, SWAP)")
    return SUIT_LETTERS[suit_str] * 13 + rank_map[rank_str]


def _position_in_hand(hand: list[int], action: int):
    """Return the index in ``hand`` of the card ``action`` would play, or None.

    A swap action whose exact id is not held is matched to the first swap
    card in the hand, mirroring how a chosen swap card is resolved to a legal
    swap action when the player picks one.
    """
    if action in hand:
        return hand.index(action)
    if is_swap(action):
        return next((i for i, held in enumerate(hand) if is_swap(held)), None)
    return None


def human_choose_action(env: BlazingEightsEnv, player: int) -> int:
    """Prompt the human on stdin until a legal action is entered; return it.

    In the ``"choose_suit"`` phase the prompt asks for a suit index and the
    returned action is ``56 + index``. Otherwise the sorted hand is printed
    with position numbers and the player may type a position to play that
    card, ``d`` to draw or ``p`` to pass (each only when legal), or ``q`` to
    quit.

    Args:
        env: the running game; read for phase, hands, player count and the
            legal actions of ``player``.
        player: index of the human player.

    Returns:
        An action taken from ``env.legal_actions(player)``.

    Raises:
        KeyboardInterrupt: when the player types ``q`` at the card prompt.
    """
    legal = env.legal_actions(player)

    if env.phase == "choose_suit":
        print("\n  你打出了 8！选择指定花色:")
        for i, symbol in enumerate(SUIT_SYMBOLS):
            print(f"    [{i}] {symbol}")
        while True:
            choice = input("  选择 (0-3): ").strip()
            try:
                idx = int(choice)
            except ValueError:
                print("  请输入 0-3")
                continue
            # Suit choices are encoded as 56 + suit index.
            action = 56 + idx
            if action in legal:
                return action
            print("  无效选择，请重试")

    # Same ordering as pretty_hand, so the printed positions match `hand`.
    hand = sorted(env.hands[player], key=lambda c: (card_suit(c) if not is_swap(c) else 99, c))
    print(f"\n  你的手牌: {pretty_hand(hand, env.num_players)}")

    # Build playable cards display
    playable = [a for a in legal if a < NUM_CARDS]
    can_draw = DRAW_ACTION in legal
    can_pass = PASS_ACTION in legal

    print("  可出的牌:", end="")
    if playable:
        playable_names = []
        for a in playable:
            # A bare hand.index(a) raised ValueError when the legal swap id
            # differed from the swap card actually held.
            position = _position_in_hand(hand, a)
            shown = "?" if position is None else position
            tag = f"[{shown}]{pretty_card(a)}"
            effect = card_effect(a, env.num_players)
            if effect:
                tag += f"({effect})"
            playable_names.append(tag)
        print("  " + "  ".join(playable_names))
    else:
        print("  无")

    if can_draw:
        print("  [d] 摸牌")
    if can_pass:
        print("  [p] 跳过 (牌堆与弃牌堆均已空)")

    while True:
        choice = input("  你的选择: ").strip().lower()
        if choice == "d":
            if can_draw:
                return DRAW_ACTION
            print("  牌堆已空，无法摸牌")
            continue
        if choice == "p":
            if can_pass:
                return PASS_ACTION
            print("  还没摸牌，不能直接跳过")
            continue
        if choice == "q":
            # The game loop treats KeyboardInterrupt as "player quit".
            raise KeyboardInterrupt

        try:
            idx = int(choice)
        except ValueError:
            print("  输入序号、d(摸牌) 或 q(退出)")
            continue
        if not 0 <= idx < len(hand):
            print(f"  序号超出范围 (0-{len(hand)-1})")
            continue

        card = hand[idx]
        if card in playable:
            return card
        # Handle swap cards (might have multiple): any legal swap action
        # stands in for the swap card that was picked.
        if is_swap(card):
            for a in playable:
                if is_swap(a):
                    return a
        print(f"  {pretty_card(card)} 不能出，请选其他牌")


def ai_choose_action(env: BlazingEightsEnv, model: PolicyValueNet, player: int, device="cpu") -> int:
    """Ask the policy network for ``player``'s next action.

    The player's observation and legal-action list are handed to
    ``model.get_action``; only the first item of the triple it returns (the
    chosen action) is kept.
    """
    chosen, _, _ = model.get_action(env._get_obs(player), env.legal_actions(player), device)
    return chosen


def describe_action(player_name: str, action: int, env: BlazingEightsEnv, drawn_card: int = None):
    """Build the one-line announcement for an action taken by ``player_name``.

    Draw, pass and suit-choice actions (ids from 56 up) get fixed phrases.
    A played card is named and, when it is special, followed by a note on its
    effect; the queen's reversal note only appears with more than two players.
    ``drawn_card`` is accepted but not used.
    """
    if action == DRAW_ACTION:
        return f"  {player_name} 摸了一张牌"
    if action == PASS_ACTION:
        return f"  {player_name} 跳过"
    if action >= 56:
        return f"  {player_name} 指定花色: {SUIT_SYMBOLS[action - 56]}"

    headline = f"  {player_name} 打出 {pretty_card(action)}"
    rank = card_rank(action)
    rank_notes = {
        RANK_8: "  → 万能牌！选择花色...",
        RANK_K: "  → 其他所有人各摸 1 张！",
        RANK_J: "  → 跳过下一位！",
    }

    if is_swap(action):
        note = "  → 与下家交换手牌！"
    elif rank == RANK_Q:
        note = "  → 反转方向！" if env.num_players > 2 else ""
    else:
        note = rank_notes.get(rank, "")
    return headline + note


def _choose_suit_if_pending(env: BlazingEightsEnv, player: int, choose, speaker: str) -> None:
    """Let ``player`` name a suit when the environment is waiting on their 8.

    ``choose`` is a zero-argument callable returning the suit action;
    ``speaker`` is the text printed directly in front of the announcement.
    """
    if env.phase == "choose_suit" and env._pending_8_player == player:
        suit_action = choose()
        # Suit choices are encoded as 56 + suit index.
        print(f"  {speaker}指定花色: {SUIT_SYMBOLS[suit_action - 56]}")
        env.step(suit_action)


def _play_human_turn(env: BlazingEightsEnv, player: int, show_ai: bool) -> bool:
    """Show the table, prompt the human once and apply the chosen action.

    Returns False when the player quits (``q`` or Ctrl-C), True otherwise.
    """
    print_game_state(env, player, show_ai_hand=show_ai)
    try:
        action = human_choose_action(env, player)

        if action == DRAW_ACTION:
            # Compare the hand before and after to find the new card.
            held_before = set(env.hands[player])
            env.step(action)
            fresh = set(env.hands[player]) - held_before
            if fresh:
                print(f"  你摸到了 {pretty_card(next(iter(fresh)))}")
            else:
                print("  牌堆已空，没摸到牌")
        elif action == PASS_ACTION:
            print("  你选择不出牌，结束回合")
            env.step(action)
        else:
            print(describe_action("你", action, env))
            env.step(action)
            # Playing an 8 is followed by a second prompt for the suit. It is
            # inside the try so quitting here is handled like anywhere else.
            _choose_suit_if_pending(
                env, player, lambda: human_choose_action(env, player), "你"
            )
    except KeyboardInterrupt:
        print("\n\n  你退出了游戏。再见！")
        return False
    return True


def _play_ai_turn(env: BlazingEightsEnv, model: PolicyValueNet, player: int, device: str) -> None:
    """Let the AI seated at ``player`` act, announcing every step it takes."""
    ai_name = f"AI-{player}"

    def choose() -> int:
        return ai_choose_action(env, model, player, device)

    if env.phase == "choose_suit":
        action = choose()
        print(f"  {ai_name} 指定花色: {SUIT_SYMBOLS[action - 56]}")
        env.step(action)
        return

    action = choose()
    if action == DRAW_ACTION:
        print(f"  {ai_name} 摸了一张牌")
        env.step(action)
        # Only let the AI follow up if the game is still running and the
        # turn is still its own; otherwise the main loop takes over.
        if env.done or env.current_player != player:
            return
        action = choose()
        if action == PASS_ACTION:
            print(f"  {ai_name} 选择不出牌")
            env.step(action)
            return
    elif action == PASS_ACTION:
        print(f"  {ai_name} 跳过")
        env.step(action)
        return

    print(describe_action(ai_name, action, env))
    env.step(action)
    _choose_suit_if_pending(env, player, choose, f"{ai_name} ")


def play_game(model_path: str, num_players: int, human_player: int = 0, show_ai: bool = False):
    """Play one interactive game of Blazing Eights against the trained agent.

    Args:
        model_path: checkpoint file; its ``"model"`` entry is loaded into a
            fresh ``PolicyValueNet`` on the CPU.
        num_players: total number of seats, human included.
        human_player: seat index played by the human; all others are AI.
        show_ai: reveal the AI hands in the per-turn table print-out.

    Returns:
        None, both when the game finishes and when the human quits early.
    """
    device = "cpu"
    model = PolicyValueNet()
    checkpoint = torch.load(model_path, map_location=device, weights_only=True)
    model.load_state_dict(checkpoint["model"])
    model.eval()

    print()
    print("╔══════════════════════════════════════╗")
    print("║     Blazing Eights - 人机对战        ║")
    print("╠══════════════════════════════════════╣")
    print(f"║  玩家数: {num_players}   你是: Player {human_player}         ║")
    print("║  输入序号出牌, d摸牌, p跳过, q退出   ║")
    print("╚══════════════════════════════════════╝")

    env = BlazingEightsEnv(num_players=num_players)

    while not env.done:
        player = env.current_player
        if player == human_player:
            if not _play_human_turn(env, player, show_ai):
                return
        else:
            _play_ai_turn(env, model, player, device)

    # Game over
    print_game_state(env, human_player, show_ai_hand=True)
    print()
    if env.winner == human_player:
        print("  🎉 你赢了！！！")
    elif env.winner >= 0:
        print(f"  💀 AI-{env.winner} 赢了...")
    else:
        print("  平局（僵局）")

    # Show hand sizes
    for i in range(env.num_players):
        name = "你" if i == human_player else f"AI-{i}"
        print(f"    {name}: {len(env.hands[i])} 张剩余")
    print()


def main():
    """Command-line entry point: parse options, then play until declined.

    After every game the player is asked whether to play again; anything
    other than ``y`` (including Ctrl-C or end of input at the prompt) ends
    the session.
    """
    parser = argparse.ArgumentParser(description="Blazing Eights 人机对战")
    parser.add_argument("--model", type=str, default="blazing_ppo_final.pt", help="模型路径")
    # Enforce the 2-5 range the help text promises, so an out-of-range value
    # is rejected by argparse rather than passed on to the environment.
    parser.add_argument("--num_players", type=int, default=2, choices=range(2, 6),
                        metavar="N", help="玩家总数 (2-5)")
    parser.add_argument("--show_ai", action="store_true", help="显示 AI 手牌 (调试用)")
    args = parser.parse_args()

    while True:
        play_game(args.model, args.num_players, human_player=0, show_ai=args.show_ai)
        try:
            again = input("  再来一局? (y/n): ").strip().lower()
        except (EOFError, KeyboardInterrupt):
            # Closed stdin or Ctrl-C at the prompt counts as "no".
            print()
            again = ""
        if again != "y":
            print("  下次再见！")
            break


# Start the interactive game only when this file is executed as a script.
if __name__ == "__main__":
    main()