Improve versus UI: suit colors, AI highlighting, draw tell

- Color-code suits: ♠blue ♥magenta ♦yellow ♣cyan - AI actions highlighted in red - Show whether AI has playable cards after drawing (observable tell) - Fix pass prompt: show context-specific reason (无法出牌/不出牌/牌堆已空) Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
author: haoyuren <13851610112@163.com> 2026-02-22 03:16:08 -0600
committer: haoyuren <13851610112@163.com> 2026-02-22 03:16:08 -0600
commit: 1cb5eb34ead9b4efc1032ec74c6ccc439f007c18 (patch)
tree: 8b6fa5847d4057f9298f3799a9d26c5922050a2e
parent: dc4795bf7a9991fca4673bf928830b7b627034e4 (diff)
1 files changed, 35 insertions, 15 deletions
diff --git a/versus.py b/versus.py
index 7225e29..9b9b8dd 100644
--- a/versus.py
+++ b/versus.py
@@ -19,6 +19,11 @@ from blazing_env import (
 )
 from train import PolicyValueNet
 
+AI_COLOR = "\033[91m"   # red for AI actions
+AI_RESET = "\033[0m"
+# Suit colors: ♠blue ♥magenta ♦yellow ♣cyan
+SUIT_COLORS = ["\033[94m", "\033[35m", "\033[93m", "\033[96m"]
+
 SUIT_SYMBOLS = ["♠", "♥", "♦", "♣"]
 SUIT_LETTERS = {"s": 0, "h": 1, "d": 2, "c": 3,
                 "♠": 0, "♥": 1, "♦": 2, "♣": 3}
@@ -46,8 +51,7 @@ def pretty_card(c: int) -> str:
         return "\033[95mSWAP\033[0m"
     suit = card_suit(c)
     rank = RANK_NAMES[card_rank(c)]
-    colors = ["\033[37m", "\033[91m", "\033[91m", "\033[37m"]  # ♠white ♥red ♦red ♣white
-    return f"{colors[suit]}{rank}{SUIT_SYMBOLS[suit]}\033[0m"
+    return f"{SUIT_COLORS[suit]}{rank}{SUIT_SYMBOLS[suit]}\033[0m"
 
 
 def pretty_hand(hand: list[int], num_players: int = 2) -> str:
@@ -68,7 +72,8 @@ def print_game_state(env: BlazingEightsEnv, human_player: int, show_ai_hand: boo
     top = env.discard[-1]
     top_str = pretty_card(top)
     if env.active_suit is not None:
-        top_str += f"  (指定花色: {SUIT_SYMBOLS[env.active_suit]})"
+        s = env.active_suit
+        top_str += f"  (指定花色: {SUIT_COLORS[s]}{SUIT_SYMBOLS[s]}{AI_RESET})"
     dir_str = "顺时针 →" if env.direction == 1 else "逆时针 ←"
     print(f"  弃牌堆顶: {top_str}    方向: {dir_str}    牌堆剩余: {len(env.deck)}")
     print("-" * 55)
@@ -108,7 +113,7 @@ def human_choose_action(env: BlazingEightsEnv, player: int) -> int:
     if env.phase == "choose_suit":
         print("\n  你打出了 8！选择指定花色:")
         for i, s in enumerate(SUIT_SYMBOLS):
-            print(f"    [{i}] {s}")
+            print(f"    [{i}] {SUIT_COLORS[i]}{s}{AI_RESET}")
         while True:
             try:
                 choice = input("  选择 (0-3): ").strip()
@@ -145,7 +150,12 @@ def human_choose_action(env: BlazingEightsEnv, player: int) -> int:
     if can_draw:
         print("  [d] 摸牌")
     if can_pass:
-        print("  [p] 跳过 (牌堆与弃牌堆均已空)")
+        if env.has_drawn_this_turn and not playable:
+            print("  [p] 跳过 (无法出牌)")
+        elif env.has_drawn_this_turn:
+            print("  [p] 不出牌")
+        else:
+            print("  [p] 跳过 (牌堆已空)")
 
     while True:
         choice = input("  你的选择: ").strip().lower()
@@ -193,7 +203,7 @@ def describe_action(player_name: str, action: int, env: BlazingEightsEnv, drawn_
         return f"  {player_name} 跳过"
     if action >= 56:
         suit = action - 56
-        return f"  {player_name} 指定花色: {SUIT_SYMBOLS[suit]}"
+        return f"  {player_name} 指定花色: {SUIT_COLORS[suit]}{SUIT_SYMBOLS[suit]}{AI_RESET}"
     desc = f"  {player_name} 打出 {pretty_card(action)}"
     rank = card_rank(action)
     if is_swap(action):
@@ -264,7 +274,8 @@ def play_game(model_path: str, num_players: int, human_player: int = 0, show_ai:
                 # If played an 8, need to choose suit
                 if env.phase == "choose_suit" and env._pending_8_player == human_player:
                     suit_action = human_choose_action(env, human_player)
-                    print(f"  你指定花色: {SUIT_SYMBOLS[suit_action - 56]}")
+                    si = suit_action - 56
+                    print(f"  你指定花色: {SUIT_COLORS[si]}{SUIT_SYMBOLS[si]}{AI_RESET}")
                     obs, rewards, done, info = env.step(suit_action)
                 continue
         else:
@@ -273,36 +284,45 @@ def play_game(model_path: str, num_players: int, human_player: int = 0, show_ai:
 
             if env.phase == "choose_suit":
                 action = ai_choose_action(env, model, player, device)
-                print(f"  {ai_name} 指定花色: {SUIT_SYMBOLS[action - 56]}")
+                si = action - 56
+                print(f"  {AI_COLOR}{ai_name} 指定花色: {SUIT_COLORS[si]}{SUIT_SYMBOLS[si]}{AI_RESET}")
                 obs, rewards, done, info = env.step(action)
                 continue
 
             action = ai_choose_action(env, model, player, device)
 
             if action == DRAW_ACTION:
-                print(f"  {ai_name} 摸了一张牌")
                 obs, rewards, done, info = env.step(action)
+                # Check if AI has playable cards after drawing (observable "tell")
+                ai_legal = env.legal_actions(player)
+                has_playable = any(a < NUM_CARDS or (56 <= a <= 59) for a in ai_legal)
+                if has_playable:
+                    print(f"  {AI_COLOR}{ai_name} 摸了一张牌 (有牌可出){AI_RESET}")
+                else:
+                    print(f"  {AI_COLOR}{ai_name} 摸了一张牌 (无牌可出){AI_RESET}")
                 # AI still has their turn — now decide to play or pass
                 action2 = ai_choose_action(env, model, player, device)
                 if action2 == PASS_ACTION:
-                    print(f"  {ai_name} 选择不出牌")
+                    print(f"  {AI_COLOR}{ai_name} 选择不出牌{AI_RESET}")
                     obs, rewards, done, info = env.step(action2)
                 else:
-                    print(describe_action(ai_name, action2, env))
+                    print(f"  {AI_COLOR}{describe_action(ai_name, action2, env).strip()}{AI_RESET}")
                     obs, rewards, done, info = env.step(action2)
                     if env.phase == "choose_suit" and env._pending_8_player == player:
                         suit_action = ai_choose_action(env, model, player, device)
-                        print(f"  {ai_name} 指定花色: {SUIT_SYMBOLS[suit_action - 56]}")
+                        si = suit_action - 56
+                        print(f"  {AI_COLOR}{ai_name} 指定花色: {SUIT_COLORS[si]}{SUIT_SYMBOLS[si]}{AI_RESET}")
                         obs, rewards, done, info = env.step(suit_action)
             elif action == PASS_ACTION:
-                print(f"  {ai_name} 跳过")
+                print(f"  {AI_COLOR}{ai_name} 跳过{AI_RESET}")
                 obs, rewards, done, info = env.step(action)
             else:
-                print(describe_action(ai_name, action, env))
+                print(f"  {AI_COLOR}{describe_action(ai_name, action, env).strip()}{AI_RESET}")
                 obs, rewards, done, info = env.step(action)
                 if env.phase == "choose_suit" and env._pending_8_player == player:
                     suit_action = ai_choose_action(env, model, player, device)
-                    print(f"  {ai_name} 指定花色: {SUIT_SYMBOLS[suit_action - 56]}")
+                    si = suit_action - 56
+                    print(f"  {AI_COLOR}{ai_name} 指定花色: {SUIT_COLORS[si]}{SUIT_SYMBOLS[si]}{AI_RESET}")
                     obs, rewards, done, info = env.step(suit_action)
 
     # Game over
author	haoyuren <13851610112@163.com>	2026-02-22 03:16:08 -0600
committer	haoyuren <13851610112@163.com>	2026-02-22 03:16:08 -0600
commit	1cb5eb34ead9b4efc1032ec74c6ccc439f007c18 (patch)
tree	8b6fa5847d4057f9298f3799a9d26c5922050a2e
parent	dc4795bf7a9991fca4673bf928830b7b627034e4 (diff)