Update rules: free draw/pass, remove Q in 2-player games

- Players can freely choose to draw even with playable cards
- After drawing, players may pass instead of playing
- Remove Q cards from deck in 2-player games (reverse has no effect)
- Use greedy random opponent in evaluation

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
author: haoyuren <13851610112@163.com> 2026-02-22 03:07:47 -0600
committer: haoyuren <13851610112@163.com> 2026-02-22 03:07:47 -0600
commit: dc4795bf7a9991fca4673bf928830b7b627034e4 (patch)
tree: c5d2f03ed4041986a8a0acc10ef1e2ab85cf72be /train.py
parent: 480913b234ecf6147666bce641cecbaaeadd408a (diff)
1 file changed, 9 insertions, 1 deletion
diff --git a/train.py b/train.py
index e955c09..7f85267 100644
--- a/train.py
+++ b/train.py
@@ -100,6 +100,14 @@ class Transition:
         self.legal_mask = legal_mask
 
 
+def greedy_random_action(legal: list[int]) -> int:
+    """Pick a random playable card; only draw/pass if no card to play."""
+    play_actions = [a for a in legal if a < NUM_CARDS or (56 <= a <= 59)]
+    if play_actions:
+        return int(np.random.choice(play_actions))
+    return int(np.random.choice(legal))
+
+
 def collect_game(env: BlazingEightsEnv, model: PolicyValueNet, device="cpu"):
     """
     Play one full game, return per-player trajectories.
@@ -389,7 +397,7 @@ def evaluate_vs_random(model: PolicyValueNet, num_players=2, num_games=1000, dev
             if player == 0:
                 action, _, _ = model.get_action(obs, legal, device)
             else:
-                action = np.random.choice(legal)
+                action = greedy_random_action(legal)
             obs, rewards, done, info = env.step(action)
             if done:
                 if env.winner == 0:
author	haoyuren <13851610112@163.com>	2026-02-22 03:07:47 -0600
committer	haoyuren <13851610112@163.com>	2026-02-22 03:07:47 -0600
commit	dc4795bf7a9991fca4673bf928830b7b627034e4 (patch)
tree	c5d2f03ed4041986a8a0acc10ef1e2ab85cf72be /train.py
parent	480913b234ecf6147666bce641cecbaaeadd408a (diff)