summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: haoyuren <13851610112@163.com> 2026-02-22 03:07:47 -0600
committer: haoyuren <13851610112@163.com> 2026-02-22 03:07:47 -0600
commit: dc4795bf7a9991fca4673bf928830b7b627034e4 (patch)
tree: c5d2f03ed4041986a8a0acc10ef1e2ab85cf72be
parent: 480913b234ecf6147666bce641cecbaaeadd408a (diff)
Update rules: free draw/pass, remove Q in 2-player games
- Players can freely choose to draw even with playable cards
- After drawing, players may pass instead of playing
- Remove Q cards from deck in 2-player games (reverse has no effect)
- Use greedy random opponent in evaluation

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
-rw-r--r-- blazing_env.py | 9
-rw-r--r-- train.py | 10
2 files changed, 16 insertions, 3 deletions
diff --git a/blazing_env.py b/blazing_env.py
index c3d97ae..c440293 100644
--- a/blazing_env.py
+++ b/blazing_env.py
@@ -15,7 +15,10 @@ Special cards:
Rules:
- Match top card by suit OR rank (unless playing 8 or Swap)
- - Can't play → draw 1; if drawn card is playable, play it immediately
+ - Player may freely choose to draw even if they have playable cards
+ - After drawing, player may play any playable card OR pass (end turn)
+ - Each turn allows at most one draw
+ - If no playable cards and deck is empty, player must pass
- First player to empty hand wins
- Initial hand: 5 cards each
"""
@@ -92,7 +95,9 @@ class BlazingEightsEnv:
self.rng = np.random.default_rng(seed)
# Build & shuffle deck
- deck = list(range(NUM_CARDS))
+ # In 2-player games, remove Q cards (reverse has no effect)
+ deck = [c for c in range(NUM_CARDS)
+ if not (self.num_players == 2 and card_rank(c) == RANK_Q)]
self.rng.shuffle(deck)
# Deal 5 cards each
diff --git a/train.py b/train.py
index e955c09..7f85267 100644
--- a/train.py
+++ b/train.py
@@ -100,6 +100,14 @@ class Transition:
self.legal_mask = legal_mask
+def greedy_random_action(legal: list[int]) -> int:
+ """Prefer a random play action from `legal`; fall back to any random legal action (draw/pass) only when no play is available."""
+ play_actions = [a for a in legal if a < NUM_CARDS or (56 <= a <= 59)]  # NOTE(review): 56-59 are presumably the non-card play actions — confirm against blazing_env's action layout
+ if play_actions:
+ return int(np.random.choice(play_actions))  # int() turns the NumPy scalar into a plain Python int
+ return int(np.random.choice(legal))  # nothing counted as a play: choose among whatever is legal
+
+
def collect_game(env: BlazingEightsEnv, model: PolicyValueNet, device="cpu"):
"""
Play one full game, return per-player trajectories.
@@ -389,7 +397,7 @@ def evaluate_vs_random(model: PolicyValueNet, num_players=2, num_games=1000, dev
if player == 0:
action, _, _ = model.get_action(obs, legal, device)
else:
- action = np.random.choice(legal)
+ action = greedy_random_action(legal)
obs, rewards, done, info = env.step(action)
if done:
if env.winner == 0: