diff options
| author | haoyuren <13851610112@163.com> | 2026-02-22 12:55:03 -0600 |
|---|---|---|
| committer | haoyuren <13851610112@163.com> | 2026-02-22 12:55:03 -0600 |
| commit | 1f4ee77d131649b674a80f3e43804acf323b462c (patch) | |
| tree | bca8264788a6c65c11dd074f168ec9a3ab08b081 | |
| parent | ba4715c767d6538fc6f0c6c7e92073938ea03f2c (diff) | |
Raise entropy floor to 0.02, increase eval games to 2000
Prevents premature convergence with higher entropy minimum and
reduces eval variance with 4x more evaluation games.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
| -rw-r--r-- | train.py | 4 |
1 file changed, 2 insertions, 2 deletions
```diff
@@ -473,7 +473,7 @@ def train(args):
         avg_len = np.mean(all_game_lengths[-args.eval_every:]) if all_game_lengths else 0
         avg_loss_log = recent_loss / max(recent_loss_count, 1) if recent_loss_count > 0 else 0
         vs_wr = evaluate_vs_greedy_batch(model, num_players=args.num_players,
-                                         num_games=500, device=collect_device)
+                                         num_games=2000, device=collect_device)
         with open(log_path, "a") as f:
             f.write(f"{ep},{avg_len:.1f},{avg_loss_log:.4f},{vs_wr:.4f}\n")
         tqdm.write(f"  [Eval ep{ep}] avg_len={avg_len:.1f} vs_greedy={vs_wr:.1%}")
@@ -571,7 +571,7 @@ if __name__ == "__main__":
     parser.add_argument("--gamma", type=float, default=0.99)
     parser.add_argument("--lam", type=float, default=0.95)
     parser.add_argument("--clip_eps", type=float, default=0.2)
-    parser.add_argument("--ent_coef", type=float, default=0.01,
+    parser.add_argument("--ent_coef", type=float, default=0.02,
                         help="Final entropy coefficient")
     parser.add_argument("--ent_start", type=float, default=None,
                         help="Initial entropy coef, linearly decays to ent_coef (default: 5x ent_coef)")
```
