summary refs log tree commit diff
path: root/train_colab.ipynb
diff options
context:
space:
mode:
Diffstat (limited to 'train_colab.ipynb')
-rw-r--r-- train_colab.ipynb 52
1 files changed, 4 insertions, 48 deletions
diff --git a/train_colab.ipynb b/train_colab.ipynb
index 24ae0dc..afae94d 100644
--- a/train_colab.ipynb
+++ b/train_colab.ipynb
@@ -15,11 +15,7 @@
{
"cell_type": "markdown",
"metadata": {},
- "source": [
- "# Blazing Eights - Colab GPU Training\n",
- "\n",
- "Clone repo → Train PPO agent on GPU → Push trained model back to GitHub"
- ]
+ "source": "# Blazing Eights - Colab GPU Training\n\nClone repo → Train PPO agent (CPU collect, GPU update) → Push trained model back to GitHub\n\n**Game**: UNO variant with custom special cards (8=Wild, K=All draw, J=Skip, Swap=Swap hands)."
},
{
"cell_type": "markdown",
@@ -58,30 +54,14 @@
{
"cell_type": "code",
"metadata": {},
- "source": [
- "# 2-player training: GPU makes the PPO update faster\n",
- "!python train.py --num_players 2 --episodes 200000 --save_path blazing_ppo_2p"
- ],
+ "source": "# 2-player training with greedy warmup + CSV logging\n# Game simulation on CPU, PPO updates on GPU automatically\n!python train.py --num_players 2 --episodes 200000 --save_path blazing_ppo_2p\n\n# Show training log\nimport pandas as pd\ndf = pd.read_csv(\"blazing_ppo_2p_log.csv\")\nprint(df.to_string(index=False))",
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {},
- "source": [
- "# (Optional) 3-player training\n",
- "# !python train.py --num_players 3 --episodes 300000 --save_path blazing_ppo_3p"
- ],
- "execution_count": null,
- "outputs": []
- },
- {
- "cell_type": "code",
- "metadata": {},
- "source": [
- "# (Optional) 4-player training\n",
- "# !python train.py --num_players 4 --episodes 400000 --lr 1e-4 --ent_coef 0.02 --save_path blazing_ppo_4p"
- ],
+ "source": "# (Optional) 3-player training\n# !python train.py --num_players 3 --episodes 300000 --save_path blazing_ppo_3p\n\n# (Optional) Skip greedy warmup\n# !python train.py --num_players 2 --episodes 200000 --greedy_warmup 0 --save_path blazing_ppo_2p_no_warmup",
"execution_count": null,
"outputs": []
},
@@ -163,31 +143,7 @@
{
"cell_type": "code",
"metadata": {},
- "source": [
- "import sys\n",
- "sys.path.insert(0, \".\")\n",
- "from train import PolicyValueNet, evaluate_vs_random\n",
- "\n",
- "device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n",
- "model = PolicyValueNet().to(device)\n",
- "\n",
- "# Load the trained model\n",
- "import glob\n",
- "final_models = glob.glob(\"*_final.pt\") + glob.glob(\"models/*_final.pt\")\n",
- "if final_models:\n",
- " ckpt = torch.load(final_models[0], map_location=device, weights_only=True)\n",
- " model.load_state_dict(ckpt[\"model\"])\n",
- " model.eval()\n",
- " print(f\"Loaded: {final_models[0]}\")\n",
- " print(f\"Trained for {ckpt.get('episode', '?')} episodes\")\n",
- " print()\n",
- "\n",
- " for n in [2, 3, 4]:\n",
- " wr = evaluate_vs_random(model, num_players=n, num_games=2000, device=device)\n",
- " print(f\" {n} players: win rate = {wr:.1%} (random baseline: {1/n:.1%})\")\n",
- "else:\n",
- " print(\"No model found. Train first!\")"
- ],
+ "source": "import sys\nsys.path.insert(0, \".\")\nfrom train import PolicyValueNet, evaluate_vs_random\n\ndevice = \"cpu\" # eval on CPU (single-sample inference)\nmodel = PolicyValueNet().to(device)\n\nimport glob\nfinal_models = glob.glob(\"*_final.pt\") + glob.glob(\"models/*_final.pt\")\nif final_models:\n    ckpt = torch.load(final_models[0], map_location=device, weights_only=True)\n    model.load_state_dict(ckpt[\"model\"])\n    model.eval()\n    print(f\"Loaded: {final_models[0]}\")\n    print(f\"Trained for {ckpt.get('episode', '?')} episodes\")\n    print()\n\n    for n in [2, 3, 4]:\n        wr = evaluate_vs_random(model, num_players=n, num_games=2000, device=device)\n        print(f\" {n} players: win rate = {wr:.1%} (random baseline: {1/n:.1%})\")\nelse:\n    print(\"No model found. Train first!\")",
"execution_count": null,
"outputs": []
}