Add Colab GPU training notebook

Clone → train on GPU → download or push model back to GitHub. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
author: haoyuren <13851610112@163.com> 2026-02-22 01:49:36 -0600
committer: haoyuren <13851610112@163.com> 2026-02-22 01:49:36 -0600
commit: 60e5072dcc654322e050f54ae4b789550e6aa40a (patch)
tree: c9375aa1e6f9dad125189acaacf494d05f202fc4
parent: 72cf72d704ca1a3bf4e2a5e04dcbbad99dc0f98e (diff)
1 files changed, 204 insertions, 0 deletions
diff --git a/train_colab.ipynb b/train_colab.ipynb
new file mode 100644
index 0000000..4856b26
--- /dev/null
+++ b/train_colab.ipynb
@@ -0,0 +1,204 @@
+{
+ "nbformat": 4,
+ "nbformat_minor": 0,
+ "metadata": {
+  "colab": {
+   "provenance": []
+  },
+  "kernelspec": {
+   "name": "python3",
+   "display_name": "Python 3"
+  },
+  "accelerator": "GPU"
+ },
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Blazing Eights - Colab GPU Training\n",
+    "\n",
+    "Clone repo → Train PPO agent on GPU → Download the trained model or push it back to GitHub"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 1. Setup: Clone repo & install deps"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "source": [
+    "import os\n",
+    "\n",
+    "# ====== CONFIG ======\n",
+    "GITHUB_USERNAME = \"haoyuren\"  # <-- your GitHub username\n",
+    "REPO_NAME = \"blazing8\"\n",
+    "# ====================\n",
+    "\n",
+    "# Safe to re-run: skip the clone when the checkout already exists, and do not\n",
+    "# cd again when the kernel is already inside the repo directory.\n",
+    "if os.path.basename(os.getcwd()) != REPO_NAME:\n",
+    "    if not os.path.isdir(REPO_NAME):\n",
+    "        !git clone https://github.com/{GITHUB_USERNAME}/{REPO_NAME}.git\n",
+    "    %cd {REPO_NAME}\n",
+    "# %pip (not !pip) installs into the environment the running kernel uses.\n",
+    "%pip install -q torch numpy"
+   ],
+   "execution_count": null,
+   "outputs": []
+  },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "source": [
+    "import torch\n",
+    "\n",
+    "# Report the runtime up front so a CPU-only session is noticed before training.\n",
+    "print(f\"PyTorch: {torch.__version__}\")\n",
+    "print(f\"CUDA available: {torch.cuda.is_available()}\")\n",
+    "if torch.cuda.is_available():\n",
+    "    print(f\"GPU: {torch.cuda.get_device_name(0)}\")\n",
+    "else:\n",
+    "    # Nothing fails here without a GPU, so point the user at the Colab setting.\n",
+    "    print(\"No GPU detected. In Colab: Runtime → Change runtime type → GPU, then re-run from the top.\")"
+   ],
+   "execution_count": null,
+   "outputs": []
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 2. Train"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "source": [
+    "# 2-player training: GPU makes the PPO update faster\n",
+    "NUM_PLAYERS = 2\n",
+    "EPISODES = 200000\n",
+    "!python train.py --num_players {NUM_PLAYERS} --episodes {EPISODES} --save_path blazing_ppo_{NUM_PLAYERS}p"
+   ],
+   "execution_count": null,
+   "outputs": []
+  },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "source": [
+    "# (Optional) 3-player training\n",
+    "# !python train.py --num_players 3 --episodes 300000 --save_path blazing_ppo_3p"
+   ],
+   "execution_count": null,
+   "outputs": []
+  },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "source": [
+    "# (Optional) 4-player training\n",
+    "# !python train.py --num_players 4 --episodes 400000 --lr 1e-4 --ent_coef 0.02 --save_path blazing_ppo_4p"
+   ],
+   "execution_count": null,
+   "outputs": []
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 3. Download model locally (Option A)\n",
+    "Download .pt files directly from Colab to your machine."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "source": [
+    "from google.colab import files\n",
+    "import glob\n",
+    "\n",
+    "# Download the final model(s). Also check models/, where the GitHub push cell\n",
+    "# moves them, so this cell still finds the files if that cell ran first.\n",
+    "final_paths = sorted(glob.glob(\"*_final.pt\") + glob.glob(\"models/*_final.pt\"))\n",
+    "if not final_paths:\n",
+    "    print(\"No *_final.pt model found. Train first!\")\n",
+    "for f in final_paths:\n",
+    "    print(f\"Downloading {f}...\")\n",
+    "    files.download(f)"
+   ],
+   "execution_count": null,
+   "outputs": []
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 4. Push model to GitHub (Option B)\n",
+    "\n",
+    "Push trained .pt files to a `models/` directory in the repo.\n",
+    "\n",
+    "You'll need a **GitHub Personal Access Token** (PAT).\n",
+    "Create one at: https://github.com/settings/tokens → Generate new token (classic) → check `repo` scope."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "source": [
+    "from getpass import getpass\n",
+    "import glob\n",
+    "import os\n",
+    "import shutil\n",
+    "\n",
+    "# Prompt for the token instead of hard-coding it in the notebook.\n",
+    "TOKEN = getpass(\"Enter your GitHub PAT: \")\n",
+    "\n",
+    "# Configure git\n",
+    "!git config user.email \"colab@training.ai\"\n",
+    "!git config user.name \"Colab Training\"\n",
+    "\n",
+    "# Create models dir, move .pt files there. Done in Python so that a re-run\n",
+    "# (files already moved, glob matches nothing) is a no-op instead of an mv error.\n",
+    "os.makedirs(\"models\", exist_ok=True)\n",
+    "for pt_path in glob.glob(\"*_final.pt\"):\n",
+    "    shutil.move(pt_path, os.path.join(\"models\", pt_path))\n",
+    "!ls -lh models/\n",
+    "\n",
+    "# Append rules that un-ignore models/*.pt (this change is committed below).\n",
+    "# \"a+\" creates .gitignore when it is missing, and the membership check keeps\n",
+    "# re-runs from appending the same rules again.\n",
+    "ALLOW_RULES = \"\\n# Allow models dir\\n!models/\\n!models/*.pt\\n\"\n",
+    "with open(\".gitignore\", \"a+\") as f:\n",
+    "    f.seek(0)  # append mode opens at end-of-file; rewind to read existing rules\n",
+    "    if \"!models/*.pt\" not in f.read():\n",
+    "        f.write(ALLOW_RULES)  # append-mode writes always land at the end\n",
+    "\n",
+    "!git add models/ .gitignore\n",
+    "!git commit -m \"Add trained models from Colab GPU\"\n",
+    "# Push the checked-out branch (HEAD) rather than assuming it is named \"main\".\n",
+    "!git push https://{TOKEN}@github.com/{GITHUB_USERNAME}/{REPO_NAME}.git HEAD"
+   ],
+   "execution_count": null,
+   "outputs": []
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 5. Quick evaluation"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "source": [
+    "import glob\n",
+    "import sys\n",
+    "\n",
+    "# Make the repo's train.py importable from the current directory.\n",
+    "sys.path.insert(0, \".\")\n",
+    "from train import PolicyValueNet, evaluate_vs_random\n",
+    "\n",
+    "device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n",
+    "model = PolicyValueNet().to(device)\n",
+    "\n",
+    "# Look for a trained model in the working directory, then in models/.\n",
+    "final_models = glob.glob(\"*_final.pt\") + glob.glob(\"models/*_final.pt\")\n",
+    "if not final_models:\n",
+    "    print(\"No model found. Train first!\")\n",
+    "else:\n",
+    "    # Only the first match is loaded and evaluated.\n",
+    "    ckpt = torch.load(final_models[0], map_location=device, weights_only=True)\n",
+    "    model.load_state_dict(ckpt[\"model\"])\n",
+    "    model.eval()\n",
+    "    print(f\"Loaded: {final_models[0]}\")\n",
+    "    print(f\"Trained for {ckpt.get('episode', '?')} episodes\")\n",
+    "    print()\n",
+    "\n",
+    "    # Win rate against random opponents at each table size; 1/n is the chance level.\n",
+    "    for n in (2, 3, 4):\n",
+    "        wr = evaluate_vs_random(model, num_players=n, num_games=2000, device=device)\n",
+    "        print(f\"  {n} players: win rate = {wr:.1%} (random baseline: {1/n:.1%})\")"
+   ],
+   "execution_count": null,
+   "outputs": []
+  }
+ ]
+}
author	haoyuren <13851610112@163.com>	2026-02-22 01:49:36 -0600
committer	haoyuren <13851610112@163.com>	2026-02-22 01:49:36 -0600
commit	60e5072dcc654322e050f54ae4b789550e6aa40a (patch)
tree	c9375aa1e6f9dad125189acaacf494d05f202fc4
parent	72cf72d704ca1a3bf4e2a5e04dcbbad99dc0f98e (diff)