diff options
| author | haoyuren <13851610112@163.com> | 2026-02-22 01:49:36 -0600 |
|---|---|---|
| committer | haoyuren <13851610112@163.com> | 2026-02-22 01:49:36 -0600 |
| commit | 60e5072dcc654322e050f54ae4b789550e6aa40a (patch) | |
| tree | c9375aa1e6f9dad125189acaacf494d05f202fc4 /train_colab.ipynb | |
| parent | 72cf72d704ca1a3bf4e2a5e04dcbbad99dc0f98e (diff) | |
Add Colab GPU training notebook
Clone → train on GPU → download or push model back to GitHub.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
Diffstat (limited to 'train_colab.ipynb')
| -rw-r--r-- | train_colab.ipynb | 204 |
1 files changed, 204 insertions, 0 deletions
diff --git a/train_colab.ipynb b/train_colab.ipynb
new file mode 100644
index 0000000..4856b26
--- /dev/null
+++ b/train_colab.ipynb
@@ -0,0 +1,234 @@
+{
+  "nbformat": 4,
+  "nbformat_minor": 0,
+  "metadata": {
+    "colab": {
+      "provenance": []
+    },
+    "kernelspec": {
+      "name": "python3",
+      "display_name": "Python 3"
+    },
+    "accelerator": "GPU"
+  },
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "# Blazing Eights - Colab GPU Training\n",
+        "\n",
+        "Clone repo → Train PPO agent on GPU → Push trained model back to GitHub"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "## 1. Setup: Clone repo & install deps"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {},
+      "source": [
+        "import os\n",
+        "\n",
+        "# ====== CONFIG ======\n",
+        "GITHUB_USERNAME = \"haoyuren\"  # <-- your GitHub username\n",
+        "REPO_NAME = \"blazing8\"\n",
+        "# ====================\n",
+        "\n",
+        "# Idempotent setup: re-running this cell must not clone a second copy of the\n",
+        "# repo nested inside the first one.\n",
+        "if os.path.basename(os.getcwd()) != REPO_NAME:\n",
+        "    if not os.path.isdir(REPO_NAME):\n",
+        "        !git clone https://github.com/{GITHUB_USERNAME}/{REPO_NAME}.git\n",
+        "    %cd {REPO_NAME}\n",
+        "# %pip (not !pip) installs into the environment of the running kernel.\n",
+        "%pip install -q torch numpy"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {},
+      "source": [
+        "import torch\n",
+        "print(f\"PyTorch: {torch.__version__}\")\n",
+        "print(f\"CUDA available: {torch.cuda.is_available()}\")\n",
+        "if torch.cuda.is_available():\n",
+        "    print(f\"GPU: {torch.cuda.get_device_name(0)}\")"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "## 2. Train"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {},
+      "source": [
+        "# 2-player training: GPU makes the PPO update faster\n",
+        "!python train.py --num_players 2 --episodes 200000 --save_path blazing_ppo_2p"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {},
+      "source": [
+        "# (Optional) 3-player training\n",
+        "# !python train.py --num_players 3 --episodes 300000 --save_path blazing_ppo_3p"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {},
+      "source": [
+        "# (Optional) 4-player training\n",
+        "# !python train.py --num_players 4 --episodes 400000 --lr 1e-4 --ent_coef 0.02 --save_path blazing_ppo_4p"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "## 3. Download model locally (Option A)\n",
+        "Download .pt files directly from Colab to your machine."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {},
+      "source": [
+        "import glob\n",
+        "from google.colab import files\n",
+        "\n",
+        "# Also look in models/: the push cell (Option B) moves the checkpoints there.\n",
+        "final_checkpoints = sorted(glob.glob(\"*_final.pt\") + glob.glob(\"models/*_final.pt\"))\n",
+        "if not final_checkpoints:\n",
+        "    print(\"No *_final.pt files found. Train first!\")\n",
+        "for checkpoint_path in final_checkpoints:\n",
+        "    print(f\"Downloading {checkpoint_path}...\")\n",
+        "    files.download(checkpoint_path)"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "## 4. Push model to GitHub (Option B)\n",
+        "\n",
+        "Push trained .pt files to a `models/` directory in the repo.\n",
+        "\n",
+        "You'll need a **GitHub Personal Access Token** (PAT).\n",
+        "Create one at: https://github.com/settings/tokens → Generate new token (classic) → check `repo` scope."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {},
+      "source": [
+        "import glob\n",
+        "import os\n",
+        "import shutil\n",
+        "import subprocess\n",
+        "from getpass import getpass\n",
+        "\n",
+        "TOKEN = getpass(\"Enter your GitHub PAT: \")\n",
+        "\n",
+        "# Configure git\n",
+        "!git config user.email \"colab@training.ai\"\n",
+        "!git config user.name \"Colab Training\"\n",
+        "\n",
+        "# Create models dir, move .pt files there. Pure Python instead of `!mv`, so a\n",
+        "# re-run with nothing left to move is a no-op rather than a shell error.\n",
+        "os.makedirs(\"models\", exist_ok=True)\n",
+        "for checkpoint_path in glob.glob(\"*_final.pt\"):\n",
+        "    shutil.move(checkpoint_path, os.path.join(\"models\", checkpoint_path))\n",
+        "!ls -lh models/\n",
+        "\n",
+        "# Un-ignore models/*.pt. The .gitignore change is committed with the models.\n",
+        "# The rules are appended only once, and a missing .gitignore is tolerated.\n",
+        "ALLOW_RULES = \"\\n# Allow models dir\\n!models/\\n!models/*.pt\\n\"\n",
+        "gitignore_text = \"\"\n",
+        "if os.path.exists(\".gitignore\"):\n",
+        "    with open(\".gitignore\", \"r\") as f:\n",
+        "        gitignore_text = f.read()\n",
+        "if \"!models/*.pt\" not in gitignore_text:\n",
+        "    with open(\".gitignore\", \"a\") as f:\n",
+        "        f.write(ALLOW_RULES)\n",
+        "\n",
+        "!git add models/ .gitignore\n",
+        "!git commit -m \"Add trained models from Colab GPU\"\n",
+        "\n",
+        "# Push through subprocess so the output can be scrubbed before it is shown:\n",
+        "# git may echo the remote URL, which embeds the PAT, and cell output is saved\n",
+        "# with the notebook.\n",
+        "push_url = f\"https://{TOKEN}@github.com/{GITHUB_USERNAME}/{REPO_NAME}.git\"\n",
+        "push = subprocess.run([\"git\", \"push\", push_url, \"main\"], capture_output=True, text=True)\n",
+        "push_log = push.stdout + push.stderr\n",
+        "print(push_log.replace(TOKEN, \"***\") if TOKEN else push_log)"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "## 5. Quick evaluation"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {},
+      "source": [
+        "import glob\n",
+        "import os\n",
+        "import sys\n",
+        "\n",
+        "import torch\n",
+        "\n",
+        "sys.path.insert(0, \".\")\n",
+        "from train import PolicyValueNet, evaluate_vs_random\n",
+        "\n",
+        "device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n",
+        "model = PolicyValueNet().to(device)\n",
+        "\n",
+        "# Load the most recently written final checkpoint (glob order is arbitrary).\n",
+        "final_models = glob.glob(\"*_final.pt\") + glob.glob(\"models/*_final.pt\")\n",
+        "if final_models:\n",
+        "    latest_model = max(final_models, key=os.path.getmtime)\n",
+        "    ckpt = torch.load(latest_model, map_location=device, weights_only=True)\n",
+        "    model.load_state_dict(ckpt[\"model\"])\n",
+        "    model.eval()\n",
+        "    print(f\"Loaded: {latest_model}\")\n",
+        "    print(f\"Trained for {ckpt.get('episode', '?')} episodes\")\n",
+        "    print()\n",
+        "\n",
+        "    for n in [2, 3, 4]:\n",
+        "        wr = evaluate_vs_random(model, num_players=n, num_games=2000, device=device)\n",
+        "        print(f\" {n} players: win rate = {wr:.1%} (random baseline: {1/n:.1%})\")\n",
+        "else:\n",
+        "    print(\"No model found. Train first!\")"
+      ],
+      "execution_count": null,
+      "outputs": []
+    }
+  ]
+}
