From 4e8e30b8bebf14775c966067550832e9cb175117 Mon Sep 17 00:00:00 2001 From: chzhang Date: Tue, 7 Mar 2023 22:36:04 +0800 Subject: q learning on cartpole --- rl/tutorials/08_cart_pole_q_learning.ipynb | 31433 +++++++++++++++++++++++++++ 1 file changed, 31433 insertions(+) create mode 100644 rl/tutorials/08_cart_pole_q_learning.ipynb (limited to 'rl') diff --git a/rl/tutorials/08_cart_pole_q_learning.ipynb b/rl/tutorials/08_cart_pole_q_learning.ipynb new file mode 100644 index 0000000..44c68ef --- /dev/null +++ b/rl/tutorials/08_cart_pole_q_learning.ipynb @@ -0,0 +1,31433 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "ExecuteTime": { + "end_time": "2023-03-07T14:08:48.249166Z", + "start_time": "2023-03-07T14:08:48.246325Z" + } + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "import gym\n", + "import matplotlib.pyplot as plt\n", + "from matplotlib import animation\n", + "import random" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "ExecuteTime": { + "end_time": "2023-03-07T14:03:53.352738Z", + "start_time": "2023-03-07T14:03:53.345794Z" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "'0.15.4'" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gym.version.VERSION" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "ExecuteTime": { + "end_time": "2023-03-05T14:51:34.744085Z", + "start_time": "2023-03-05T14:51:31.888070Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Looking in indexes: http://mirrors.aliyun.com/pypi/simple/\r\n", + "Requirement already satisfied: JSAnimation in /Users/chunhuizhang/anaconda3/envs/py3/lib/python3.7/site-packages (0.1)\r\n" + ] + } + ], + "source": [ + "!pip install JSAnimation" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "ExecuteTime": { + "end_time": "2023-03-07T14:04:07.798088Z", + "start_time": "2023-03-07T14:04:07.791365Z" + } + }, + "outputs": [], + "source": [ + "from JSAnimation.IPython_display import display_animation\n", + "from IPython.display import display, HTML" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 重新认识 cartpole 环境" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "- 小滑块/倒立摆\n", + "- 典型的 mdp(markov decision process)\n", + " - 下一时刻的状态(转移)$s_{t+1}$ 只跟当前状态 $s_t$ 和(当前状态下采取的)$a_t$ 有关\n", + "- action space is discrete and finite\n", + "- state(observation)space is continuous" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "ExecuteTime": { + "end_time": "2023-03-07T14:04:12.875444Z", + "start_time": "2023-03-07T14:04:12.854714Z" + } + }, + "outputs": [], + "source": [ + "env = gym.make('CartPole-v0')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### state/space" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "ExecuteTime": { + "end_time": "2023-03-07T14:04:15.382240Z", + "start_time": "2023-03-07T14:04:15.377243Z" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([-3.40156280e-02, -4.64929535e-05, -2.24331342e-02, -3.44344543e-02])" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# init space\n", + "env.reset()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "ExecuteTime": { + "end_time": "2023-03-07T14:04:25.359650Z", + "start_time": "2023-03-07T14:04:25.356727Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Discrete(2)\n" + ] + } + ], + "source": [ + "# action space\n", + "print(env.action_space)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "ExecuteTime": { + "end_time": "2023-03-07T14:04:30.487576Z", + "start_time": "2023-03-07T14:04:30.484735Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2\n" + ] + } + ], + "source": [ + "print(env.action_space.n)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "ExecuteTime": { + "end_time": "2023-03-07T14:04:51.524995Z", + "start_time": "2023-03-07T14:04:51.520685Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Box(4,)\n", + "[-4.8000002e+00 -3.4028235e+38 -4.1887903e-01 -3.4028235e+38]\n", + "[4.8000002e+00 3.4028235e+38 4.1887903e-01 3.4028235e+38]\n", + "4\n" + ] + } + ], + "source": [ + "# observation space\n", + "# https://www.gymlibrary.dev/environments/classic_control/cart_pole/\n", + "print(env.observation_space)\n", + "print(env.observation_space.low)\n", + "print(env.observation_space.high)\n", + "print(env.observation_space.shape[0])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### one episode" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "ExecuteTime": { + "end_time": "2023-03-05T14:57:32.225046Z", + "start_time": "2023-03-05T14:57:31.832564Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "step: 1, state: [-0.02012693 0.20925067 0.03958993 -0.29435305]\n", + "step: 2, state: [-0.01594191 0.40378649 0.03370287 -0.57429172]\n", + "step: 3, state: [-0.00786618 0.20820864 0.02221704 -0.27118479]\n", + "step: 4, state: [-0.00370201 0.01277682 0.01679334 0.02842187]\n", + "step: 5, state: [-0.00344647 -0.18258188 0.01736178 0.32635562]\n", + "step: 6, state: [-0.00709811 0.01228863 0.02388889 0.03919801]\n", + "step: 7, state: [-0.00685234 -0.18316759 0.02467285 0.33932136]\n", + "step: 8, state: [-0.01051569 -0.37863177 0.03145928 0.63968147]\n", + "step: 9, state: [-0.01808833 -0.18396223 0.04425291 0.35706915]\n", + "step: 10, state: [-0.02176757 -0.37968447 0.05139429 0.66337121]\n", + "step: 11, state: [-0.02936126 -0.18531377 0.06466172 0.38730339]\n", + "step: 12, state: [-0.03306753 0.00883356 0.07240779 0.1156883 ]\n", + "step: 13, state: [-0.03289086 -0.18724716 0.07472155 0.43030799]\n", + "step: 14, state: [-0.03663581 0.00674154 0.08332771 0.16208459]\n", + "step: 15, state: [-0.03650098 -0.18946834 0.0865694 0.47984822]\n", + "step: 16, state: [-0.04029034 -0.38569887 0.09616637 0.79851137]\n", + "step: 17, state: [-0.04800432 -0.19201853 0.11213659 0.53756254]\n", + "step: 18, state: [-0.05184469 0.00136303 0.12288785 0.28221117]\n", + "step: 19, state: [-0.05181743 0.19453743 0.12853207 0.0306729 ]\n", + "step: 20, state: [-0.04792668 0.38760429 0.12914553 -0.21885405]\n", + "step: 21, state: [-0.0401746 0.19089548 0.12476845 0.1116153 ]\n", + "step: 22, state: [-0.03635669 -0.00577295 0.12700075 0.44091157]\n", + "step: 23, state: [-0.03647215 -0.20244188 0.13581898 0.770777 ]\n", + "step: 24, state: [-0.04052098 -0.00942435 0.15123452 0.52372515]\n", + "step: 25, state: [-0.04070947 -0.20631499 0.16170903 0.85998375]\n", + "step: 26, state: [-0.04483577 -0.0137211 0.1789087 0.62219743]\n", + "step: 27, state: [-0.04511019 0.17851121 0.19135265 0.39077045]\n", + "step: 28, state: [-0.04153997 -0.01873813 0.19916806 0.73715912]\n", + "step: 29, state: [-0.04191473 0.17315764 0.21391124 0.51317475]\n" + ] + } + ], + "source": [ + "env = gym.make('CartPole-v0')\n", + "# observation = env.reset()\n", + "state = env.reset()\n", + "steps = 0\n", + "frames = []\n", + "while True:\n", + " frames.append(env.render(mode='rgb_array'))\n", + " action = env.action_space.sample()\n", + " state, reward, done, info = env.step(action)\n", + " steps += 1\n", + " print(f'step: {steps}, state: {state}')\n", + " if done:\n", + " break" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "ExecuteTime": { + "end_time": "2023-03-05T14:58:23.485976Z", + "start_time": "2023-03-05T14:58:23.446404Z" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "[array([[[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " ...,\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]]], dtype=uint8),\n", + " array([[[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " ...,\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]]], dtype=uint8),\n", + " array([[[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " ...,\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]]], dtype=uint8),\n", + " array([[[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " ...,\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]]], dtype=uint8),\n", + " array([[[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " ...,\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]]], dtype=uint8),\n", + " array([[[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " ...,\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]]], dtype=uint8),\n", + " array([[[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " ...,\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]]], dtype=uint8),\n", + " array([[[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " ...,\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]]], dtype=uint8),\n", + " array([[[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " ...,\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]]], dtype=uint8),\n", + " array([[[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " ...,\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]]], dtype=uint8),\n", + " array([[[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " ...,\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]]], dtype=uint8),\n", + " array([[[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " ...,\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]]], dtype=uint8),\n", + " array([[[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " ...,\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]]], dtype=uint8),\n", + " array([[[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " ...,\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]]], dtype=uint8),\n", + " array([[[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " ...,\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]]], dtype=uint8),\n", + " array([[[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " ...,\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]]], dtype=uint8),\n", + " array([[[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " ...,\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]]], dtype=uint8),\n", + " array([[[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " ...,\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]]], dtype=uint8),\n", + " array([[[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " ...,\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]]], dtype=uint8),\n", + " array([[[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " ...,\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]]], dtype=uint8),\n", + " array([[[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " ...,\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]]], dtype=uint8),\n", + " array([[[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " ...,\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]]], dtype=uint8),\n", + " array([[[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " ...,\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]]], dtype=uint8),\n", + " array([[[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " ...,\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]]], dtype=uint8),\n", + " array([[[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " ...,\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]]], dtype=uint8),\n", + " array([[[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " ...,\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]]], dtype=uint8),\n", + " array([[[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " ...,\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]]], dtype=uint8),\n", + " array([[[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " ...,\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]]], dtype=uint8),\n", + " array([[[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " ...,\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]],\n", + " \n", + " [[255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " ...,\n", + " [255, 255, 255],\n", + " [255, 255, 255],\n", + " [255, 255, 255]]], dtype=uint8)]" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "frames" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "ExecuteTime": { + "end_time": "2023-03-07T14:05:15.008732Z", + "start_time": "2023-03-07T14:05:15.003533Z" + } + }, + "outputs": [], + "source": [ + "def display_frames_as_gif(frames, output):\n", + " \"\"\"\n", + " Displays a list of frames as a gif, with controls\n", + " 以gif格式显示关键帧列,带有控件\n", + " \"\"\"\n", + " \n", + " fig = plt.figure(figsize=(frames[0].shape[1]/72.0, frames[0].shape[0]/72.0),dpi=72)\n", + " patch = plt.imshow(frames[0])\n", + " plt.axis('off')\n", + " \n", + " def animate(i):\n", + " img = patch.set_data(frames[i])\n", + " return img ## *** return是必须要有的 ***\n", + " \n", + " anim = animation.FuncAnimation(plt.gcf(), animate, frames=len(frames), interval=50)\n", + " \n", + " anim.save(output)\n", + " return HTML(anim.to_jshtml()) ## *** 返回一个HTML对象,以便被调用者显示。 ***\n", + " # display(display_animation(anim, default_mode='loop'))" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": { + "ExecuteTime": { + "end_time": "2023-03-05T14:59:09.169623Z", + "start_time": "2023-03-05T14:58:48.224146Z" + }, + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
\n", + " \n", + "
\n", + " \n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "display_frames_as_gif(frames, output='./save/rand_cartpole.gif')\n", + "display_frames_as_gif(frames, output='./save/rand_cartpole.mp4')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 状态离散化" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": { + "ExecuteTime": { + "end_time": "2023-03-05T15:01:26.306200Z", + "start_time": "2023-03-05T15:01:26.301178Z" + } + }, + "outputs": [], + "source": [ + "NUM_DIGITIZED = 6\n", + "\n", + "# 分桶, 5个值,对应 6 个分段,即 6 个桶 (0, 1, 2, 3, 4, 5)\n", + "def bins(clip_min, clip_max, num_bins=NUM_DIGITIZED):\n", + " return np.linspace(clip_min, clip_max, num_bins+1)[1:-1]\n", + "\n", + "# 按 6 进制映射将 4位 6 进制数映射为 id,\n", + "def digitize_state(observation):\n", + " pos, cart_v, angle, pole_v = observation\n", + " digitized = [np.digitize(pos, bins=bins(-2.4, 2.4, NUM_DIGITIZED)), \n", + " np.digitize(cart_v, bins=bins(-3., 3, NUM_DIGITIZED)), \n", + " np.digitize(angle, bins=bins(-0.418, 0.418, NUM_DIGITIZED)), \n", + " np.digitize(pole_v, bins=bins(-2, 2, NUM_DIGITIZED))]\n", + " # 3,1,2,4 (4位10进制数) = 4*10^0 + 2*10^1 + 1*10^2 + 3*10^3,最终的取值范围是 0-9999,总计 10^4 == 10000\n", + " # a,b,c,d (4位6进制数) = d*6^0 + c*6^1 + b*6^2 + a*6^3,最终的取值范围是 0-`5555`(1295),总计 6^4 == 1296\n", + " ind = sum([d*(NUM_DIGITIZED**i) for i, d in enumerate(digitized)])\n", + " return ind\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": { + "ExecuteTime": { + "end_time": "2023-03-05T15:01:29.874797Z", + "start_time": "2023-03-05T15:01:29.871656Z" + } + }, + "outputs": [], + "source": [ + "obs = env.reset()" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": { + "ExecuteTime": { + "end_time": "2023-03-05T15:01:31.165281Z", + "start_time": "2023-03-05T15:01:31.161059Z" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([ 0.04002152, 0.00259074, -0.03391407, -0.04418303])" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "obs" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": { + "ExecuteTime": { + "end_time": "2023-03-05T14:25:41.628539Z", + "start_time": "2023-03-05T14:25:41.625136Z" + }, + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([-1.6, -0.8, 0. , 0.8, 1.6])" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "bins(-2.4, 2.4, NUM_DIGITIZED)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## q-learning" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": { + "ExecuteTime": { + "end_time": "2023-03-07T14:21:15.848998Z", + "start_time": "2023-03-07T14:21:15.838895Z" + } + }, + "outputs": [], + "source": [ + "class Agent:\n", + " def __init__(self, action_space, n_states, eta=0.5, gamma=0.99, NUM_DIGITIZED=6):\n", + " self.eta = 0.5\n", + " self.gamme = gamma\n", + " # Discrete(2)\n", + " self.action_space = action_space\n", + " self.NUM_DIGITIZED = NUM_DIGITIZED\n", + " self.q_table = np.random.uniform(0, 1, size=(NUM_DIGITIZED**n_states, self.action_space.n))\n", + " \n", + " # 分桶, 5个值,对应 6 个分段,即 6 个桶 (0, 1, 2, 3, 4, 5)\n", + " @staticmethod\n", + " def _bins(clip_min, clip_max, num_bins):\n", + " return np.linspace(clip_min, clip_max, num_bins+1)[1:-1]\n", + "\n", + " # 按 6 进制映射将 4位 6 进制数映射为 id,\n", + " @staticmethod\n", + " def _digitize_state(observation, NUM_DIGITIZED):\n", + " pos, cart_v, angle, pole_v = observation\n", + " digitized = [np.digitize(pos, bins=Agent._bins(-2.4, 2.4, NUM_DIGITIZED)), \n", + " np.digitize(cart_v, bins=Agent._bins(-3., 3, NUM_DIGITIZED)), \n", + " np.digitize(angle, bins=Agent._bins(-0.418, 0.418, NUM_DIGITIZED)), \n", + " np.digitize(pole_v, bins=Agent._bins(-2, 2, NUM_DIGITIZED))]\n", + " # 3,1,2,4 (4位10进制数) = 4*10^0 + 2*10^1 + 1*10^2 + 3*10^3,最终的取值范围是 0-9999,总计 10^4 == 10000\n", + " # a,b,c,d (4位6进制数) = d*6^0 + c*6^1 + b*6^2 + a*6^3,最终的取值范围是 0-`5555`(1295),总计 6^4 == 1296\n", + " ind = sum([d*(NUM_DIGITIZED**i) for i, d in enumerate(digitized)])\n", + " return ind\n", + "\n", + "# def update_q_table()\n", + " def q_learning(self, obs, action, reward, obs_next):\n", + " obs_ind = Agent._digitize_state(obs, self.NUM_DIGITIZED)\n", + " obs_next_ind = Agent._digitize_state(obs_next, self.NUM_DIGITIZED)\n", + " self.q_table[obs_ind, action] = self.q_table[obs_ind, action] + self.eta*(reward + max(self.q_table[obs_next_ind, :]) - self.q_table[obs_ind, action])\n", + " \n", + " def choose_action(self, state, episode):\n", + " eps = 0.5*1/(episode + 1)\n", + " state_ind = Agent._digitize_state(state, self.NUM_DIGITIZED)\n", + " # epsilon greedy\n", + " if random.random() < eps:\n", + " action = self.action_space.sample()\n", + " else:\n", + " action = np.argmax(self.q_table[state_ind, :])\n", + " return action" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": { + "ExecuteTime": { + "end_time": "2023-03-07T14:25:28.552642Z", + "start_time": "2023-03-07T14:25:15.162622Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "episode: 0, finish 15 time steps.\n", + "episode: 1, finish 9 time steps.\n", + "episode: 2, finish 44 time steps.\n", + "episode: 3, finish 21 time steps.\n", + "episode: 4, finish 14 time steps.\n", + "episode: 5, finish 79 time steps.\n", + "episode: 6, finish 45 time steps.\n", + "episode: 7, finish 8 time steps.\n", + "episode: 8, finish 12 time steps.\n", + "episode: 9, finish 26 time steps.\n", + "episode: 10, finish 71 time steps.\n", + "episode: 11, finish 91 time steps.\n", + "episode: 12, finish 66 time steps.\n", + "episode: 13, finish 40 time steps.\n", + "episode: 14, finish 57 time steps.\n", + "episode: 15, finish 14 time steps.\n", + "episode: 16, finish 9 time steps.\n", + "episode: 17, finish 125 time steps.\n", + "episode: 18, finish 9 time steps.\n", + "episode: 19, finish 22 time steps.\n", + "episode: 20, finish 30 time steps.\n", + "episode: 21, finish 54 time steps.\n", + "episode: 22, finish 34 time steps.\n", + "episode: 23, finish 37 time steps.\n", + "episode: 24, finish 12 time steps.\n", + "episode: 25, finish 8 time steps.\n", + "episode: 26, finish 53 time steps.\n", + "episode: 27, finish 9 time steps.\n", + "episode: 28, finish 95 time steps.\n", + "episode: 29, finish 48 time steps.\n", + "episode: 30, finish 23 time steps.\n", + "episode: 31, finish 12 time steps.\n", + "episode: 32, finish 84 time steps.\n", + "episode: 33, finish 10 time steps.\n", + "episode: 34, finish 63 time steps.\n", + "episode: 35, finish 23 time steps.\n", + "episode: 36, finish 85 time steps.\n", + "episode: 37, finish 10 time steps.\n", + "episode: 38, finish 73 time steps.\n", + "episode: 39, finish 100 time steps.\n", + "episode: 40, finish 81 time steps.\n", + "episode: 41, finish 68 time steps.\n", + "episode: 42, finish 92 time steps.\n", + "episode: 43, finish 129 time steps.\n", + "episode: 44, finish 15 time steps.\n", + "episode: 45, finish 96 time steps.\n", + "episode: 46, finish 10 time steps.\n", + "episode: 47, finish 7 time steps.\n", + "episode: 48, finish 7 time steps.\n", + "episode: 49, finish 9 time steps.\n", + "episode: 50, finish 38 time steps.\n", + "episode: 51, finish 85 time steps.\n", + "episode: 52, finish 90 time steps.\n", + "episode: 53, finish 18 time steps.\n", + "episode: 54, finish 80 time steps.\n", + "episode: 55, finish 104 time steps.\n", + "episode: 56, finish 156 time steps.\n", + "episode: 57, finish 183 time steps.\n", + "episode: 58, finish 25 time steps.\n", + "episode: 59, finish 121 time steps.\n", + "episode: 60, finish 122 time steps.\n", + "episode: 61, finish 112 time steps.\n", + "episode: 62, finish 19 time steps.\n", + "episode: 63, finish 56 time steps.\n", + "episode: 64, finish 183 time steps.\n", + "episode: 65, finish 35 time steps.\n", + "episode: 66, finish 23 time steps.\n", + "episode: 67, finish 146 time steps.\n", + "episode: 68, finish 58 time steps.\n", + "episode: 69, finish 114 time steps.\n", + "episode: 70, finish 39 time steps.\n", + "episode: 71, finish 107 time steps.\n", + "episode: 72, finish 58 time steps.\n", + "episode: 73, finish 189 time steps.\n", + "episode: 74, finish 27 time steps.\n", + "episode: 75, finish 106 time steps.\n", + "episode: 76, finish 102 time steps.\n", + "episode: 77, finish 109 time steps.\n", + "episode: 78, finish 48 time steps.\n", + "episode: 79, finish 85 time steps.\n", + "episode: 80, finish 82 time steps.\n", + "episode: 81, finish 90 time steps.\n", + "episode: 82, finish 199 time steps.\n", + "episode: 83, finish 10 time steps.\n", + "episode: 84, finish 120 time steps.\n", + "episode: 85, finish 59 time steps.\n", + "episode: 86, finish 168 time steps.\n", + "episode: 87, finish 199 time steps.\n", + "episode: 88, finish 166 time steps.\n", + "episode: 89, finish 199 time steps.\n", + "episode: 90, finish 43 time steps.\n", + "episode: 91, finish 131 time steps.\n", + "episode: 92, finish 54 time steps.\n", + "episode: 93, finish 191 time steps.\n", + "episode: 94, finish 199 time steps.\n", + "episode: 95, finish 71 time steps.\n", + "episode: 96, finish 199 time steps.\n", + "episode: 97, finish 167 time steps.\n", + "episode: 98, finish 199 time steps.\n", + "episode: 99, finish 102 time steps.\n", + "episode: 100, finish 27 time steps.\n", + "episode: 101, finish 184 time steps.\n", + "episode: 102, finish 122 time steps.\n", + "episode: 103, finish 166 time steps.\n", + "episode: 104, finish 120 time steps.\n", + "episode: 105, finish 39 time steps.\n", + "episode: 106, finish 116 time steps.\n", + "episode: 107, finish 155 time steps.\n", + "episode: 108, finish 109 time steps.\n", + "episode: 109, finish 199 time steps.\n", + "episode: 110, finish 31 time steps.\n", + "episode: 111, finish 199 time steps.\n", + "episode: 112, finish 152 time steps.\n", + "episode: 113, finish 199 time steps.\n", + "episode: 114, finish 117 time steps.\n", + "episode: 115, finish 95 time steps.\n", + "episode: 116, finish 165 time steps.\n", + "episode: 117, finish 143 time steps.\n", + "episode: 118, finish 32 time steps.\n", + "episode: 119, finish 9 time steps.\n", + "episode: 120, finish 55 time steps.\n", + "episode: 121, finish 177 time steps.\n", + "episode: 122, finish 199 time steps.\n", + "episode: 123, finish 164 time steps.\n", + "episode: 124, finish 80 time steps.\n", + "episode: 125, finish 42 time steps.\n", + "episode: 126, finish 135 time steps.\n", + "episode: 127, finish 46 time steps.\n", + "episode: 128, finish 42 time steps.\n", + "episode: 129, finish 199 time steps.\n", + "episode: 130, finish 112 time steps.\n", + "episode: 131, finish 18 time steps.\n", + "episode: 132, finish 17 time steps.\n", + "episode: 133, finish 8 time steps.\n", + "episode: 134, finish 55 time steps.\n", + "episode: 135, finish 113 time steps.\n", + "episode: 136, finish 39 time steps.\n", + "episode: 137, finish 198 time steps.\n", + "episode: 138, finish 130 time steps.\n", + "episode: 139, finish 199 time steps.\n", + "episode: 140, finish 37 time steps.\n", + "episode: 141, finish 48 time steps.\n", + "episode: 142, finish 42 time steps.\n", + "episode: 143, finish 170 time steps.\n", + "episode: 144, finish 176 time steps.\n", + "episode: 145, finish 199 time steps.\n", + "episode: 146, finish 136 time steps.\n", + "episode: 147, finish 94 time steps.\n", + "episode: 148, finish 199 time steps.\n", + "episode: 149, finish 190 time steps.\n", + "episode: 150, finish 95 time steps.\n", + "episode: 151, finish 199 time steps.\n", + "episode: 152, finish 34 time steps.\n", + "episode: 153, finish 24 time steps.\n", + "episode: 154, finish 180 time steps.\n", + "episode: 155, finish 103 time steps.\n", + "episode: 156, finish 157 time steps.\n", + "episode: 157, finish 199 time steps.\n", + "episode: 158, finish 140 time steps.\n", + "episode: 159, finish 154 time steps.\n", + "episode: 160, finish 83 time steps.\n", + "episode: 161, finish 148 time steps.\n", + "episode: 162, finish 86 time steps.\n", + "episode: 163, finish 87 time steps.\n", + "episode: 164, finish 145 time steps.\n", + "episode: 165, finish 139 time steps.\n", + "episode: 166, finish 199 time steps.\n", + "episode: 167, finish 84 time steps.\n", + "episode: 168, finish 191 time steps.\n", + "episode: 169, finish 162 time steps.\n", + "episode: 170, finish 137 time steps.\n", + "episode: 171, finish 115 time steps.\n", + "episode: 172, finish 117 time steps.\n", + "episode: 173, finish 199 time steps.\n", + "episode: 174, finish 199 time steps.\n", + "episode: 175, finish 199 time steps.\n", + "episode: 176, finish 90 time steps.\n", + "episode: 177, finish 199 time steps.\n", + "episode: 178, finish 199 time steps.\n", + "episode: 179, finish 199 time steps.\n", + "episode: 180, finish 125 time steps.\n", + "episode: 181, finish 199 time steps.\n", + "episode: 182, finish 199 time steps.\n", + "episode: 183, finish 199 time steps.\n", + "episode: 184, finish 199 time steps.\n", + "episode: 185, finish 199 time steps.\n", + "episode: 186, finish 199 time steps.\n", + "episode: 187, finish 199 time steps.\n", + "episode: 188, finish 199 time steps.\n", + "episode: 189, finish 199 time steps.\n", + "episode: 190, finish 199 time steps.\n", + "continue success(step > 195) more than 10 times \n", + "episode: 191, finish 199 time steps.\n" + ] + } + ], + "source": [ + "env = gym.make('CartPole-v0')\n", + "env.reset()\n", + "action_space = env.action_space\n", + "n_states = env.observation_space.shape[0]\n", + "\n", + "agent = Agent(action_space, n_states)\n", + "\n", + "max_episodes = 1000\n", + "max_steps = 200\n", + "\n", + "continue_success_episodes = 0\n", + "learning_finish_flag = False\n", + "\n", + "frames = []\n", + "\n", + "for episode in range(max_episodes):\n", + " obs = env.reset()\n", + " for step in range(max_steps):\n", + " if learning_finish_flag:\n", + " frames.append(env.render(mode='rgb_array'))\n", + " action = agent.choose_action(obs, episode)\n", + " obs_next, _, done, _ = env.step(action)\n", + " if done:\n", + " if step < 195:\n", + " reward = -1\n", + " continue_success_episodes = 0\n", + " else:\n", + " reward = 1\n", + " continue_success_episodes += 1\n", + " else:\n", + " reward = 0\n", + " \n", + " agent.q_learning(obs, action, reward, obs_next)\n", + " \n", + " if done:\n", + " print(f'episode: {episode}, finish {step} time steps.')\n", + " break\n", + " \n", + " obs = obs_next\n", + " \n", + " if learning_finish_flag:\n", + " break\n", + " if continue_success_episodes >= 10:\n", + " learning_finish_flag = True\n", + " print(f'continue success(step > 195) more than 10 times ')\n" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": { + "ExecuteTime": { + "end_time": "2023-03-07T14:27:17.476903Z", + "start_time": "2023-03-07T14:25:53.586819Z" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
\n", + " \n", + "
\n", + " \n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAA5gAAAJqCAYAAABO5e8bAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8/fFQqAAAACXBIWXMAAAsTAAALEwEAmpwYAAARD0lEQVR4nO3dQY+cdQHH8f+zu+x2KbJqEwNCjMFoSEDFCPGgB8NFExPilQMvg7fgG+DUCwlnEm+QeBD1oIkejBKUWMBqqlKDNoW6LUvLPh4gIIZtYf3OzG79fI7znyfzO+53npnZaZ7nAQAAAP+rtVUPAAAA4OYgMAEAAEgITAAAABICEwAAgITABAAAICEwAQAASGzc4Nz/MAEAAOA/TQcduIMJAABAQmACAACQEJgAAAAkBCYAAAAJgQkAAEBCYAIAAJAQmAAAACQEJgAAAAmBCQAAQEJgAgAAkBCYAAAAJAQmAAAACYEJAABAQmACAACQEJgAAAAkBCYAAAAJgQkAAEBCYAIAAJAQmAAAACQEJgAAAAmBCQAAQEJgAgAAkBCYAAAAJAQmAAAACYEJAABAQmACAACQEJgAAAAkBCYAAAAJgQkAAEBCYAIAAJAQmAAAACQEJgAAAAmBCQAAQEJgAgAAkBCYAAAAJAQmAAAACYEJAABAQmACAACQEJgAAAAkBCYAAAAJgQkAAEBCYAIAAJAQmAAAACQEJgAAAAmBCQAAQEJgAgAAkBCYAAAAJAQmAAAACYEJAABAQmACAACQEJgAAAAkBCYAAAAJgQkAAEBCYAIAAJAQmAAAACQEJgAAAAmBCQAAQEJgAgAAkBCYAAAAJAQmAAAACYEJAABAQmACAACQEJgAAAAkBCYAAAAJgQkAAEBCYAIAAJAQmAAAACQEJgAAAAmBCQAAQEJgAgAAkBCYAAAAJAQmAAAACYEJAABAQmACAACQEJgAAAAkBCYAAAAJgQkAAEBCYAIAAJAQmAAAACQEJgAAAAmBCQAAQEJgAgAAkBCYAAAAJAQmAAAACYEJAABAQmACAACQEJgAAAAkBCYAAAAJgQkAAEBCYAIAAJAQmAAAACQEJgAAAAmBCQAAQEJgAgAAkBCYAAAAJAQmAAAACYEJAABAQmACAACQEJgAAAAkBCYAAAAJgQkAAEBCYAIAAJAQmAAAACQEJgAAAAmBCQAAQEJgAgAAkBCYAAAAJAQmAAAACYEJAABAQmACAACQEJgAAAAkBCYAAAAJgQkAAEBCYAIAAJAQmAAAACQEJgAAAAmBCQAAQEJgAgAAkBCYAAAAJAQmAAAACYEJAABAQmACAACQEJgAAAAkBCYAAAAJgQkAAEBCYAIAAJAQmAAAACQEJgAAAAmBCQAAQEJgAgAAkBCYAAAAJAQmAAAACYEJAABAQmACAACQEJgAAAAkBCYAAAAJgQkAAEBCYAIAAJAQmAAAACQEJgAAAAmBCQAAQEJgAgAAkBCYAAAAJAQmAAAACYEJAABAQmACAACQEJgAAAAkBCYAAAAJgQkAAEBCYAIAAJAQmAAAACQEJgAAAAmBCQAAQEJgAgAAkBCYAAAAJAQmAAAACYEJAABAQmACAACQEJgAAAAkBCYAAAAJgQkAAEBCYAIAAJAQmAAAACQEJgAAAAmBCQAAQEJgAgAAkBCYAAAAJAQmAAAACYEJAABAQmACAACQEJgAAAAkBCYAAAAJgQkAAEBCYAIAAJAQmAAAACQEJgAAAAmBCQAAQEJgAgAAkBCYAAAAJAQmAAAACYEJAABAQmACAACQEJgAAAAkBCYAAAAJgQkAAEBCYAIAAJAQmAAAACQEJgAAAAmBCQAAQEJgAgAAkBCYAAAAJAQmAAAACYEJAABAQmACAACQEJgAAAAkBCYAAAAJgQkAAEBCYAIAAJAQmAAAACQEJgAAAAmBCQAAQEJgAgAAkBCYAAAAJAQmAAAACYEJAABAQmACAACQEJgAAAAkBCYAAAAJgQkAAEBCYAIAAJAQmAAAACQEJgAAAAmBCQAAQEJgAgAAkBCYAAAAJAQmAAAACYEJAABAQmACAACQEJgAAAAkBCYAAAAJgQkAAEBCYAIAAJAQmAAAACQEJgAAAAmBCQAAQEJgAgAAkBCYAAAAJAQmAAAACYEJAABAQmACAACQEJgAAAAkBCYAAAAJgQkAAEBCYAIAAJAQmAAAACQEJgAAAAmBCQAAQEJgAgAAkBCYAAAAJAQmAAAACYEJAABAQmACAACQEJgAAAAkBCYAAAAJgQkAAEBCYAIAAJAQmAAAACQEJgAAAAmBCQAAQEJgAgAAkBCYAAAAJAQmAAAACYEJAABAQmACAACQEJgAAAAkBCYAAAAJgQkAAEBCYAIAAJAQmAAAACQ2Vj0AALg57V3653jt9z871LUbWyfHHQ98J14EwKIJTABgIa5evjj+/tsfHerazU+cEpgAx5CPyAIAAJAQmAAAACQEJgAAAAmBCQAAQEJgAgAAkBCYAAAAJAQmAAAACYEJAABAQmACAACQEJgAAAAkBCYAAAAJgQkAAEBCYAIAAJAQmAAAACQEJgAAAAmBCQAAQEJgAgAAkBCYAAAAJAQmAAAACYEJAABAQmACAACQEJgAAAAkBCYAAAAJgQkAAEBCYAIAAJAQmAAAACQEJgAAAAmBCQAAQEJgAgAAkBCYAAAAJAQmAAAACYEJAABAQmACAACQEJgAAAAkBCYAAAAJgQkAAEBCYAIAAJAQmAAAACQEJgAAAAmBCQAAQEJgAgAAkBCYAAAAJAQmAAAACYEJAABAQmACAACQEJgAAAAkBCYAAAAJgQkAAEBCYAIAAJAQmAAAACQEJgAAAAmBCQAAQEJgAgAAkBCYAAAAJAQmAAAACYEJAABAQmACAACQEJgAAAAkBCYAAAAJgQkAAEBCYAIAAJAQmAAAACQEJgAAAAmBCQAAQEJgAgAAkBCYAAAAJAQmAAAACYEJAABAQmACAACQEJgAAAAkBCYAAAAJgQkAAEBCYAIAAJAQmAAAACQEJgAAAAmBCQAAQEJgAgAAkBCYAAAAJAQmAAAACYEJAABAQmACAACQEJgAAAAkBCYAAAAJgQkAAEBCYAIAAJAQmAAAACQEJgAAAAmBCQAAQEJgAgAAkBCYAAAAJAQmAAAACYEJAABAQmACAACQEJgAAAAkBCYAAAAJgQkAAEBCYAIAAJAQmAAAACQEJgAAAAmBCQAAQEJgAgAAkBCYAAAAJAQmAAAACYEJAABAQmACAACQEJgAAAAkBCYAAAAJgQkAAEBCYAIAAJAQmAAAACQEJgAAAAmBCQAAQEJgAgAAkBCYAAAAJAQmAAAACYEJAABAQmACAACQEJgAAAAkBCYAAAAJgQkAAEBCYAIAAJAQmAAAACQEJgAAAAmBCQAAQEJgAgAAkBCYAAAAJAQmAAAACYEJAABAQmACAACQEJgAAAAkBCYAAAAJgQkAAEBCYAIAAJAQmAAAACQEJgAAAAmBCQAAQEJgAgAAkBCYAAAAJAQmAAAACYEJAABAQmACAACQEJgAAAAkBCYAAAAJgQkAAEBCYAIAAJAQmAAAACQEJgAAAAmBCQAAQEJgAgAAkBCYAAAAJAQmAAAACYEJAABAQmACAACQEJgAAAAkBCYAAAAJgQkAAEBCYAIAAJAQmAAAACQEJgAAAAmBCQAAQEJgAgAAkBCYAAAAJAQmAAAACYEJAABAQmACAACQEJgAAAAkBCYAAAAJgQkAAEBimuf5eufXPQQAjr5nnnlmPPXUU0t/3c/urI/HHrr9UNdevPL2OP3zN+JFNzZN03jyySfHyZMnl/7aAMfIdNDBxjJXAADLd+bMmfH0008v/XW/fM9nxmMPff9Q1+7u7q5k8zRN4/Tp00t/XYCbhY/IAgAAkHAHEwBYqg/7ds504IetADhOBCYAsHD789rY298e8zyNX1743nh7fv9PkDtP/HHcc9vz48TartAEOOYEJgCwUPM8xiu7Xxl/uPTQu498sCLPXr5/nL18/3hg5yfj7ltfXv5AADK+gwkALNQru18dZy49ON4Jyw+7RfnO4y+88a1x7vIXlzsOgJQ7mADAwpzdvW+cufT1MX+E97SvzZvjhTe++W5u/nrx4wDIuYMJACzEW/snxoW37hj7H+P97LfnzfGb17899va3FzcMgIURmADAQuxe2xmvvvmFVc8AYIkEJgBw5Fzd31r1BAAOQWACAEfMNH514burHgHAIQhMAOCImcc9J59f9QgADkFgAgBHzp3bZ1c9AYBDEJgAwELcsrY3Tq5fXPUMAJZIYAIAC3HbxsVx9/ZLH/u6u7dfGhvT1QUsAmDRBCYAsDCfu/XFcWrzb2OM+SM8ex53bZ8Z993+i7EuMAGOJYEJACzM1vqb4xuffnac2nz1Bs+cx13bL48Hdn46bll7aynbAOhtrHoAAHBzW5v2x9c++ePxlytfGmOMcebSg2N/rL93/qlbzo87TvxpfP7k78Y0rWolAAWBCQAsxMt/vTAe+8EP/+vRabx+9dQY4/2S3Fy7MrbX//WBZ129tr/4gQDkBCYAsBBX9q6NF//8jw85eW3pWwBYDt/BBAAAIHHdO5gPP/zwsnYAAAty7ty5VU84NuZ5Ho888sjY2PAhL4CDPPfccweeTfN88M+G7+3tfZTfFAcAjrAnnnhiPP7446uecSxM0zTOnz8/dnZ2Vj0F4Mja2to68CfZrvv23NbWVr8GAFiq9fX1Gz+J92xubvobCOCQfAcTAACAhMAEAAAgITABAABICEwAAAASAhMAAICEwAQAACAhMAEAAEgITAAAABICEwAAgITABAAAICEwAQAASAhMAAAAEgITAACAhMAEAAAgITABAABICEwAAAASAhMAAICEwAQAACAhMAEAAEgITAAAABIbqx4AACzWvffeOx599NFVzzgWpmkam5ubq54BcGxN8zxf7/y6hwAAAPzfmQ468BFZAAAAEgITAACAhMAEAAAgITABAABICEwAAAASAhMAAICEwAQAACAhMAEAAEgITAAAABICEwAAgITABAAAICEwAQAASAhMAAAAEgITAACAhMAEAAAgITABAABICEwAAAASAhMAAICEwAQAACAhMAEAAEgITAAAABICEwAAgITABAAAICEwAQAASAhMAAAAEgITAACAhMAEAAAgITABAABICEwAAAASAhMAAICEwAQAACAhMAEAAEgITAAAABICEwAAgITABAAAICEwAQAASAhMAAAAEgITAACAhMAEAAAgITABAABICEwAAAASAhMAAICEwAQAACAhMAEAAEgITAAAABICEwAAgITABAAAICEwAQAASAhMAAAAEgITAACAhMAEAAAgITABAABICEwAAAASAhMAAICEwAQAACAhMAEAAEgITAAAABICEwAAgITABAAAICEwAQAASAhMAAAAEgITAACAhMAEAAAgITABAABICEwAAAASAhMAAICEwAQAACAhMAEAAEgITAAAABICEwAAgITABAAAICEwAQAASAhMAAAAEgITAACAhMAEAAAgITABAABICEwAAAASAhMAAICEwAQAACAhMAEAAEgITAAAABICEwAAgITABAAAICEwAQAASAhMAAAAEgITAACAhMAEAAAgITABAABICEwAAAASAhMAAICEwAQAACAhMAEAAEgITAAAABICEwAAgITABAAAICEwAQAASAhMAAAAEgITAACAhMAEAAAgITABAABICEwAAAASAhMAAICEwAQAACAhMAEAAEgITAAAABICEwAAgITABAAAICEwAQAASAhMAAAAEgITAACAhMAEAAAgITABAABICEwAAAASAhMAAICEwAQAACAhMAEAAEgITAAAABICEwAAgITABAAAICEwAQAASAhMAAAAEgITAACAhMAEAAAgITABAABICEwAAAASAhMAAICEwAQAACCxcYPzaSkrAAAAOPbcwQQAACAhMAEAAEgITAAAABICEwAAgITABAAAICEwAQAASPwb9M/Acx0K05MAAAAASUVORK5CYII=\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "display_frames_as_gif(frames, output='./save/cart_pole_q_learrning.gif')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.3" + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": true, + "sideBar": true, + "skip_h1_title": false, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": false, + "toc_position": {}, + "toc_section_display": true, + "toc_window_display": false + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} -- cgit v1.2.3