author    lanchunhui <zch921005@126.com>    2023-08-01 22:51:42 +0800
committer lanchunhui <zch921005@126.com>    2023-08-01 22:51:42 +0800
commit    5bff7e2bcc303bfa6caee9d0b95bc21540d4c279 (patch)
tree      6439d338b3c2e5cce043bc67f591792ac228243d /rl/tutorials
parent    5c4da3dfc9f0bdf2ed7c4c6ca4958524111f194b (diff)
update: notes
Diffstat (limited to 'rl/tutorials')
-rw-r--r--    rl/tutorials/actor_critic.ipynb    8
1 file changed, 7 insertions(+), 1 deletion(-)
diff --git a/rl/tutorials/actor_critic.ipynb b/rl/tutorials/actor_critic.ipynb
index 32e28ad..0b357d7 100644
--- a/rl/tutorials/actor_critic.ipynb
+++ b/rl/tutorials/actor_critic.ipynb
@@ -14,7 +14,13 @@
"metadata": {},
"source": [
"- references\n",
- " - https://github.com/pytorch/examples/tree/main/reinforcement_learning"
+ " - https://github.com/pytorch/examples/tree/main/reinforcement_learning\n",
+ " - https://towardsdatascience.com/understanding-actor-critic-methods-931b97b6df3f\n",
+ " - https://lilianweng.github.io/posts/2018-04-08-policy-gradient/\n",
+ "- Actor - Critic\n",
+ " - Actor\n",
+ " - The policy gradient method is also the “actor” part of Actor-Critic methods \n",
+ " - Critic"
]
}
],
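The added notes end with the Actor/Critic outline, with the Critic bullet still empty. For context, here is a minimal sketch of the pattern in PyTorch (hypothetical code, loosely modeled on the pytorch/examples script cited in the references; it is not taken from the notebook): the actor head outputs action probabilities and is trained by policy gradient, while the critic head estimates the state value V(s); the gap between the observed return and V(s), the advantage, scales the policy gradient.

# Illustrative actor-critic sketch; assumes PyTorch, discrete actions, episodic updates.
import torch
import torch.nn as nn
import torch.nn.functional as F

class ActorCritic(nn.Module):
    def __init__(self, obs_dim, n_actions, hidden=128):
        super().__init__()
        self.body = nn.Linear(obs_dim, hidden)      # shared feature layer
        self.actor = nn.Linear(hidden, n_actions)   # policy head: action scores
        self.critic = nn.Linear(hidden, 1)          # value head: V(s)

    def forward(self, x):
        h = F.relu(self.body(x))
        return F.softmax(self.actor(h), dim=-1), self.critic(h)

def select_action(model, obs):
    # Sample an action from the actor; keep log-prob and value for the update.
    probs, value = model(torch.as_tensor(obs, dtype=torch.float32))
    dist = torch.distributions.Categorical(probs)
    action = dist.sample()
    return action.item(), dist.log_prob(action), value

def update(optimizer, log_probs, values, rewards, gamma=0.99):
    # Discounted returns, computed backwards over one finished episode.
    returns, R = [], 0.0
    for r in reversed(rewards):
        R = r + gamma * R
        returns.insert(0, R)
    returns = torch.tensor(returns)
    values = torch.cat(values)
    advantage = returns - values.detach()           # critic's evaluation of the actor
    policy_loss = -(torch.stack(log_probs) * advantage).sum()
    value_loss = F.smooth_l1_loss(values, returns)
    optimizer.zero_grad()
    (policy_loss + value_loss).backward()
    optimizer.step()

Usage would be per episode against any discrete-action environment (the referenced pytorch/examples script uses CartPole): call select_action at each step, collect the rewards, log-probs, and values, then call update once the episode ends.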