diff options
| author | lanchunhui <zch921005@126.com> | 2023-08-01 22:51:42 +0800 |
|---|---|---|
| committer | lanchunhui <zch921005@126.com> | 2023-08-01 22:51:42 +0800 |
| commit | 5bff7e2bcc303bfa6caee9d0b95bc21540d4c279 (patch) | |
| tree | 6439d338b3c2e5cce043bc67f591792ac228243d /rl/tutorials | |
| parent | 5c4da3dfc9f0bdf2ed7c4c6ca4958524111f194b (diff) | |
update: notes
Diffstat (limited to 'rl/tutorials')
| -rw-r--r-- | rl/tutorials/actor_critic.ipynb | 8 |
1 file changed, 7 insertions, 1 deletion
```diff
diff --git a/rl/tutorials/actor_critic.ipynb b/rl/tutorials/actor_critic.ipynb
index 32e28ad..0b357d7 100644
--- a/rl/tutorials/actor_critic.ipynb
+++ b/rl/tutorials/actor_critic.ipynb
@@ -14,7 +14,13 @@
 "metadata": {},
 "source": [
 "- references\n",
- " - https://github.com/pytorch/examples/tree/main/reinforcement_learning"
+ " - https://github.com/pytorch/examples/tree/main/reinforcement_learning\n",
+ " - https://towardsdatascience.com/understanding-actor-critic-methods-931b97b6df3f\n",
+ " - https://lilianweng.github.io/posts/2018-04-08-policy-gradient/\n",
+ "- Actor - Critic\n",
+ " - Actor\n",
+ " - The policy gradient method is also the “actor” part of Actor-Critic methods \n",
+ " - Critic"
 ]
 }
 ],
```
