author    lanchunhui <zch921005@126.com>    2023-08-01 22:51:42 +0800
committer lanchunhui <zch921005@126.com>    2023-08-01 22:51:42 +0800
commit    5bff7e2bcc303bfa6caee9d0b95bc21540d4c279 (patch)
tree      6439d338b3c2e5cce043bc67f591792ac228243d /rl/tutorials
parent    5c4da3dfc9f0bdf2ed7c4c6ca4958524111f194b (diff)
update: notes
Diffstat (limited to 'rl/tutorials')
-rw-r--r--    rl/tutorials/actor_critic.ipynb    8
1 file changed, 7 insertions(+), 1 deletion(-)
diff --git a/rl/tutorials/actor_critic.ipynb b/rl/tutorials/actor_critic.ipynb
index 32e28ad..0b357d7 100644
--- a/rl/tutorials/actor_critic.ipynb
+++ b/rl/tutorials/actor_critic.ipynb
@@ -14,7 +14,13 @@
"metadata": {},
"source": [
"- references\n",
- " - https://github.com/pytorch/examples/tree/main/reinforcement_learning"
+ " - https://github.com/pytorch/examples/tree/main/reinforcement_learning\n",
+ " - https://towardsdatascience.com/understanding-actor-critic-methods-931b97b6df3f\n",
+ " - https://lilianweng.github.io/posts/2018-04-08-policy-gradient/\n",
+ "- Actor - Critic\n",
+ " - Actor\n",
+ " - The policy gradient method is also the “actor” part of Actor-Critic methods \n",
+ " - Critic"
]
}
],
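The added notes end with the Actor/Critic outline, with the Critic bullet still empty. For context, here is a minimal sketch of the pattern in PyTorch (hypothetical code, loosely modeled on the pytorch/examples script cited in the references; it is not taken from the notebook): the actor head outputs action probabilities and is trained by policy gradient, while the critic head estimates the state value V(s); the gap between the observed return and V(s), the advantage, scales the policy gradient.

# Illustrative actor-critic sketch; assumes PyTorch, discrete actions, episodic updates.
import torch
import torch.nn as nn
import torch.nn.functional as F

class ActorCritic(nn.Module):
    def __init__(self, obs_dim, n_actions, hidden=128):
        super().__init__()
        self.body = nn.Linear(obs_dim, hidden)      # shared feature layer
        self.actor = nn.Linear(hidden, n_actions)   # policy head: action scores
        self.critic = nn.Linear(hidden, 1)          # value head: V(s)

    def forward(self, x):
        h = F.relu(self.body(x))
        return F.softmax(self.actor(h), dim=-1), self.critic(h)

def select_action(model, obs):
    # Sample an action from the actor; keep log-prob and value for the update.
    probs, value = model(torch.as_tensor(obs, dtype=torch.float32))
    dist = torch.distributions.Categorical(probs)
    action = dist.sample()
    return action.item(), dist.log_prob(action), value

def update(optimizer, log_probs, values, rewards, gamma=0.99):
    # Discounted returns, computed backwards over one finished episode.
    returns, R = [], 0.0
    for r in reversed(rewards):
        R = r + gamma * R
        returns.insert(0, R)
    returns = torch.tensor(returns)
    values = torch.cat(values)
    advantage = returns - values.detach()           # critic's evaluation of the actor
    policy_loss = -(torch.stack(log_probs) * advantage).sum()
    value_loss = F.smooth_l1_loss(values, returns)
    optimizer.zero_grad()
    (policy_loss + value_loss).backward()
    optimizer.step()

Usage would be per episode against any discrete-action environment (the referenced pytorch/examples script uses CartPole): call select_action at each step, collect the rewards, log-probs, and values, then call update once the episode ends.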