diff options
| author | chzhang <zch921005@126.com> | 2022-12-04 19:47:30 +0800 |
|---|---|---|
| committer | chzhang <zch921005@126.com> | 2022-12-04 19:47:30 +0800 |
| commit | 8a4203f66b826fc82b481e2f999cc0816e366d76 (patch) | |
| tree | cf61212654148e49300d1d93bdf91a43259ba183 | |
| parent | 00461080f2c3bb9372c2ec28c2f40e0f64397077 (diff) | |
rl envs
| -rw-r--r-- | rl/tutorials/01_env.py | 29 |
1 files changed, 29 insertions, 0 deletions
```diff
diff --git a/rl/tutorials/01_env.py b/rl/tutorials/01_env.py
index e69de29..3002a53 100644
--- a/rl/tutorials/01_env.py
+++ b/rl/tutorials/01_env.py
@@ -0,0 +1,29 @@
+
+
+import gym
+import time
+from datetime import datetime
+from stable_baselines3 import PPO
+from stable_baselines3.common.vec_env import DummyVecEnv
+from stable_baselines3.common.evaluation import evaluate_policy
+
+
+env_name = 'CartPole-v1'
+env = gym.make(env_name)
+
+episodes = 5
+for episode in range(1, episodes + 1):
+    state = env.reset()
+    done = False
+    score = 0
+
+    while not done:
+        env.render()
+        action = env.action_space.sample()
+        n_state, reward, done, info = env.step(action)
+        print(reward, done)
+        score += reward
+        # time.sleep(0.5)
+    now = datetime.now().strftime('%H:%M:%S')
+    print('{}, Episode:{} Score:{}'.format(now, episode, score))
+env.close()
```
