blob: dd22d1dedf6d760a7f0ca759fb4995481a2143de (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
|
import gym
import numpy as np
# Gym environment ID handed to gym.make() below.
env_name = 'CartPole-v1'
# Environment instance created at module load time (a module-level side
# effect); the __main__ block further down reads this global directly.
env = gym.make(env_name)
class Agent:
    """Tiny hand-coded agent that picks an action from one observation value.

    The policy looks only at the third component of a 4-element observation
    (presumably the pole angle for CartPole -- confirm against the
    environment documentation).
    """

    def __init__(self, env):
        """Remember how many discrete actions the environment exposes.

        Args:
            env: Object providing ``action_space.n``.
        """
        self.action_size = env.action_space.n

    def action_policy(self, observation):
        """Return 0 when the third observation component is negative, else 1.

        Args:
            observation: Iterable of exactly four values; only the third one
                influences the result.

        Returns:
            ``0`` if the third component is below zero, otherwise ``1``.
        """
        # Unpack all four values so a wrongly sized observation still fails
        # loudly; the underscore-prefixed names are intentionally unused.
        _pos, _vel, tilt, _ = observation
        return 0 if tilt < 0 else 1
if __name__ == '__main__':
    # Script entry point: drive the module-level `env` for 100 steps with
    # uniformly random actions, then print every per-step reward and the mean.
    observation = env.reset()
    agent = Agent(env)
    reward_history = []
    try:
        for _ in range(100):
            # Random baseline. To try the hand-coded heuristic instead, use
            # `action = agent.action_policy(observation)` here.
            action = env.action_space.sample()
            observation, reward, done, _info = env.step(action)
            reward_history.append(reward)
            if done:
                # Keep the observation returned by reset(); otherwise
                # `observation` would still hold the finished episode's
                # last state on the next step.
                observation = env.reset()
    finally:
        # Always release the environment's resources, even if a step fails.
        env.close()
    print(reward_history, np.mean(reward_history))
|