Keywords:
Agent, action_space, sample, Monitor, episode, maxstep, observation, reward, done, info, step
my_random_agent.py
import gym
import argparse
## random agent
class RandomAgent(object):
    """Agent that picks a fresh sample from the action space on every step."""

    def __init__(self, action_space):
        """Store the action space to draw from.

        Args:
            action_space: any object exposing a ``sample()`` method.
        """
        self.action_space = action_space

    def act(self, observation, reward, done):
        """Return a newly sampled action.

        ``observation``, ``reward`` and ``done`` are accepted so that all
        agents share one call signature; this agent does not read them.
        """
        return self.action_space.sample()
class BiasedAgent(object):
    """Agent that samples one action at construction and repeats it forever."""

    def __init__(self, action_space):
        """Store the action space and fix the action this agent will take.

        Args:
            action_space: any object exposing a ``sample()`` method.
        """
        self.action_space = action_space
        # Sampled exactly once; act() never samples again.
        self.action_always = self.action_space.sample()

    def act(self, observation, reward, done):
        """Return the action chosen in ``__init__``.

        ``observation``, ``reward`` and ``done`` are accepted so that all
        agents share one call signature; this agent does not read them.
        """
        return self.action_always
if __name__ == '__main__':
    # Command line: an optional environment id (defaults to CartPole-v0) and
    # a --display switch that turns on rendering of every step.
    parser = argparse.ArgumentParser()
    parser.add_argument('--display', action='store_true')
    parser.add_argument('game', nargs="?", default="CartPole-v0")
    args = parser.parse_args()

    env = gym.make(args.game)
    # Optional Monitor wrapper, left disabled:
    # env = gym.wrappers.Monitor(env, 'video', force = True)

    num_episodes = 20   # how many episodes to run
    num_maxstep = 100   # upper bound on steps per episode
    agent_id = 1        # 1 -> RandomAgent, 2 -> BiasedAgent

    # try/finally guarantees the environment is closed even if a step raises.
    try:
        if agent_id == 1:
            agent = RandomAgent(env.action_space)
        elif agent_id == 2:
            agent = BiasedAgent(env.action_space)
        else:
            # Fail here with a clear message instead of a NameError later on.
            raise ValueError('unknown agent_id: {}'.format(agent_id))

        for i_episode in range(num_episodes):
            # This script uses the API in which reset() returns the initial
            # observation and step() returns a 4-tuple.
            observation = env.reset()
            # Reset per episode so the first act() call of an episode does not
            # receive the previous episode's final reward and done=True.
            reward = 0
            done = False
            for t in range(num_maxstep):
                # Render only when requested via --display.
                if args.display:
                    env.render()
                action = agent.act(observation, reward, done)
                observation, reward, done, info = env.step(action)
                print('episode {}-step {}, taking action {}, observation {}'.format(i_episode, t, action, observation))
                if done:
                    print("Episode finished after {} timesteps".format(t+1))
                    break
    finally:
        env.close()
# Results:
python my_random_agent.py CartPole-v0
episode 0-step 0, taking action 0, observation [ 0.02091297 -0.18428767 -0.03879215 0.27603339]
episode 0-step 1, taking action 0, observation [ 0.01722722 -0.37883531 -0.03327148 0.55623328]
episode 0-step 2, taking action 1, observation [ 0.00965051 -0.18326243 -0.02214682 0.2532563 ]
episode 0-step 3, taking action 0, observation [ 0.00598526 -0.37806126 -0.01708169 0.53887227]
episode 0-step 4, taking action 0, observation [-0.00157596 -0.57293896 -0.00630424 0.8261244 ]
episode 0-step 5, taking action 0, observation [-0.01303474 -0.76797413 0.01021824 1.11681791]
episode 0-step 6, taking action 0, observation [-0.02839422 -0.96322869 0.0325546 1.41268857]
episode 0-step 7, taking action 0, observation [-0.0476588 -1.15873867 0.06080837 1.7153675 ]
episode 0-step 8, taking action 1, observation [-0.07083357 -0.96436472 0.09511572 1.44221211]
episode 0-step 9, taking action 0, observation [-0.09012087 -1.16052037 0.12395997 1.76303751]
episode 0-step 10, taking action 0, observation [-0.11333127 -1.35680743 0.15922072 2.09155982]
episode 0-step 11, taking action 1, observation [-0.14046742 -1.16360922 0.20105191 1.85204148]
episode 0-step 12, taking action 0, observation [-0.16373961 -1.36029526 0.23809274 2.19983962]
Episode finished after 13 timesteps
episode 1-step 0, taking action 0, observation [ 0.00422353 -0.14742106 -0.01258727 0.26181792]
episode 1-step 1, taking action 0, observation [ 0.00127511 -0.34236109 -0.00735091 0.55050425]
episode 1-step 2, taking action 0, observation [-0.00557211 -0.53737902 0.00365918 0.8408621 ]
episode 1-step 3, taking action 0, observation [-0.01631969 -0.73255074 0.02047642 1.13469351]
episode 1-step 4, taking action 1, observation [-0.03097071 -0.53770262 0.04317029 0.84850225]
episode 1-step 5, taking action 1, observation [-0.04172476 -0.34319522 0.06014033 0.56970111]
episode 1-step 6, taking action 1, observation [-0.04858866 -0.148966 0.07153436 0.29655459]
episode 1-step 7, taking action 0, observation [-0.05156798 -0.34503095 0.07746545 0.61091302]
episode 1-step 8, taking action 0, observation [-0.0584686 -0.54114531 0.08968371 0.92695443]
episode 1-step 9, taking action 0, observation [-0.06929151 -0.73735632 0.1082228 1.24641981]
episode 1-step 10, taking action 0, observation [-0.08403864 -0.93368695 0.13315119 1.57094827]
episode 1-step 11, taking action 1, observation [-0.10271237 -0.74038174 0.16457016 1.32258791]
episode 1-step 12, taking action 1, observation [-0.11752001 -0.54767645 0.19102192 1.08560099]
episode 1-step 13, taking action 1, observation [-0.12847354 -0.35551642 0.21273394 0.85842335]
Episode finished after 14 timesteps
......
......
......
episode 18-step 0, taking action 0, observation [-0.037643 -0.22180154 -0.04034859 0.26337507]
episode 18-step 1, taking action 0, observation [-0.04207903 -0.41632503 -0.03508109 0.54306356]
episode 18-step 2, taking action 0, observation [-0.05040553 -0.61093685 -0.02421982 0.82448996]
episode 18-step 3, taking action 1, observation [-0.06262427 -0.41549214 -0.00773002 0.52428894]
episode 18-step 4, taking action 0, observation [-0.07093411 -0.61050445 0.00275576 0.81452607]
episode 18-step 5, taking action 1, observation [-0.0831442 -0.41542035 0.01904628 0.52271122]
episode 18-step 6, taking action 1, observation [-0.09145261 -0.22057158 0.02950051 0.23609034]
episode 18-step 7, taking action 1, observation [-0.09586404 -0.02588325 0.03422231 -0.04714319]
episode 18-step 8, taking action 1, observation [-0.09638171 0.16873169 0.03327945 -0.32883528]
episode 18-step 9, taking action 1, observation [-0.09300707 0.36336446 0.02670274 -0.61084042]
episode 18-step 10, taking action 1, observation [-0.08573978 0.5581032 0.01448594 -0.89499472]
episode 18-step 11, taking action 1, observation [-0.07457772 0.75302576 -0.00341396 -1.18308917]
episode 18-step 12, taking action 1, observation [-0.0595172 0.94819184 -0.02707574 -1.47684029]
episode 18-step 13, taking action 0, observation [-0.04055337 0.75341084 -0.05661255 -1.19273518]
episode 18-step 14, taking action 1, observation [-0.02548515 0.94921857 -0.08046725 -1.50261142]
episode 18-step 15, taking action 0, observation [-0.00650078 0.75516034 -0.11051948 -1.23609781]
episode 18-step 16, taking action 0, observation [ 0.00860243 0.56161839 -0.13524144 -0.97998272]
episode 18-step 17, taking action 0, observation [ 0.0198348 0.36854293 -0.15484109 -0.73265393]
episode 18-step 18, taking action 1, observation [ 0.02720565 0.56542679 -0.16949417 -1.06978805]
episode 18-step 19, taking action 1, observation [ 0.03851419 0.76233494 -0.19088993 -1.41051083]
episode 18-step 20, taking action 0, observation [ 0.05376089 0.57002289 -0.21910015 -1.18306398]
Episode finished after 21 timesteps
episode 19-step 0, taking action 0, observation [-0.00408516 -0.16827423 0.01038206 0.33411965]
episode 19-step 1, taking action 0, observation [-0.00745064 -0.36354239 0.01706445 0.63005838]
episode 19-step 2, taking action 0, observation [-0.01472149 -0.55889826 0.02966562 0.92806621]
episode 19-step 3, taking action 0, observation [-0.02589945 -0.7544079 0.04822695 1.22992205]
episode 19-step 4, taking action 1, observation [-0.04098761 -0.55993844 0.07282539 0.95273026]
episode 19-step 5, taking action 0, observation [-0.05218638 -0.75596076 0.09187999 1.26737611]
episode 19-step 6, taking action 1, observation [-0.0673056 -0.56212471 0.11722751 1.00482318]
episode 19-step 7, taking action 0, observation [-0.07854809 -0.75860065 0.13732398 1.33190038]
episode 19-step 8, taking action 1, observation [-0.0937201 -0.56545138 0.16396199 1.08515046]
episode 19-step 9, taking action 1, observation [-0.10502913 -0.37282697 0.185665 0.8480781 ]
episode 19-step 10, taking action 0, observation [-0.11248567 -0.56992986 0.20262656 1.19292367]
episode 19-step 11, taking action 0, observation [-0.12388427 -0.76701546 0.22648503 1.54166931]
Episode finished after 12 timesteps
At a glance:

网友评论