train_control_agent.py
from collections import deque

import matplotlib.pyplot as plt
import numpy as np
import torch
from unityagents import UnityEnvironment

from ddpg_agent import Agent


def ddpg(n_episodes=200, max_t=2000):
    """Deep Deterministic Policy Gradient (DDPG).

    Params
    ======
        n_episodes (int): maximum number of training episodes
        max_t (int): maximum number of time steps per episode
    """
    scores_deque = deque(maxlen=100)   # rolling window of the last 100 episode scores
    scores = []
    for i_episode in range(n_episodes):
        env_info = env.reset(train_mode=True)[brain_name]       # reset the environment
        states = env_info.vector_observations                   # get the current state (for each agent)
        score = np.zeros(num_agents)
        for t in range(max_t):
            actions = agent.act(states, add_noise=True)
            env_info = env.step(actions)[brain_name]             # send all actions to the environment
            next_states = env_info.vector_observations           # get next state (for each agent)
            rewards = env_info.rewards                           # get reward (for each agent)
            dones = env_info.local_done                          # see if episode finished
            agent.step(states, actions, rewards, next_states, dones)
            states = next_states                                 # roll over states to next time step
            score += rewards
            if np.any(dones):
                break
        scores_deque.append(score)
        scores.append(np.mean(score))
        print('\rEpisode {}\tAverage 100 Score: {:.2f}\tMean Score: {:.2f}'.format(
            i_episode, np.mean(scores_deque), np.mean(score)), end="")
        if i_episode % 10 == 0:
            # checkpoint the actor and critic weights every 10 episodes
            torch.save(agent.actor_local.state_dict(), 'checkpoint_actor.pth')
            torch.save(agent.critic_local.state_dict(), 'checkpoint_critic.pth')
            print('\rEpisode {}\tAverage 100 Score: {:.2f}'.format(i_episode, np.mean(scores_deque)))
        if np.mean(scores_deque) >= 31:
            # stop once the 100-episode average score reaches the target
            break
    return scores


# 1. Create environment
env = UnityEnvironment(
    # file_name="simulator/Reacher_Single/Reacher.x86_64")
    file_name="simulator/Reacher_Multi/Reacher.x86_64", no_graphics=True)
brain_name = env.brain_names[0]
brain = env.brains[brain_name]
env_info = env.reset(train_mode=True)[brain_name]
num_agents = len(env_info.agents)
action_size = brain.vector_action_space_size
states = env_info.vector_observations
state_size = states.shape[1]
print('Number of agents:', num_agents)

# 2. Create agent
agent = Agent(state_size, action_size, random_seed=14)

# 3. Run the DDPG training loop
scores = ddpg()

# plot the scores
fig = plt.figure()
ax = fig.add_subplot(111)
plt.plot(np.arange(1, len(scores) + 1), scores)
plt.ylabel('Score')
plt.xlabel('Episode #')
plt.show()
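
# ---------------------------------------------------------------------------
# Reference: the Agent interface assumed by the training loop above.
# This is a minimal sketch inferred from the calls made in this script, not
# the actual implementation in ddpg_agent.py; names of any internals beyond
# the constructor, act(), step(), actor_local and critic_local are
# hypothetical.
# ---------------------------------------------------------------------------
# class Agent:
#     def __init__(self, state_size, action_size, random_seed):
#         """Build actor/critic networks for the given state/action sizes."""
#
#     def act(self, states, add_noise=True):
#         """Return an action array of shape (num_agents, action_size)."""
#
#     def step(self, states, actions, rewards, next_states, dones):
#         """Store the transitions in a replay buffer and run a learning update."""
#
#     actor_local: torch.nn.Module   # weights saved to checkpoint_actor.pth
#     critic_local: torch.nn.Module  # weights saved to checkpoint_critic.pth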