Commit 5999ce8e authored by Nicola Gatto

Adjust tests

parent 67b0ffc6
Pipeline #153290 failed with stages
in 15 seconds
......@@ -101,7 +101,7 @@ if __name__ == "__main__":
resume_agent_params = {
'session_dir': resume_directory,
'environment': env,
'net': qnet_creator.net,
'net': qnet_creator.networks[0],
}
agent = DqnAgent.resume_from_session(**resume_agent_params)
else:
......@@ -113,4 +113,4 @@ if __name__ == "__main__":
train_successful = agent.train()
if train_successful:
agent.save_best_network(qnet_creator._model_dir_ + qnet_creator._model_prefix_ + '_newest', epoch=0)
agent.save_best_network(qnet_creator._model_dir_ + qnet_creator._model_prefix_ + '_0_newest', epoch=0)
......@@ -548,7 +548,7 @@ class DdpgAgent(Agent):
actor_qvalues = tmp_critic(states, self._actor(states))
# For maximizing qvalues we have to multiply with -1
# as we use a minimizer
actor_loss = -1 * actor_qvalues
actor_loss = -1 * actor_qvalues.mean()
actor_loss.backward()
trainer_actor.step(self._minibatch_size)
......
......@@ -108,7 +108,7 @@ if __name__ == "__main__":
resume_agent_params = {
'session_dir': resume_directory,
'environment': env,
'net': qnet_creator.net,
'net': qnet_creator.networks[0],
}
agent = DqnAgent.resume_from_session(**resume_agent_params)
else:
......@@ -120,4 +120,4 @@ if __name__ == "__main__":
train_successful = agent.train()
if train_successful:
agent.save_best_network(qnet_creator._model_dir_ + qnet_creator._model_prefix_ + '_newest', epoch=0)
agent.save_best_network(qnet_creator._model_dir_ + qnet_creator._model_prefix_ + '_0_newest', epoch=0)
......@@ -548,7 +548,7 @@ class DdpgAgent(Agent):
actor_qvalues = tmp_critic(states, self._actor(states))
# For maximizing qvalues we have to multiply with -1
# as we use a minimizer
actor_loss = -1 * actor_qvalues
actor_loss = -1 * actor_qvalues.mean()
actor_loss.backward()
trainer_actor.step(self._minibatch_size)
......
......@@ -102,7 +102,7 @@ if __name__ == "__main__":
resume_agent_params = {
'session_dir': resume_directory,
'environment': env,
'net': qnet_creator.net,
'net': qnet_creator.networks[0],
}
agent = DqnAgent.resume_from_session(**resume_agent_params)
else:
......@@ -114,4 +114,4 @@ if __name__ == "__main__":
train_successful = agent.train()
if train_successful:
agent.save_best_network(qnet_creator._model_dir_ + qnet_creator._model_prefix_ + '_newest', epoch=0)
agent.save_best_network(qnet_creator._model_dir_ + qnet_creator._model_prefix_ + '_0_newest', epoch=0)
......@@ -548,7 +548,7 @@ class DdpgAgent(Agent):
actor_qvalues = tmp_critic(states, self._actor(states))
# For maximizing qvalues we have to multiply with -1
# as we use a minimizer
actor_loss = -1 * actor_qvalues
actor_loss = -1 * actor_qvalues.mean()
actor_loss.backward()
trainer_actor.step(self._minibatch_size)
......
......@@ -120,8 +120,8 @@ if __name__ == "__main__":
resume_agent_params = {
'session_dir': resume_directory,
'environment': env,
'actor': actor_creator.net,
'critic': critic_creator.net
'actor': actor_creator.networks[0],
'critic': critic_creator.networks[0]
}
agent = DdpgAgent.resume_from_session(**resume_agent_params)
else:
......@@ -133,4 +133,4 @@ if __name__ == "__main__":
train_successful = agent.train()
if train_successful:
agent.save_best_network(actor_creator._model_dir_ + actor_creator._model_prefix_ + '_newest', epoch=0)
agent.save_best_network(actor_creator._model_dir_ + actor_creator._model_prefix_ + '_0_newest', epoch=0)
import mxnet as mx
import logging
import os
from CNNNet_CriticNetwork import Net
from CNNNet_CriticNetwork import Net_0
class CNNCreator_CriticNetwork:
_model_dir_ = "model/CriticNetwork/"
_model_prefix_ = "model"
_input_shapes_ = [(8,),(3,)]
def __init__(self):
self.weight_initializer = mx.init.Normal()
self.net = None
def get_input_shapes(self):
return self._input_shapes_
self.networks = {}
def load(self, context):
lastEpoch = 0
param_file = None
try:
os.remove(self._model_dir_ + self._model_prefix_ + "_newest-0000.params")
except OSError:
pass
try:
os.remove(self._model_dir_ + self._model_prefix_ + "_newest-symbol.json")
except OSError:
pass
if os.path.isdir(self._model_dir_):
for file in os.listdir(self._model_dir_):
if ".params" in file and self._model_prefix_ in file:
epochStr = file.replace(".params","").replace(self._model_prefix_ + "-","")
epoch = int(epochStr)
if epoch > lastEpoch:
lastEpoch = epoch
param_file = file
if param_file is None:
return 0
else:
logging.info("Loading checkpoint: " + param_file)
self.net.load_parameters(self._model_dir_ + param_file)
return lastEpoch
earliestLastEpoch = None
for i, network in self.networks.items():
lastEpoch = 0
param_file = None
try:
os.remove(self._model_dir_ + self._model_prefix_ + "_" + str(i) + "_newest-0000.params")
except OSError:
pass
try:
os.remove(self._model_dir_ + self._model_prefix_ + "_" + str(i) + "_newest-symbol.json")
except OSError:
pass
if os.path.isdir(self._model_dir_):
for file in os.listdir(self._model_dir_):
if ".params" in file and self._model_prefix_ + "_" + str(i) in file:
epochStr = file.replace(".params","").replace(self._model_prefix_ + "_" + str(i) + "-","")
epoch = int(epochStr)
if epoch > lastEpoch:
lastEpoch = epoch
param_file = file
if param_file is None:
earliestLastEpoch = 0
else:
logging.info("Loading checkpoint: " + param_file)
network.load_parameters(self._model_dir_ + param_file)
if earliestLastEpoch == None or lastEpoch < earliestLastEpoch:
earliestLastEpoch = lastEpoch
return earliestLastEpoch
def construct(self, context, data_mean=None, data_std=None):
self.net = Net(data_mean=data_mean, data_std=data_std)
self.net.collect_params().initialize(self.weight_initializer, ctx=context)
self.net.hybridize()
self.net(mx.nd.zeros((1,)+self._input_shapes_[0], ctx=context),mx.nd.zeros((1,)+self._input_shapes_[1], ctx=context))
self.networks[0] = Net_0(data_mean=data_mean, data_std=data_std)
self.networks[0].collect_params().initialize(self.weight_initializer, ctx=context)
self.networks[0].hybridize()
self.networks[0](mx.nd.zeros((1, 8,), ctx=context), mx.nd.zeros((1, 3,), ctx=context))
if not os.path.exists(self._model_dir_):
os.makedirs(self._model_dir_)
self.net.export(self._model_dir_ + self._model_prefix_, epoch=0)
for i, network in self.networks.items():
network.export(self._model_dir_ + self._model_prefix_ + "_" + str(i), epoch=0)
......@@ -2,6 +2,16 @@ import mxnet as mx
import numpy as np
from mxnet import gluon
# Gluon HybridBlock that replaces its input with a one-hot encoding of the
# arg-max taken along axis 1; `size` is used as the one-hot depth.
class OneHot(gluon.HybridBlock):
def __init__(self, size, **kwargs):
super(OneHot, self).__init__(**kwargs)
with self.name_scope():
# Kept on the instance so hybrid_forward can pass it as `depth`.
self.size = size
# `F` is the operator namespace supplied by Gluon (presumably mx.nd or
# mx.sym depending on hybridization — confirm); `x` is reduced over axis 1.
def hybrid_forward(self, F, x):
return F.one_hot(indices=F.argmax(data=x, axis=1), depth=self.size)
class Softmax(gluon.HybridBlock):
def __init__(self, **kwargs):
super(Softmax, self).__init__(**kwargs)
......@@ -68,15 +78,17 @@ class NoNormalization(gluon.HybridBlock):
return x
class Net(gluon.HybridBlock):
class Net_0(gluon.HybridBlock):
def __init__(self, data_mean=None, data_std=None, **kwargs):
super(Net, self).__init__(**kwargs)
super(Net_0, self).__init__(**kwargs)
self.last_layers = {}
with self.name_scope():
if not data_mean is None:
assert(not data_std is None)
self.state_input_normalization = ZScoreNormalization(data_mean=data_mean, data_std=data_std)
if data_mean:
assert(data_std)
self.input_normalization_state = ZScoreNormalization(data_mean=data_mean['state'],
data_std=data_std['state'])
else:
self.state_input_normalization = NoNormalization()
self.input_normalization_state = NoNormalization()
self.fc2_1_ = gluon.nn.Dense(units=300, use_bias=True)
# fc2_1_, output shape: {[300,1,1]}
......@@ -85,11 +97,12 @@ class Net(gluon.HybridBlock):
self.fc3_1_ = gluon.nn.Dense(units=600, use_bias=True)
# fc3_1_, output shape: {[600,1,1]}
if not data_mean is None:
assert(not data_std is None)
self.action_input_normalization = ZScoreNormalization(data_mean=data_mean, data_std=data_std)
if data_mean:
assert(data_std)
self.input_normalization_action = ZScoreNormalization(data_mean=data_mean['action'],
data_std=data_std['action'])
else:
self.action_input_normalization = NoNormalization()
self.input_normalization_action = NoNormalization()
self.fc2_2_ = gluon.nn.Dense(units=600, use_bias=True)
# fc2_2_, output shape: {[600,1,1]}
......@@ -101,19 +114,21 @@ class Net(gluon.HybridBlock):
self.fc5_ = gluon.nn.Dense(units=1, use_bias=True)
# fc5_, output shape: {[1,1,1]}
self.last_layer = 'linear'
self.last_layers['qvalue'] = 'linear'
def hybrid_forward(self, F, state, action):
state = self.state_input_normalization(state)
outputs = []
state = self.input_normalization_state(state)
fc2_1_ = self.fc2_1_(state)
relu2_1_ = self.relu2_1_(fc2_1_)
fc3_1_ = self.fc3_1_(relu2_1_)
action = self.action_input_normalization(action)
action = self.input_normalization_action(action)
fc2_2_ = self.fc2_2_(action)
add4_ = fc3_1_ + fc2_2_
fc4_ = self.fc4_(add4_)
relu4_ = self.relu4_(fc4_)
fc5_ = self.fc5_(relu4_)
return fc5_
outputs.append(fc5_)
return outputs[0]
......@@ -548,7 +548,7 @@ class DdpgAgent(Agent):
actor_qvalues = tmp_critic(states, self._actor(states))
# For maximizing qvalues we have to multiply with -1
# as we use a minimizer
actor_loss = -1 * actor_qvalues
actor_loss = -1 * actor_qvalues.mean()
actor_loss.backward()
trainer_actor.step(self._minibatch_size)
......
......@@ -125,8 +125,8 @@ if __name__ == "__main__":
resume_agent_params = {
'session_dir': resume_directory,
'environment': env,
'actor': actor_creator.net,
'critic': critic_creator.net
'actor': actor_creator.networks[0],
'critic': critic_creator.networks[0]
}
agent = DdpgAgent.resume_from_session(**resume_agent_params)
else:
......@@ -138,4 +138,4 @@ if __name__ == "__main__":
train_successful = agent.train()
if train_successful:
agent.save_best_network(actor_creator._model_dir_ + actor_creator._model_prefix_ + '_newest', epoch=0)
agent.save_best_network(actor_creator._model_dir_ + actor_creator._model_prefix_ + '_0_newest', epoch=0)
import mxnet as mx
import logging
import os
from CNNNet_RosCriticNetwork import Net
from CNNNet_RosCriticNetwork import Net_0
class CNNCreator_RosCriticNetwork:
_model_dir_ = "model/RosCriticNetwork/"
_model_prefix_ = "model"
_input_shapes_ = [(8,),(3,)]
def __init__(self):
self.weight_initializer = mx.init.Normal()
self.net = None
def get_input_shapes(self):
return self._input_shapes_
self.networks = {}
def load(self, context):
lastEpoch = 0
param_file = None
try:
os.remove(self._model_dir_ + self._model_prefix_ + "_newest-0000.params")
except OSError:
pass
try:
os.remove(self._model_dir_ + self._model_prefix_ + "_newest-symbol.json")
except OSError:
pass
if os.path.isdir(self._model_dir_):
for file in os.listdir(self._model_dir_):
if ".params" in file and self._model_prefix_ in file:
epochStr = file.replace(".params","").replace(self._model_prefix_ + "-","")
epoch = int(epochStr)
if epoch > lastEpoch:
lastEpoch = epoch
param_file = file
if param_file is None:
return 0
else:
logging.info("Loading checkpoint: " + param_file)
self.net.load_parameters(self._model_dir_ + param_file)
return lastEpoch
earliestLastEpoch = None
for i, network in self.networks.items():
lastEpoch = 0
param_file = None
try:
os.remove(self._model_dir_ + self._model_prefix_ + "_" + str(i) + "_newest-0000.params")
except OSError:
pass
try:
os.remove(self._model_dir_ + self._model_prefix_ + "_" + str(i) + "_newest-symbol.json")
except OSError:
pass
if os.path.isdir(self._model_dir_):
for file in os.listdir(self._model_dir_):
if ".params" in file and self._model_prefix_ + "_" + str(i) in file:
epochStr = file.replace(".params","").replace(self._model_prefix_ + "_" + str(i) + "-","")
epoch = int(epochStr)
if epoch > lastEpoch:
lastEpoch = epoch
param_file = file
if param_file is None:
earliestLastEpoch = 0
else:
logging.info("Loading checkpoint: " + param_file)
network.load_parameters(self._model_dir_ + param_file)
if earliestLastEpoch == None or lastEpoch < earliestLastEpoch:
earliestLastEpoch = lastEpoch
return earliestLastEpoch
def construct(self, context, data_mean=None, data_std=None):
self.net = Net(data_mean=data_mean, data_std=data_std)
self.net.collect_params().initialize(self.weight_initializer, ctx=context)
self.net.hybridize()
self.net(mx.nd.zeros((1,)+self._input_shapes_[0], ctx=context),mx.nd.zeros((1,)+self._input_shapes_[1], ctx=context))
self.networks[0] = Net_0(data_mean=data_mean, data_std=data_std)
self.networks[0].collect_params().initialize(self.weight_initializer, ctx=context)
self.networks[0].hybridize()
self.networks[0](mx.nd.zeros((1, 8,), ctx=context), mx.nd.zeros((1, 3,), ctx=context))
if not os.path.exists(self._model_dir_):
os.makedirs(self._model_dir_)
self.net.export(self._model_dir_ + self._model_prefix_, epoch=0)
for i, network in self.networks.items():
network.export(self._model_dir_ + self._model_prefix_ + "_" + str(i), epoch=0)
......@@ -2,6 +2,16 @@ import mxnet as mx
import numpy as np
from mxnet import gluon
# Gluon HybridBlock that replaces its input with a one-hot encoding of the
# arg-max taken along axis 1; `size` is used as the one-hot depth.
class OneHot(gluon.HybridBlock):
def __init__(self, size, **kwargs):
super(OneHot, self).__init__(**kwargs)
with self.name_scope():
# Kept on the instance so hybrid_forward can pass it as `depth`.
self.size = size
# `F` is the operator namespace supplied by Gluon (presumably mx.nd or
# mx.sym depending on hybridization — confirm); `x` is reduced over axis 1.
def hybrid_forward(self, F, x):
return F.one_hot(indices=F.argmax(data=x, axis=1), depth=self.size)
class Softmax(gluon.HybridBlock):
def __init__(self, **kwargs):
super(Softmax, self).__init__(**kwargs)
......@@ -68,15 +78,17 @@ class NoNormalization(gluon.HybridBlock):
return x
class Net(gluon.HybridBlock):
class Net_0(gluon.HybridBlock):
def __init__(self, data_mean=None, data_std=None, **kwargs):
super(Net, self).__init__(**kwargs)
super(Net_0, self).__init__(**kwargs)
self.last_layers = {}
with self.name_scope():
if not data_mean is None:
assert(not data_std is None)
self.state_input_normalization = ZScoreNormalization(data_mean=data_mean, data_std=data_std)
if data_mean:
assert(data_std)
self.input_normalization_state = ZScoreNormalization(data_mean=data_mean['state'],
data_std=data_std['state'])
else:
self.state_input_normalization = NoNormalization()
self.input_normalization_state = NoNormalization()
self.fc2_1_ = gluon.nn.Dense(units=300, use_bias=True)
# fc2_1_, output shape: {[300,1,1]}
......@@ -85,11 +97,12 @@ class Net(gluon.HybridBlock):
self.fc3_1_ = gluon.nn.Dense(units=600, use_bias=True)
# fc3_1_, output shape: {[600,1,1]}
if not data_mean is None:
assert(not data_std is None)
self.action_input_normalization = ZScoreNormalization(data_mean=data_mean, data_std=data_std)
if data_mean:
assert(data_std)
self.input_normalization_action = ZScoreNormalization(data_mean=data_mean['action'],
data_std=data_std['action'])
else:
self.action_input_normalization = NoNormalization()
self.input_normalization_action = NoNormalization()
self.fc2_2_ = gluon.nn.Dense(units=600, use_bias=True)
# fc2_2_, output shape: {[600,1,1]}
......@@ -101,19 +114,21 @@ class Net(gluon.HybridBlock):
self.fc5_ = gluon.nn.Dense(units=1, use_bias=True)
# fc5_, output shape: {[1,1,1]}
self.last_layer = 'linear'
self.last_layers['qvalue'] = 'linear'
def hybrid_forward(self, F, state, action):
state = self.state_input_normalization(state)
outputs = []
state = self.input_normalization_state(state)
fc2_1_ = self.fc2_1_(state)
relu2_1_ = self.relu2_1_(fc2_1_)
fc3_1_ = self.fc3_1_(relu2_1_)
action = self.action_input_normalization(action)
action = self.input_normalization_action(action)
fc2_2_ = self.fc2_2_(action)
add4_ = fc3_1_ + fc2_2_
fc4_ = self.fc4_(add4_)
relu4_ = self.relu4_(fc4_)
fc5_ = self.fc5_(relu4_)
return fc5_
outputs.append(fc5_)
return outputs[0]
......@@ -548,7 +548,7 @@ class DdpgAgent(Agent):
actor_qvalues = tmp_critic(states, self._actor(states))
# For maximizing qvalues we have to multiply with -1
# as we use a minimizer
actor_loss = -1 * actor_qvalues
actor_loss = -1 * actor_qvalues.mean()
actor_loss.backward()
trainer_actor.step(self._minibatch_size)
......
......@@ -10,5 +10,5 @@ implementation Critic(state, action) {
) ->
Add() ->
FullyConnected(units=600) ->
Relu();
Relu()
}
\ No newline at end of file
......@@ -10,5 +10,5 @@ implementation Critic(state, action) {
) ->
Add() ->
FullyConnected(units=600) ->
Relu();
Relu()
}
\ No newline at end of file
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment