Commit 3d680f2e authored by Sebastian Nickels

Merge

parents b85b4bce d3c1bc00
@@ -8,7 +8,7 @@
<groupId>de.monticore.lang.monticar</groupId>
<artifactId>embedded-montiarc-emadl-generator</artifactId>
<version>0.3.3-SNAPSHOT</version>
<version>0.3.4-SNAPSHOT</version>
<!-- == PROJECT DEPENDENCIES ============================================= -->
......
@@ -9,6 +9,7 @@ import de.monticore.lang.monticar.cnnarch.mxnetgenerator.CNNArch2MxNet;
import de.monticore.lang.monticar.cnnarch.caffe2generator.CNNArch2Caffe2;
import de.monticore.lang.monticar.cnnarch.mxnetgenerator.CNNTrain2MxNet;
import de.monticore.lang.monticar.cnnarch.caffe2generator.CNNTrain2Caffe2;
import de.monticore.lang.monticar.emadl.generator.reinforcementlearning.RewardFunctionCppGenerator;
import java.util.Optional;
......
@@ -34,6 +34,8 @@ import de.monticore.lang.monticar.cnnarch._symboltable.ArchitectureSymbol;
import de.monticore.lang.monticar.cnnarch._symboltable.SerialCompositeElementSymbol;
import de.monticore.lang.monticar.cnnarch.gluongenerator.CNNTrain2Gluon;
import de.monticore.lang.monticar.cnnarch.gluongenerator.annotations.ArchitectureAdapter;
import de.monticore.lang.monticar.cnntrain._cocos.CNNTrainCoCoChecker;
import de.monticore.lang.monticar.cnntrain._cocos.CNNTrainCocos;
import de.monticore.lang.monticar.cnntrain._symboltable.ConfigurationSymbol;
import de.monticore.lang.monticar.emadl._cocos.EMADLCocos;
import de.monticore.lang.monticar.generator.FileContent;
@@ -115,6 +117,18 @@ public class EMADLGenerator {
processedArchitecture = new HashMap<>();
setModelsPath( modelPath );
TaggingResolver symtab = EMADLAbstractSymtab.createSymTabAndTaggingResolver(getModelsPath());
EMAComponentInstanceSymbol instance = resolveComponentInstanceSymbol(qualifiedName, symtab);
generateFiles(symtab, instance, symtab, pythonPath, forced);
if (doCompile) {
compile();
}
processedArchitecture = null;
}
private EMAComponentInstanceSymbol resolveComponentInstanceSymbol(String qualifiedName, TaggingResolver symtab) {
EMAComponentSymbol component = symtab.<EMAComponentSymbol>resolve(qualifiedName, EMAComponentSymbol.KIND).orElse(null);
List<String> splitName = Splitters.DOT.splitToList(qualifiedName);
@@ -126,15 +140,7 @@ public class EMADLGenerator {
System.exit(1);
}
EMAComponentInstanceSymbol instance = component.getEnclosingScope().<EMAComponentInstanceSymbol>resolve(instanceName, EMAComponentInstanceSymbol.KIND).get();
generateFiles(symtab, instance, symtab, pythonPath, forced);
if (doCompile) {
compile();
}
processedArchitecture = null;
return component.getEnclosingScope().<EMAComponentInstanceSymbol>resolve(instanceName, EMAComponentInstanceSymbol.KIND).get();
}
public void compile() throws IOException {
@@ -530,7 +536,32 @@ public class EMADLGenerator {
final String fullConfigName = String.join(".", names);
ArchitectureSymbol correspondingArchitecture = this.processedArchitecture.get(fullConfigName);
assert correspondingArchitecture != null : "No architecture found for train " + fullConfigName + " configuration!";
configuration.setTrainedArchitecture(new ArchitectureAdapter(correspondingArchitecture));
configuration.setTrainedArchitecture(
new ArchitectureAdapter(correspondingArchitecture.getName(), correspondingArchitecture));
CNNTrainCocos.checkTrainedArchitectureCoCos(configuration);
// Resolve critic network if critic is present
if (configuration.getCriticName().isPresent()) {
String fullCriticName = configuration.getCriticName().get();
int indexOfFirstNameCharacter = fullCriticName.lastIndexOf('.') + 1;
fullCriticName = fullCriticName.substring(0, indexOfFirstNameCharacter)
+ fullCriticName.substring(indexOfFirstNameCharacter, indexOfFirstNameCharacter + 1).toUpperCase()
+ fullCriticName.substring(indexOfFirstNameCharacter + 1);
TaggingResolver symtab = EMADLAbstractSymtab.createSymTabAndTaggingResolver(getModelsPath());
EMAComponentInstanceSymbol instanceSymbol = resolveComponentInstanceSymbol(fullCriticName, symtab);
EMADLCocos.checkAll(instanceSymbol);
Optional<ArchitectureSymbol> critic = instanceSymbol.getSpannedScope().resolve("", ArchitectureSymbol.KIND);
if (!critic.isPresent()) {
Log.error("During the resolving of critic component: Critic component "
+ fullCriticName + " does not have a CNN implementation but is required to have one");
System.exit(-1);
}
critic.get().setComponentName(fullCriticName);
configuration.setCriticNetwork(new ArchitectureAdapter(fullCriticName, critic.get()));
CNNTrainCocos.checkCriticCocos(configuration);
}
cnnTrainGenerator.setInstanceName(componentInstance.getFullName().replaceAll("\\.", "_"));
Map<String, String> fileContentMap = cnnTrainGenerator.generateStrings(configuration);
......
package de.monticore.lang.monticar.emadl.generator;
package de.monticore.lang.monticar.emadl.generator.reinforcementlearning;
import de.monticore.lang.embeddedmontiarc.embeddedmontiarc._symboltable.instanceStructure.EMAComponentInstanceSymbol;
import de.monticore.lang.monticar.cnnarch.gluongenerator.reinforcement.RewardFunctionSourceGenerator;
import de.monticore.lang.monticar.emadl.generator.EMADLAbstractSymtab;
import de.monticore.lang.monticar.generator.cpp.GeneratorEMAMOpt2CPP;
import de.monticore.lang.tagging._symboltable.TaggingResolver;
import de.se_rwth.commons.logging.Log;
@@ -9,16 +10,13 @@ import de.se_rwth.commons.logging.Log;
import java.io.IOException;
import java.util.Optional;
public class RewardFunctionCppGenerator implements RewardFunctionSourceGenerator {
public class RewardFunctionCppGenerator implements RewardFunctionSourceGenerator{
public RewardFunctionCppGenerator() {
}
@Override
public void generate(String modelPath, String rootModel, String targetPath) {
GeneratorEMAMOpt2CPP generator = new GeneratorEMAMOpt2CPP();
generator.useArmadilloBackend();
TaggingResolver taggingResolver = EMADLAbstractSymtab.createSymTabAndTaggingResolver(modelPath);
@Override
public EMAComponentInstanceSymbol resolveSymbol(TaggingResolver taggingResolver, String rootModel) {
Optional<EMAComponentInstanceSymbol> instanceSymbol = taggingResolver
.<EMAComponentInstanceSymbol>resolve(rootModel, EMAComponentInstanceSymbol.KIND);
@@ -27,12 +25,34 @@ public class RewardFunctionCppGenerator implements RewardFunctionSourceGenerator
+ rootModel);
}
return instanceSymbol.get();
}
@Override
public void generate(EMAComponentInstanceSymbol componentInstanceSymbol, TaggingResolver taggingResolver,
String targetPath) {
GeneratorEMAMOpt2CPP generator = new GeneratorEMAMOpt2CPP();
generator.useArmadilloBackend();
generator.setGenerationTargetPath(targetPath);
try {
generator.generate(instanceSymbol.get(), taggingResolver);
generator.generate(componentInstanceSymbol, taggingResolver);
} catch (IOException e) {
Log.error("Generation of reward function is not possible: " + e.getMessage());
}
}
@Override
public void generate(String modelPath, String rootModel, String targetPath) {
TaggingResolver taggingResolver = createTaggingResolver(modelPath);
EMAComponentInstanceSymbol instanceSymbol = resolveSymbol(taggingResolver, rootModel);
generate(instanceSymbol, taggingResolver, targetPath);
}
@Override
public TaggingResolver createTaggingResolver(final String modelPath) {
return EMADLAbstractSymtab.createSymTabAndTaggingResolver(modelPath);
}
}
@@ -275,8 +275,8 @@ public class GenerationTest extends AbstractSymtabTest {
"HelperA.h",
"start_training.sh",
"reinforcement_learning/__init__.py",
"reinforcement_learning/CNNCreator_MountaincarCritic.py",
"reinforcement_learning/CNNNet_MountaincarCritic.py",
"reinforcement_learning/CNNCreator_mountaincar_agent_mountaincarCritic.py",
"reinforcement_learning/CNNNet_mountaincar_agent_mountaincarCritic.py",
"reinforcement_learning/strategy.py",
"reinforcement_learning/agent.py",
"reinforcement_learning/environment.py",
......
@@ -17,7 +17,7 @@ configuration CartPoleDQN {
use_double_dqn : false
loss : euclidean
loss : huber
replay_memory : buffer{
memory_size : 10000
......
implementation Critic(state, action) {
(state ->
FullyConnected(units=400) ->
Relu() ->
FullyConnected(units=300)
|
action ->
FullyConnected(units=300)
) ->
Add() ->
Relu();
}
\ No newline at end of file
package mountaincar.agent;
component MountaincarCritic {
ports
in Q^{2} state,
in Q(-1:1)^{1} action,
out Q(-oo:oo)^{1} qvalues;
implementation CNN {
(
state ->
FullyConnected(units=400) ->
Relu() ->
FullyConnected(units=300)
|
action ->
FullyConnected(units=300)
) ->
Add() ->
Relu() ->
FullyConnected(units=1) ->
qvalues;
}
}
\ No newline at end of file
@@ -23,7 +23,7 @@ configuration TorcsDQN {
use_double_dqn : true
loss : euclidean
loss : huber
replay_memory : buffer{
memory_size : 1000000
......
@@ -3,8 +3,9 @@ import h5py
import mxnet as mx
import logging
import sys
from mxnet import nd
class cartpole_master_dqnDataLoader:
class CNNDataLoader_cartpole_master_dqn:
_input_names_ = ['state']
_output_names_ = ['qvalues_label']
@@ -14,21 +15,38 @@ class cartpole_master_dqnDataLoader:
def load_data(self, batch_size):
train_h5, test_h5 = self.load_h5_files()
data_mean = train_h5[self._input_names_[0]][:].mean(axis=0)
data_std = train_h5[self._input_names_[0]][:].std(axis=0) + 1e-5
train_data = {}
data_mean = {}
data_std = {}
for input_name in self._input_names_:
train_data[input_name] = train_h5[input_name]
data_mean[input_name] = nd.array(train_h5[input_name][:].mean(axis=0))
data_std[input_name] = nd.array(train_h5[input_name][:].std(axis=0) + 1e-5)
train_label = {}
for output_name in self._output_names_:
train_label[output_name] = train_h5[output_name]
train_iter = mx.io.NDArrayIter(data=train_data,
label=train_label,
batch_size=batch_size)
train_iter = mx.io.NDArrayIter(train_h5[self._input_names_[0]],
train_h5[self._output_names_[0]],
batch_size=batch_size,
data_name=self._input_names_[0],
label_name=self._output_names_[0])
test_iter = None
if test_h5 != None:
test_iter = mx.io.NDArrayIter(test_h5[self._input_names_[0]],
test_h5[self._output_names_[0]],
batch_size=batch_size,
data_name=self._input_names_[0],
label_name=self._output_names_[0])
test_data = {}
for input_name in self._input_names_:
test_data[input_name] = test_h5[input_name]
test_label = {}
for output_name in self._output_names_:
test_label[output_name] = test_h5[output_name]
test_iter = mx.io.NDArrayIter(data=test_data,
label=test_label,
batch_size=batch_size)
return train_iter, test_iter, data_mean, data_std
def load_h5_files(self):
@@ -36,21 +54,39 @@ class cartpole_master_dqnDataLoader:
test_h5 = None
train_path = self._data_dir + "train.h5"
test_path = self._data_dir + "test.h5"
if os.path.isfile(train_path):
train_h5 = h5py.File(train_path, 'r')
if not (self._input_names_[0] in train_h5 and self._output_names_[0] in train_h5):
logging.error("The HDF5 file '" + os.path.abspath(train_path) + "' has to contain the datasets: "
+ "'" + self._input_names_[0] + "', '" + self._output_names_[0] + "'")
for input_name in self._input_names_:
if not input_name in train_h5:
logging.error("The HDF5 file '" + os.path.abspath(train_path) + "' has to contain the dataset "
+ "'" + input_name + "'")
sys.exit(1)
test_iter = None
for output_name in self._output_names_:
if not output_name in train_h5:
logging.error("The HDF5 file '" + os.path.abspath(train_path) + "' has to contain the dataset "
+ "'" + output_name + "'")
sys.exit(1)
if os.path.isfile(test_path):
test_h5 = h5py.File(test_path, 'r')
if not (self._input_names_[0] in test_h5 and self._output_names_[0] in test_h5):
logging.error("The HDF5 file '" + os.path.abspath(test_path) + "' has to contain the datasets: "
+ "'" + self._input_names_[0] + "', '" + self._output_names_[0] + "'")
for input_name in self._input_names_:
if not input_name in test_h5:
logging.error("The HDF5 file '" + os.path.abspath(test_path) + "' has to contain the dataset "
+ "'" + input_name + "'")
sys.exit(1)
for output_name in self._output_names_:
if not output_name in test_h5:
logging.error("The HDF5 file '" + os.path.abspath(test_path) + "' has to contain the dataset "
+ "'" + output_name + "'")
sys.exit(1)
else:
logging.warning("Couldn't load test set. File '" + os.path.abspath(test_path) + "' does not exist.")
return train_h5, test_h5
else:
logging.error("Data loading failure. File '" + os.path.abspath(train_path) + "' does not exist.")
......
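Note: the reworked loader above collects one dict entry per declared input and output before constructing the iterators, which is what allows multi-input networks. A minimal standalone sketch of the same pattern, assuming a train.h5 file containing the 'state' and 'qvalues_label' datasets named in the diff (illustrative only, not the generated code):

import h5py
import mxnet as mx

# Open the training file and build data/label dicts keyed by the declared
# input and output names (hard-coded here for brevity).
train_h5 = h5py.File('train.h5', 'r')
data = {'state': train_h5['state'][:]}
label = {'qvalues_label': train_h5['qvalues_label'][:]}

# NDArrayIter accepts dicts, so additional inputs/outputs only add entries.
train_iter = mx.io.NDArrayIter(data=data, label=label, batch_size=32)
for batch in train_iter:
    pass  # batch.data and batch.label carry the 'state' and 'qvalues_label' arrays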
@@ -101,7 +101,6 @@ class Net_0(gluon.HybridBlock):
self.fc3_ = gluon.nn.Dense(units=2, use_bias=True)
# fc3_, output shape: {[2,1,1]}
self.last_layers['qvalues'] = 'linear'
def hybrid_forward(self, F, state):
......
@@ -56,7 +56,7 @@ if __name__ == "__main__":
'memory_size': 10000,
'sample_size': 32,
'state_dtype': 'float32',
'action_dtype': 'float32',
'action_dtype': 'uint8',
'rewards_dtype': 'float32'
},
'strategy_params': {
@@ -78,10 +78,10 @@ if __name__ == "__main__":
'snapshot_interval': 20,
'max_episode_step': 250,
'target_score': 185.5,
'qnet':qnet_creator.net,
'qnet':qnet_creator.networks[0],
'use_fix_target': True,
'target_update_interval': 200,
'loss_function': 'euclidean',
'loss_function': 'huber',
'optimizer': 'rmsprop',
'optimizer_params': {
'learning_rate': 0.001 },
@@ -108,4 +108,4 @@ if __name__ == "__main__":
train_successful = agent.train()
if train_successful:
agent.save_best_network(qnet_creator._model_dir_ + qnet_creator._model_prefix_ + '_0_newest', epoch=0)
agent.export_best_network(path=qnet_creator._model_dir_ + qnet_creator._model_prefix_ + '_0_newest', epoch=0)
@@ -13,18 +13,21 @@ class StrategyBuilder(object):
epsilon_decay_method='no',
epsilon_decay=0.0,
epsilon_decay_start=0,
epsilon_decay_per_step=False,
action_dim=None,
action_low=None,
action_high=None,
mu=0.0,
theta=0.5,
sigma=0.3
sigma=0.3,
noise_variance=0.1
):
if epsilon_decay_method == 'linear':
decay = LinearDecay(
eps_decay=epsilon_decay, min_eps=min_epsilon,
decay_start=epsilon_decay_start)
decay_start=epsilon_decay_start,
decay_per_step=epsilon_decay_per_step)
else:
decay = NoDecay()
@@ -44,6 +47,13 @@ class StrategyBuilder(object):
return OrnsteinUhlenbeckStrategy(
action_dim, action_low, action_high, epsilon, mu, theta,
sigma, decay)
elif method == 'gaussian':
assert action_dim is not None
assert action_low is not None
assert action_high is not None
assert noise_variance is not None
return GaussianNoiseStrategy(action_dim, action_low, action_high,
epsilon, noise_variance, decay)
else:
assert action_dim is not None
assert len(action_dim) == 1
@@ -70,17 +80,27 @@ class NoDecay(BaseDecay):
class LinearDecay(BaseDecay):
def __init__(self, eps_decay, min_eps=0, decay_start=0):
def __init__(self, eps_decay, min_eps=0, decay_start=0, decay_per_step=False):
super(LinearDecay, self).__init__()
self.eps_decay = eps_decay
self.min_eps = min_eps
self.decay_start = decay_start
self.decay_per_step = decay_per_step
self.last_episode = -1
def decay(self, cur_eps, episode):
if episode < self.decay_start:
return cur_eps
def do_decay(self, episode):
if self.decay_per_step:
do = (episode >= self.decay_start)
else:
do = ((self.last_episode != episode) and (episode >= self.decay_start))
self.last_episode = episode
return do
def decay(self, cur_eps, episode):
if self.do_decay(episode):
return max(cur_eps - self.eps_decay, self.min_eps)
else:
return cur_eps
class BaseStrategy(object):
@@ -168,5 +188,31 @@ class OrnsteinUhlenbeckStrategy(BaseStrategy):
def select_action(self, values):
noise = self._evolve_state()
action = values + (self.cur_eps * noise)
action = (1.0 - self.cur_eps) * values + (self.cur_eps * noise)
return np.clip(action, self._action_low, self._action_high)
class GaussianNoiseStrategy(BaseStrategy):
def __init__(
self,
action_dim,
action_low,
action_high,
eps,
noise_variance,
decay=NoDecay()
):
super(GaussianNoiseStrategy, self).__init__(decay)
self.eps = eps
self.cur_eps = eps
self._action_dim = action_dim
self._action_low = action_low
self._action_high = action_high
self._noise_variance = noise_variance
def select_action(self, values):
noise = np.random.normal(loc=0.0, scale=self._noise_variance, size=self._action_dim)
action = values + self.cur_eps * noise
return np.clip(action, self._action_low, self._action_high)
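Usage note: the new GaussianNoiseStrategy can be combined with the per-step LinearDecay introduced above. The following sketch only wires together the constructor arguments visible in this diff; all parameter values are made up:

import numpy as np

# Per-step epsilon decay, starting after 10 steps (values illustrative).
decay = LinearDecay(eps_decay=0.001, min_eps=0.05, decay_start=10,
                    decay_per_step=True)

# Additive Gaussian exploration noise on a one-dimensional continuous action.
strategy = GaussianNoiseStrategy(action_dim=(1,), action_low=-1.0,
                                 action_high=1.0, eps=1.0,
                                 noise_variance=0.1, decay=decay)

action = strategy.select_action(values=np.array([0.3]))  # noisy action, clipped to [-1, 1]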
@@ -11,8 +11,8 @@ import cnnarch_logger
LOSS_FUNCTIONS = {
'l1': gluon.loss.L1Loss(),
'euclidean': gluon.loss.L2Loss(),
'huber_loss': gluon.loss.HuberLoss(),
'l2': gluon.loss.L2Loss(),
'huber': gluon.loss.HuberLoss(),
'softmax_cross_entropy': gluon.loss.SoftmaxCrossEntropyLoss(),
'sigmoid_cross_entropy': gluon.loss.SigmoidBinaryCrossEntropyLoss()}
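For reference, the renamed keys resolve directly to Gluon loss classes; a small hedged example of looking up the 'huber' key now used by the updated configurations (the mapping mirrors the dict above, the input values are made up):

from mxnet import gluon, nd

# Reduced copy of the mapping above, limited to the renamed entries.
LOSS_FUNCTIONS = {
    'l1': gluon.loss.L1Loss(),
    'l2': gluon.loss.L2Loss(),
    'huber': gluon.loss.HuberLoss(),
}

loss_fn = LOSS_FUNCTIONS['huber']                 # key used by the updated configs
loss = loss_fn(nd.array([0.5]), nd.array([0.0]))  # per-sample Huber loss
print(loss.asscalar())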
@@ -127,13 +127,15 @@ class TrainingStats(object):
else:
return self._all_total_rewards[0]
def save(self, path):
np.save(os.path.join(path, 'total_rewards'), self._all_total_rewards)
np.save(os.path.join(path, 'eps'), self._all_eps)
np.save(os.path.join(path, 'time'), self._all_time)
def save(self, path, episode=None):
if episode is None:
episode = self._max_episodes
np.save(os.path.join(path, 'total_rewards'), self._all_total_rewards[:episode])
np.save(os.path.join(path, 'eps'), self._all_eps[:episode])
np.save(os.path.join(path, 'time'), self._all_time[:episode])
np.save(
os.path.join(path, 'mean_reward'),
self._all_mean_reward_last_100_episodes)
self._all_mean_reward_last_100_episodes[:episode])
def _log_episode(self, episode, start_time, training_steps, eps, reward):
self.add_eps(episode, eps)
@@ -170,33 +172,43 @@ class DqnTrainingStats(TrainingStats):
self._logger.info(info)
return avg_reward
def save_stats(self, path):
def save_stats(self, path, episode=None):
if episode is None:
episode = self._max_episodes
all_total_rewards = self._all_total_rewards[:episode]
all_avg_loss = self._all_avg_loss[:episode]
all_eps = self._all_eps[:episode]
all_mean_reward_last_100_episodes = self._all_mean_reward_last_100_episodes[:episode]
fig = plt.figure(figsize=(20, 20))
sub_rewards = fig.add_subplot(221)
sub_rewards.set_title('Total Rewards per episode')
sub_rewards.plot(
np.arange(self._max_episodes), self._all_total_rewards)
np.arange(episode), all_total_rewards)
sub_loss = fig.add_subplot(222)
sub_loss.set_title('Avg. Loss per episode')
sub_loss.plot(np.arange(self._max_episodes), self._all_avg_loss)
sub_loss.plot(np.arange(episode), all_avg_loss)
sub_eps = fig.add_subplot(223)
sub_eps.set_title('Epsilon per episode')
sub_eps.plot(np.arange(self._max_episodes), self._all_eps)
sub_eps.plot(np.arange(episode), all_eps)
sub_rewards = fig.add_subplot(224)
sub_rewards.set_title('Avg. mean reward of last 100 episodes')
sub_rewards.plot(np.arange(self._max_episodes),
self._all_mean_reward_last_100_episodes)
sub_rewards.plot(np.arange(episode),
all_mean_reward_last_100_episodes)
self.save(path)
self.save(path, episode=episode)
plt.savefig(os.path.join(path, 'stats.pdf'))
def save(self, path):
super(DqnTrainingStats, self).save(path)
np.save(os.path.join(path, 'avg_loss'), self._all_avg_loss)
def save(self, path, episode=None):
if episode is None:
episode = self._max_episodes
super(DqnTrainingStats, self).save(path, episode=episode)
np.save(os.path.join(path, 'avg_loss'), self._all_avg_loss[:episode])
class DdpgTrainingStats(TrainingStats):
@@ -233,44 +245,56 @@ class DdpgTrainingStats(TrainingStats):
self.logger.info(info)
return avg_reward
def save(self, path):
super(DdpgTrainingStats, self).save(path)
def save(self, path, episode=None):
if episode is None:
episode = self._max_episodes
super(DdpgTrainingStats, self).save(path, episode=episode)
np.save(os.path.join(
path, 'avg_critic_loss'), self._all_avg_critic_loss)
np.save(os.path.join(path, 'avg_actor_loss'), self._all_avg_actor_loss)
np.save(os.path.join(path, 'avg_qvalues'), self._all_avg_qvalues)
path, 'avg_critic_loss'), self._all_avg_critic_loss[:episode])
np.save(os.path.join(path, 'avg_actor_loss'), self._all_avg_actor_loss[:episode])
np.save(os.path.join(path, 'avg_qvalues'), self._all_avg_qvalues[:episode])
def save_stats(self, path, episode=None):
if episode is None:
episode = self._max_episodes
all_total_rewards = self._all_total_rewards[:episode]
all_avg_actor_loss = self._all_avg_actor_loss[:episode]
all_avg_critic_loss = self._all_avg_critic_loss[:episode]
all_avg_qvalues = self._all_avg_qvalues[:episode]
all_eps = self._all_eps[:episode]
all_mean_reward_last_100_episodes = self._all_mean_reward_last_100_episodes[:episode]
def save_stats(self, path):
fig = plt.figure(figsize=(120, 120))
sub_rewards = fig.add_subplot(321)
sub_rewards.set_title('Total Rewards per episode')
sub_rewards.plot(
np.arange(self._max_episodes), self._all_total_rewards)
np.arange(episode), all_total_rewards)
sub_actor_loss = fig.add_subplot(322)
sub_actor_loss.set_title('Avg. Actor Loss per episode')
sub_actor_loss.plot(
np.arange(self._max_episodes), self._all_avg_actor_loss)
np.arange(episode), all_avg_actor_loss)
sub_critic_loss = fig.add_subplot(323)
sub_critic_loss.set_title('Avg. Critic Loss per episode')
sub_critic_loss.plot(
np.arange(self._max_episodes), self._all_avg_critic_loss)
np.arange(episode), all_avg_critic_loss)
sub_qvalues = fig.add_subplot(324)
sub_qvalues.set_title('Avg. QValues per episode')
sub_qvalues.plot(
np.arange(self._max_episodes), self._all_avg_qvalues)
np.arange(episode), all_avg_qvalues)
sub_eps = fig.add_subplot(325)
sub_eps.set_title('Epsilon per episode')
sub_eps.plot(np.arange(self._max_episodes), self._all_eps)
sub_eps.plot(np.arange(episode), all_eps)
sub_rewards = fig.add_subplot(326)
sub_rewards.set_title('Avg. mean reward of last 100 episodes')
sub_rewards.plot(np.arange(self._max_episodes),
self._all_mean_reward_last_100_episodes)