Commit 108af63d authored by Kirhan, Cihad

ConfLang integration

parent d1f56349
Pipeline #444536 failed with stage in 1 minute and 45 seconds
@@ -4,10 +4,12 @@ package de.monticore.lang.monticar.emadl.generator;
import com.google.common.base.Charsets;
import com.google.common.base.Joiner;
import com.google.common.base.Splitter;
import com.google.common.collect.Maps;
import com.google.common.io.Resources;
import conflang._ast.ASTComponentLiteral;
import conflang._ast.ASTSimpleConfigurationEntry;
import conflang._symboltable.*;
import conflangliterals._ast.ASTComponentLiteral;
import conflangliterals._ast.ASTTypelessLiteral;
import de.monticore.io.paths.ModelPath;
import de.monticore.lang.embeddedmontiarc.embeddedmontiarc._symboltable.cncModel.EMAComponentSymbol;
import de.monticore.lang.embeddedmontiarc.embeddedmontiarc._symboltable.instanceStructure.EMAComponentInstanceSymbol;
@@ -42,6 +44,7 @@ import de.monticore.lang.monticar.semantics.Constants;
import de.monticore.lang.monticar.semantics.util.BasicLibrary;
import de.monticore.lang.tagging._symboltable.TagSymbol;
import de.monticore.lang.tagging._symboltable.TaggingResolver;
import de.monticore.literals.literals._ast.ASTSignedLiteral;
import de.monticore.symboltable.GlobalScope;
import de.monticore.symboltable.Scope;
import de.monticore.symboltable.Symbol;
@@ -803,6 +806,9 @@ public class EMADLGenerator implements EMAMGenerator {
// TODO Add method to ConfLangConfigurationSymbol to search for configuration entries of any kind
Optional<Symbol> criticSymbolOpt = configurationSymbol.getSpannedScope().resolve(ConfigEntryNameConstants.CRITIC, ConfigurationEntryKind.KIND);
Map<String, Object> architectureAdapterMap = Maps.newHashMap();
if (criticSymbolOpt.isPresent()) {
if (!criticSymbolOpt.get().isKindOf(SimpleConfigurationEntrySymbol.KIND)) {
// TODO
@@ -811,8 +817,10 @@ public class EMADLGenerator implements EMAMGenerator {
String fullyQualifiedComponentName = constructFullyQualifiedComponentName(criticSymbol);
ArchitectureSymbol criticComponent = resolveComponentByConfiguration(fullyQualifiedComponentName);
architectureAdapterMap.put("critic_network", new ArchitectureAdapter(fullyQualifiedComponentName, criticComponent));
// CNNTRAIN PART
configuration.setCriticNetwork(new ArchitectureAdapter(fullyQualifiedComponentName, criticComponent)); // MISSING
configuration.setCriticNetwork(new ArchitectureAdapter(fullyQualifiedComponentName, criticComponent));
CNNTrainCocos.checkCriticCocos(configuration); // MISSING
}
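Note that the critic branch above and the discriminator and Q-network branches in the hunks below repeat the same steps: resolve the entry from the configuration's spanned scope, build the fully qualified component name, resolve its architecture, and register an ArchitectureAdapter in the adapter map and on the CNNTrain configuration. A possible refactoring sketch of that shared pattern, using only calls visible in this diff (the helper itself is hypothetical and not part of the commit; java.util.Optional is assumed):

    // Hypothetical helper, not part of this commit: shared resolve-and-register logic
    // for the critic, discriminator_name and qnetwork_name entries.
    private Optional<ArchitectureAdapter> resolveNetworkAdapter(
            ConfLangConfigurationSymbol configurationSymbol, String entryName) {
        // Look up the configuration entry, as done for CRITIC above.
        Optional<Symbol> entryOpt =
                configurationSymbol.getSpannedScope().resolve(entryName, ConfigurationEntryKind.KIND);
        if (!entryOpt.isPresent() || !entryOpt.get().isKindOf(SimpleConfigurationEntrySymbol.KIND)) {
            return Optional.empty();   // entry absent or not a simple entry
        }
        SimpleConfigurationEntrySymbol entry = (SimpleConfigurationEntrySymbol) entryOpt.get();
        // Resolve the referenced component's CNN architecture and wrap it for the adapter map.
        String fqName = constructFullyQualifiedComponentName(entry);
        ArchitectureSymbol architecture = resolveComponentByConfiguration(fqName);
        return Optional.of(new ArchitectureAdapter(fqName, architecture));
    }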
@@ -827,9 +835,10 @@ public class EMADLGenerator implements EMAMGenerator {
String fullyQualifiedComponentName = constructFullyQualifiedComponentName(discriminatorNameSymbol);
ArchitectureSymbol discriminatorComponent = resolveComponentByConfiguration(fullyQualifiedComponentName);
architectureAdapterMap.put("critic_network", new ArchitectureAdapter(fullyQualifiedComponentName, discriminatorComponent));
// CNNTRAIN PART
configuration.setCriticNetwork(new ArchitectureAdapter(fullyQualifiedComponentName, discriminatorComponent)); // MISSING
CNNTrainCocos.checkCriticCocos(configuration); // MISSING
configuration.setCriticNetwork(new ArchitectureAdapter(fullyQualifiedComponentName, discriminatorComponent));
}
Optional<Symbol> qNetworkNameSymbolOpt = configurationSymbol.getSpannedScope().resolve(ConfigEntryNameConstants.QNETWORK_NAME, ConfigurationEntryKind.KIND);
@@ -841,9 +850,10 @@ public class EMADLGenerator implements EMAMGenerator {
String fullyQualifiedComponentName = constructFullyQualifiedComponentName(qNetworkNameSymbol);
ArchitectureSymbol qNetworkComponent = resolveComponentByConfiguration(fullyQualifiedComponentName);
architectureAdapterMap.put("critic_network", new ArchitectureAdapter(fullyQualifiedComponentName, qNetworkComponent));
// CNNTRAIN PART
configuration.setCriticNetwork(new ArchitectureAdapter(fullyQualifiedComponentName, qNetworkComponent)); // MISSING
CNNTrainCocos.checkCriticCocos(configuration); // MISSING
configuration.setCriticNetwork(new ArchitectureAdapter(fullyQualifiedComponentName, qNetworkComponent));
}
Optional<Symbol> preprocessingNameSymbolOpt = configurationSymbol.getSpannedScope().resolve(ConfigEntryNameConstants.PREPROCESSING_NAME, ConfigurationEntryKind.KIND);
@@ -898,7 +908,7 @@ public class EMADLGenerator implements EMAMGenerator {
cnnTrainGenerator.setInstanceName(componentInstance.getFullName().replaceAll("\\.", "_"));
List<FileContent> fileContentMap = cnnTrainGenerator.generateStrings(configuration);
List<FileContent> fileContentList = cnnTrainGenerator.generateFileContents(configurationSymbol);
List<FileContent> fileContentList = cnnTrainGenerator.generateFileContents(configurationSymbol, architectureAdapterMap);
fileContents.addAll(fileContentMap);
fileContents.addAll(fileContentList);
}
@@ -938,7 +948,7 @@ public class EMADLGenerator implements EMAMGenerator {
if (!discriminator.isPresent()) {
Log.error("During the resolving of discriminator component: Discriminator component "
+ fullyQualifiedComponentName + " does not have a CNN implementation, but is required to have one.");
System.exit(-1); // REMOVE THIS, USE EXCEPTION INSTEAD
System.exit(-1); // TODO REMOVE THIS, USE EXCEPTION INSTEAD
}
ArchitectureSymbol architectureSymbol = discriminator.get();
@@ -948,7 +958,13 @@ public class EMADLGenerator implements EMAMGenerator {
private String constructFullyQualifiedComponentName(SimpleConfigurationEntrySymbol configurationEntrySymbol) {
ASTSimpleConfigurationEntry configurationEntry = (ASTSimpleConfigurationEntry) configurationEntrySymbol.getAstNode().get();
ASTComponentLiteral componentLiteral = (ASTComponentLiteral) configurationEntry.getValue();
ASTSignedLiteral value = configurationEntry.getValue();
if (value instanceof ASTTypelessLiteral) {
ASTTypelessLiteral typelessLiteral = (ASTTypelessLiteral) value;
return typelessLiteral.getValue();
}
ASTComponentLiteral componentLiteral = (ASTComponentLiteral) value;
List<String> componentNameParts = componentLiteral.getValue().getPartsList();
componentNameParts.set(componentNameParts.size() - 1, StringUtils.capitalize(componentNameParts.get(componentNameParts.size() - 1)));
return Joiner.on('.').join(componentNameParts);
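The rewritten constructFullyQualifiedComponentName now accepts either a typeless literal (returned as-is) or a component literal, whose last name part is capitalized to obtain the component type name. A self-contained illustration of just that string handling, assuming Guava and commons-lang3 on the classpath (the class and its example input are illustrative, not code from this commit):

    import com.google.common.base.Joiner;
    import com.google.common.base.Splitter;
    import org.apache.commons.lang3.StringUtils;   // assumption: commons-lang3 provides StringUtils here
    import java.util.ArrayList;
    import java.util.List;

    // Illustration only: the same name handling as above, without the ConfLang AST types.
    public class ComponentNameExample {

        static String capitalizeLastPart(String dottedName) {
            List<String> parts = new ArrayList<>(Splitter.on('.').splitToList(dottedName));
            int last = parts.size() - 1;
            parts.set(last, StringUtils.capitalize(parts.get(last)));
            return Joiner.on('.').join(parts);
        }

        public static void main(String[] args) {
            // e.g. the critic entry from MountaincarActor.conf further down in this diff
            System.out.println(capitalizeLastPart("mountaincar.agent.mountaincarCritic"));
            // prints: mountaincar.agent.MountaincarCritic
        }
    }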
@@ -272,6 +272,7 @@ public class GenerationTest extends AbstractSymtabTest {
"CNNNet_mountaincar_master_actor.py",
"CNNPredictor_mountaincar_master_actor.h",
"CNNTrainer_mountaincar_master_actor.py",
"CNNTrainerConfLang_mountaincar_master_actor.py",
"CNNTranslator.h",
"HelperA.h",
"start_training.sh",
/* (c) https://github.com/MontiCore/monticore */
configuration Invariant{
num_epoch=10
batch_size=123
context=cpu
optimizer=adam{
learning_rate=0.01
learning_rate_decay=0.8
step_size=1000
weight_decay=0.0001
configuration Invariant {
num_epoch = 10
batch_size = 123
context = cpu
optimizer = adam {
learning_rate = 0.01
learning_rate_decay = 0.8
step_size = 1000
weight_decay = 0.0001
}
}
/* (c) https://github.com/MontiCore/monticore */
schema Invariant {
num_epoch: N0
batch_size: N0
context: enum {
cpu,
gpu;
}
optimizer: complex<optimizer>
complex optimizer {
instances:
adam;
define adam {
learning_rate: Q
learning_rate_decay: Q
step_size: N0
weight_decay: Q
}
}
}
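The Invariant schema above pairs with the Invariant configuration: each entry name is declared with a domain (N0 read here as natural numbers including zero, Q as rationals, enum as a fixed value set, and complex<optimizer> as a nested object restricted to the listed instances). Purely as an illustration of what a conforming configuration carries, and not as ConfLang API, the checked values could be modeled in plain Java like this:

    // Hypothetical plain-Java mirror of the Invariant schema; fields follow the entries above,
    // with the values from the Invariant configuration as defaults.
    public class InvariantSettingsExample {

        enum Context { CPU, GPU }                  // context: enum { cpu, gpu; }

        // complex<optimizer>, restricted by the schema to the single instance "adam"
        static class AdamOptimizer {
            double learningRate = 0.01;            // learning_rate: Q
            double learningRateDecay = 0.8;        // learning_rate_decay: Q
            int stepSize = 1000;                   // step_size: N0
            double weightDecay = 0.0001;           // weight_decay: Q
        }

        int numEpoch = 10;                         // num_epoch: N0
        int batchSize = 123;                       // batch_size: N0
        Context context = Context.CPU;             // context = cpu
        AdamOptimizer optimizer = new AdamOptimizer();

        public static void main(String[] args) {
            InvariantSettingsExample settings = new InvariantSettingsExample();
            System.out.println("num_epoch = " + settings.numEpoch);
        }
    }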
/* (c) https://github.com/MontiCore/monticore */
configuration PreprocessingNetwork{
num_epoch = 1
batch_size = 1
log_period = 1
normalize = false
preprocessing_name = PreprocessingProcessing
context = cpu
load_checkpoint = false
optimizer = sgd {
learning_rate = 0.1
learning_rate_decay = 0.85
step_size = 1000
weight_decay = 0.0
}
}
/* (c) https://github.com/MontiCore/monticore */
schema PreprocessingNetwork{
num_epoch: N0
batch_size: N0
log_period: N0
normalize: B
preprocessing_name: component
context: enum {
cpu,
gpu;
}
load_checkpoint: B
optimizer: complex<optimizer>
complex optimizer {
instances:
sgd;
define sgd {
learning_rate: Q
learning_rate_decay: Q
step_size: N0
weight_decay: Q
}
}
}
/* (c) https = //github.com/MontiCore/monticore */
/* (c) https://github.com/MontiCore/monticore */
configuration DefaultGANGenerator{
learning_method = "gan"
learning_method = gan
discriminator_name = defaultGAN.DefaultGANDiscriminator
num_epoch = 10
batch_size = 64
normalize = false
context = "cpu"
context = cpu
noise_input = "noise"
print_images = true
log_period = 10
/* (c) https://github.com/MontiCore/monticore */
configuration InfoGANGenerator {
learning_method = gan
discriminator_name = infoGAN.InfoGANDiscriminator
qnet_name = infoGAN.InfoGANQNetwork
num_epoch = 5
batch_size = 64
normalize = false
noise_input = "noise"
context = cpu
load_checkpoint = false
optimizer = adam {
learning_rate = 0.0002
beta1 = 0.5
}
discriminator_optimizer = adam {
learning_rate = 0.0002
beta1 = 0.5
}
noise_distribution = gaussian {
mean_value = 0
spread_value = 1
}
log_period = 10
print_images = true
}
/* (c) https://github.com/MontiCore/monticore */
schema InfoGANGenerator {
learning_method: enum {
supervised,
reinforcement,
gan;
}
context: enum {
cpu,
gpu;
}
num_epoch: N0
batch_size: N0
discriminator_name: component
qnet_name: component
normalize: B
load_checkpoint: B
noise_input: S
log_period: Z
print_images: B
optimizer: complex<optimizer>
discriminator_optimizer: complex<optimizer>
noise_distribution: complex<noise_distribution>
complex optimizer {
instances:
adam,
rmsprop;
define adam {
learning_rate: Q
learning_rate_policy: enum {
fixed,
step,
exp,
inv,
poly,
sigmoid;
}
weight_decay: Q
epsilon: Q
beta1: Q
beta2: Q
}
define rmsprop {
learning_rate: Q
}
}
complex noise_distribution {
instances:
gaussian;
define gaussian {
mean_value: Z
spread_value: Z
}
}
}
\ No newline at end of file
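The InfoGANGenerator schema shows a complex type with more than one admissible instance (adam and rmsprop), each with its own entry set; a configuration then picks one instance and fills its entries, as InfoGANGenerator.conf above does with adam. As a hypothetical illustration (not ConfLang API), that shape corresponds to a choice between parameter sets:

    // Illustration only: "complex optimizer { instances: adam, rmsprop; ... }"
    // read as a choice between two parameter sets, mirroring the defines above.
    public class OptimizerChoiceExample {

        enum LearningRatePolicy { FIXED, STEP, EXP, INV, POLY, SIGMOID }

        interface Optimizer {}

        static class Adam implements Optimizer {
            double learningRate;                    // learning_rate: Q
            LearningRatePolicy learningRatePolicy;  // learning_rate_policy: enum { fixed, step, ... }
            double weightDecay;                     // weight_decay: Q
            double epsilon;                         // epsilon: Q
            double beta1;                           // beta1: Q
            double beta2;                           // beta2: Q
        }

        static class RmsProp implements Optimizer {
            double learningRate;                    // learning_rate: Q
        }

        public static void main(String[] args) {
            // optimizer = adam { learning_rate = 0.0002  beta1 = 0.5 } from InfoGANGenerator.conf
            Adam adam = new Adam();
            adam.learningRate = 0.0002;
            adam.beta1 = 0.5;
            Optimizer chosen = adam;                // the configuration selects one instance
            System.out.println(chosen.getClass().getSimpleName());
        }
    }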
/* (c) https://github.com/MontiCore/monticore */
configuration MountaincarActor {
context = "cpu"
learning_method = "reinforcement"
rl_algorithm = "ddpg-algorithm"
strategy = ornstein_uhlenbeck {
epsilon = 1.0
min_epsilon = 0.01
epsilon_decay_method = linear
epsilon_decay = 0.01
mu = (0.0)
theta = (0.15)
sigma = (0.3)
}
context = cpu
learning_method = reinforcement
rl_algorithm = ddpg
critic = mountaincar.agent.mountaincarCritic
environment = gym {
name = "MountainCarContinuous-v0"
}
num_episodes = 200
discount_factor = 0.999
num_max_steps = 1000
training_interval = 1
snapshot_interval = 20
replay_memory = buffer {
memory_size = 1000000
sample_size = 64
}
actor_optimizer = adam {
learning_rate = 0.0001
}
critic_optimizer = adam {
learning_rate = 0.001
}
}
/* (c) https://github.com/MontiCore/monticore */
package de.monticore.lang.schemalang;
// TODO: Requirement: packages, modularization
schema MountaincarActor {
context: enum {
cpu,
gpu;
}
learning_method: enum {
supervised,
reinforcement,
gan;
}
rl_algorithm: enum {
dqn,
ddpg,
td3;
}
critic: Component
num_episodes: N0
target_score: Q
discount_factor: Q
num_max_steps: N0
training_interval: N0
use_fix_target_network: B
target_network_update_interval: N0
snapshot_interval: N0
use_double_dqn: B
environment: complex<environment>
loss: complex<loss>
replay_memory: complex<replay_memory>
strategy: complex<strategy>
optimizer: complex<optimizer>
actor_optimizer: complex<optimizer>
critic_optimizer: complex<optimizer>
complex environment {
instances:
gym;
define gym {
name: S
}
}
complex loss {
instances:
huber;
}
complex replay_memory {
instances:
buffer;
define buffer {
memory_size: N0 // TODO: range in EMA
sample_size: N0
}
}
complex strategy {
instances:
epsgreedy,
ornstein_uhlenbeck;
define epsgreedy {
epsilon: Q
min_epsilon: Q
epsilon_decay_method: enum {
linear;
}
epsilon_decay: Q
}
define ornstein_uhlenbeck {
epsilon: Q
min_epsilon: Q
epsilon_decay_method: enum {
linear;
}
epsilon_decay: Q
mu: Q*
theta: Q*
sigma: Q*
}
}
complex optimizer {
instances:
adam,
rmsprop;
define adam {
learning_rate: Q
learning_rate_policy: enum {
fixed,
step,
exp,
inv,
poly,
sigmoid;
}
weight_decay: Q
epsilon: Q
beta1: Q
beta2: Q
}
define rmsprop {
learning_rate: Q
}
}
}
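In the MountaincarActor schema, Q* marks vector-valued entries: mu, theta and sigma of the ornstein_uhlenbeck strategy are lists of rationals, matching mu = (0.0), theta = (0.15), sigma = (0.3) in the configuration above and the lists passed as strategy_params in the generated trainer below. A hypothetical plain-Java rendering of that strategy block (not ConfLang API; Q* read here as double[]):

    // Illustration only: the ornstein_uhlenbeck strategy entries, with Q* read as a vector of rationals.
    public class OrnsteinUhlenbeckStrategyExample {

        enum EpsilonDecayMethod { LINEAR }          // epsilon_decay_method: enum { linear; }

        double epsilon = 1.0;                       // epsilon: Q
        double minEpsilon = 0.01;                   // min_epsilon: Q
        EpsilonDecayMethod epsilonDecayMethod = EpsilonDecayMethod.LINEAR;
        double epsilonDecay = 0.01;                 // epsilon_decay: Q
        double[] mu = {0.0};                        // mu: Q*    -> mu = (0.0)
        double[] theta = {0.15};                    // theta: Q* -> theta = (0.15)
        double[] sigma = {0.3};                     // sigma: Q* -> sigma = (0.3)

        public static void main(String[] args) {
            OrnsteinUhlenbeckStrategyExample s = new OrnsteinUhlenbeckStrategyExample();
            System.out.println("sigma[0] = " + s.sigma[0]);   // matches 'sigma': [0.3] in the trainer below
        }
    }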
from reinforcement_learning.agent import DdpgAgent
from reinforcement_learning.util import AgentSignalHandler
from reinforcement_learning.cnnarch_logger import ArchLogger
from reinforcement_learning.CNNCreator_mountaincar_agent_mountaincarCritic import CNNCreator_mountaincar_agent_mountaincarCritic
import reinforcement_learning.environment
import CNNCreator_mountaincar_master_actor
import os
import sys
import re
import time
import numpy as np
import mxnet as mx
def resume_session(sessions_dir):
resume_session = False
resume_directory = None
if os.path.isdir(sessions_dir):
regex = re.compile(r'\d\d\d\d-\d\d-\d\d-\d\d-\d\d')
dir_content = os.listdir(sessions_dir)
session_files = filter(regex.search, dir_content)
session_files.sort(reverse=True)
for d in session_files:
interrupted_session_dir = os.path.join(sessions_dir, d, '.interrupted_session')
if os.path.isdir(interrupted_session_dir):
resume = raw_input('Interrupted session from {} found. Do you want to resume? (y/n) '.format(d))
if resume == 'y':
resume_session = True
resume_directory = interrupted_session_dir
break
return resume_session, resume_directory
if __name__ == "__main__":
agent_name = 'mountaincar_master_actor'
# Prepare output directory and logger
all_output_dir = os.path.join('model', agent_name)
output_directory = os.path.join(
all_output_dir,
time.strftime('%Y-%m-%d-%H-%M-%S',
time.localtime(time.time())))
ArchLogger.set_output_directory(output_directory)
ArchLogger.set_logger_name(agent_name)
ArchLogger.set_output_level(ArchLogger.INFO)
env = reinforcement_learning.environment.GymEnvironment('MountainCarContinuous-v0')
context = mx.cpu()
initializer = mx.init.Normal()
critic_initializer = mx.init.Normal()
actor_creator = CNNCreator_mountaincar_master_actor.CNNCreator_mountaincar_master_actor()
actor_creator.setWeightInitializer(initializer)
actor_creator.construct(context)
critic_creator = CNNCreator_mountaincar_agent_mountaincarCritic()
critic_creator.setWeightInitializer(critic_initializer)
critic_creator.construct(context)
agent_params = {
'environment': env,
'replay_memory_params': {
'method': 'buffer',
'memory_size': 1000000,
'sample_size': 64,
'state_dtype': 'float32',
'action_dtype': 'float32',
'rewards_dtype': 'float32'
},
'strategy_params': {
'method':'ornstein_uhlenbeck',
'epsilon': 1,
'min_epsilon': 0.01,
'epsilon_decay_method': 'linear',
'epsilon_decay': 0.01,
'action_low': -1,
'action_high': 1,
'mu': [0],
'theta': [0.15],
'sigma': [0.3],
},
'agent_name': agent_name,
'verbose': True,
'output_directory': output_directory,
'state_dim': (2,),
'action_dim': (1,),
'ctx': 'cpu',
'discount_factor': 0.999,
'training_episodes': 200,
'train_interval': 1,