Commit 89991f25 authored by lr119628

[update] more modular

parent f0ccd919
@@ -9,7 +9,7 @@ import sys
from mxnet import gluon, nd
<#if tc.containsAdaNet()>
from mxnet.gluon import nn, HybridBlock
from numpy import log, product,prod
from numpy import log, product,prod,sqrt
from mxnet.ndarray import zeros,zeros_like
</#if>
<#if tc.architecture.customPyFilesPath??>
@@ -563,9 +563,10 @@ class Net_${networkInstruction?index}(gluon.HybridBlock):
self.__setattr__(name,operation)
self.op_names.append(name)
self.candidate_complexities[name] = operation.get_complexity()
self.out = nn.Dense(units=self.classes,activation=None,flatten=False)
self.out = nn.Dense(units=self.classes,activation=None,flatten=True)
if self.fout:
self.finalout = self.fout()
def hybrid_forward(self, F, x):
res_list = []
......
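The registration idiom in the hunk above (attach each operation with __setattr__, remember its name in op_names, and keep a per-candidate complexity) is what later lets AdaLoss query the assembled network. A minimal self-contained sketch of the idea, assuming the ensemble simply sums its candidates' outputs (ToyNet and the summation rule are illustrative, not from the repository):

from mxnet import nd
from mxnet.gluon import nn, HybridBlock

class ToyNet(HybridBlock):
    def __init__(self, ops, classes=10, **kwargs):
        super(ToyNet, self).__init__(**kwargs)
        self.op_names = []
        for name, op in ops.items():
            self.__setattr__(name, op)  # Gluon registers blocks assigned as attributes
            self.op_names.append(name)
        self.out = nn.Dense(units=classes, activation=None, flatten=True)

    def hybrid_forward(self, F, x):
        res = None
        for name in self.op_names:
            y = self.__getattribute__(name)(x)
            res = y if res is None else res + y  # assumed ensemble rule: sum candidate outputs
        return self.out(res)

net = ToyNet({'c0': nn.Dense(16, activation='relu'), 'c1': nn.Dense(16, activation='relu')})
net.initialize()
print(net(nd.ones((4, 8))).shape)  # (4, 10)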
@@ -294,7 +294,7 @@ class BLEU(mx.metric.EvalMetric):
return new_list
<#if tc.containsAdaNet()>
def objective_function(model, data, loss, gamma=.0000001) -> float:
def objective_function(model, data, loss, gamma=.0001) -> float:
"""
:param model:
:param data:
@@ -383,11 +383,11 @@ class AdaLoss(Loss):
objective function of the whole model
"""
def __init__(self, weight=None, model=None, loss=SigmoidBCELoss, loss_args=(True,), batch_axis=0, lamb=0.0001,
def __init__(self, weight=None, model=None, loss=SigmoidBCELoss, loss_args=(True,), batch_axis=0, lamb=0.0001,gamma=.0001,
beta=.0001,
**kwargs):
super(AdaLoss, self).__init__(weight, batch_axis, **kwargs)
self.g = gamma
self.coreLoss = loss
self.model = model
self.c_complexities = self.model.get_candidate_complexity() # get candidate complexities
......@@ -396,13 +396,13 @@ class AdaLoss(Loss):
def hybrid_forward(self, F, x, label):
cl = self.coreLoss(x, label)
l1 = calculate_l1(self.model.out.collect_params())
l1 = calculate_l1(self.model.out.collect_params())*self.g
reg_term = F.sum(((self.lamb * self.c_complexities) + self.beta) * l1)
return F.add(cl, reg_term)
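Read together with objective_function, AdaLoss amounts to an AdaNet-style structural-risk objective, and the two changed lines only thread the new gamma through as a scale on the L1 norm of the output-layer weights. A reconstruction of the intent in LaTeX (notation mine, not from the repository), with r_j the stored candidate complexities:

\mathcal{F}(w) \;=\; \mathcal{L}(w) \;+\; \sum_{j} \left(\lambda\, r_j + \beta\right) \gamma \,\lVert w_{\mathrm{out}} \rVert_1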
def fitComponent(trainIter: mx.io.NDArrayIter, trainer: mx.gluon.Trainer, epochs: int, component: gluon.HybridBlock,
loss_class: gluon.loss, loss_params: dict,model_flag:bool) -> None:
loss_class: gluon.loss, loss_params: dict,model_flag:bool,batch_size:int,log_period=100) -> None:
"""
function trains a component of the generated model.
expects a component and a trainer instance with the corresponding parameters.
......@@ -424,11 +424,28 @@ def fitComponent(trainIter: mx.io.NDArrayIter, trainer: mx.gluon.Trainer, epochs
error.backward()
trainer.step(data.shape[0], ignore_stale_grad=True)
if batch_i%log_period==0:
loss_avg = error.mean().asscalar()
logging.info("Epoch[%d] Batch[%d] training a %s avgLoss: %.5f" % (epoch, batch_i,'model'if model_flag else 'candidate',loss_avg))
def train_candidate(candidate,epochs:int,optimizer:str,optimizer_params:dict,trainIter,loss:Loss,batch_size:int)->None:
candidate_trainer = get_trainer(optimizer,candidate.collect_params(),optimizer_params)
fitComponent(trainIter=trainIter, trainer=candidate_trainer, epochs=epochs, component=candidate,
loss_class=CandidateTrainingloss, loss_params={'loss': loss, 'candidate': candidate},model_flag=False,batch_size=batch_size)
def train_model(candidate,epochs:int,optimizer:str,optimizer_params:dict,trainIter,loss:Loss,batch_size:int)->None:
params = candidate.out.collect_params()
if candidate.finalout is not None:
params.update(candidate.finalout.collect_params())
model_trainer = get_trainer(optimizer, params, optimizer_params)
fitComponent(trainIter=trainIter, trainer=model_trainer, epochs=epochs, component=candidate,
loss_class=AdaLoss, loss_params={'loss': loss, 'model': candidate},model_flag=True,batch_size=batch_size)
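The two new helpers make the division of labor explicit: train_candidate optimizes all parameters of a fresh subnetwork under CandidateTrainingloss, whereas train_model collects only the out (and, when present, finalout) parameters, so already-accepted candidates stay frozen while the ensemble's output layer is refit under AdaLoss. A hedged usage sketch, assuming an initialized candidate plus the train_iter, loss, ctx, and batch_size already in scope in fit (the optimizer settings are illustrative):

candidate.initialize(ctx=ctx)
candidate.hybridize()
train_candidate(candidate, epochs=10, optimizer='adam',
                optimizer_params={'learning_rate': 1e-3},
                trainIter=train_iter, loss=loss, batch_size=batch_size)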
def get_trainer(optimizer: str, parameters: dict, optimizer_params: dict) -> mx.gluon.Trainer:
# gluon.Trainer doesn't take a ctx
if optimizer == 'Adamw':
trainer = mx.gluon.Trainer(parameters, AdamW.AdamW(**optimizer_params), )
trainer = mx.gluon.Trainer(parameters, AdamW.AdamW(**optimizer_params))
else:
trainer = mx.gluon.Trainer(parameters, optimizer, optimizer_params)
return trainer
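get_trainer papers over the one asymmetry in Gluon's Trainer API: a custom optimizer such as the project's AdamW.AdamW must be passed as an instance, while built-in optimizers are addressed by name plus a parameter dict. A minimal usage sketch (net and the learning rates are illustrative):

trainer = get_trainer('Adamw', net.collect_params(), {'learning_rate': 1e-3})  # instance path
trainer = get_trainer('sgd', net.collect_params(), {'learning_rate': 1e-2})    # name/dict path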
@@ -447,7 +464,7 @@ def fit(loss: gluon.loss.Loss,
ctx=None,
logging=None
) -> gluon.HybridBlock:
logging.info("AdaNet: starting ...")
logging.info(f"AdaNet: starting epochs:{epochs} batch_size:{batch_size} ...")
cg = dataClass.Builder(batch_size=batch_size)
model_template = dataClass.model_template
model_operations = {}
@@ -468,64 +485,45 @@ def fit(loss: gluon.loss.Loss,
for rnd in range(T):
# get new candidates
c0, c1 = cg.get_candidates()
c0.initialize(ctx=ctx)
c1.initialize(ctx=ctx)
c0.hybridize()
c1.hybridize()
# train candidate 0
c0_trainer = get_trainer(optimizer, c0.collect_params(), optimizer_params)
fitComponent(trainIter=train_iter, trainer=c0_trainer, epochs=epochs, component=c0,
loss_class=CandidateTrainingloss, loss_params={'loss': loss, 'candidate': c0},model_flag=False)
# train candidate 1
c1_trainer = get_trainer(optimizer, c1.collect_params(), optimizer_params)
fitComponent(trainIter=train_iter, trainer=c1_trainer, epochs=epochs, component=c1,
loss_class=CandidateTrainingloss, loss_params={'loss': loss, 'candidate': c1},model_flag=False)
# create model with candidate 0 added -> c0_model
c0_work_op = model_operations.copy()
c0_work_op[c0.name] = c0
c0_model = model_template(operations=c0_work_op, batch_size=batch_size)
c0_model.out.initialize(ctx=ctx)
if c0_model.finalout:
c0_model.finalout.initialize(ctx=ctx)
c0_model.hybridize()
# create model with candidate 1 added -> c1_model
c1_work_op = model_operations.copy()
c1_work_op[c1.name] = c1
c1_model = model_template(operations=c1_work_op, batch_size=batch_size)
c1_model.out.initialize(ctx=ctx)
if c1_model.finalout:
c1_model.finalout.initialize(ctx=ctx)
c1_model.hybridize()
# train c0_model
params = c0_model.out.collect_params()
params.update(c0_model.finalout.collect_params())
c0_out_trainer = get_trainer(optimizer, params, optimizer_params)
fitComponent(trainIter=train_iter, trainer=c0_out_trainer, epochs=epochs, component=c0_model,
loss_class=AdaLoss, loss_params={'loss': loss, 'model': c0_model},model_flag=True)
# train c1_model
params = c1_model.out.collect_params()
params.update(c1_model.finalout.collect_params())
c1_out_trainer = get_trainer(optimizer, params, optimizer_params)
fitComponent(trainIter=train_iter, trainer=c1_out_trainer, epochs=epochs, component=c1_model,
loss_class=AdaLoss, loss_params={'loss': loss, 'model': c1_model},model_flag=True)
c0_score = objective_function(model=c0_model, data=train_iter, loss=loss)
c1_score = objective_function(model=c1_model, data=train_iter, loss=loss)
check = nd.greater_equal(c0_score, c1_score)
# decide which candidate yields the best improvement
model, operation, score = (c0_model, c0, c0_score) if check else (c1_model, c1, c1_score)
candidates = cg.get_candidates()
model_data = {}
for name,candidate in candidates.items():
logging.info(f"working on candidate {name}")
model_eval = {}
candidate.initialize(ctx=ctx)
candidate.hybridize()
train_candidate(candidate,epochs,optimizer,optimizer_params,train_iter,loss,batch_size=batch_size)
model_name = name+ '_model'
# add the current candidate as operation
candidate_op = model_operations.copy()
candidate_op[name] = candidate
# create new model
candidate_model = model_template(operations=candidate_op,batch_size=batch_size)
candidate_model.out.initialize(ctx=ctx)
if candidate_model.finalout:
candidate_model.finalout.initialize(ctx=ctx)
candidate_model.hybridize()
train_model(candidate_model,epochs,optimizer,optimizer_params,train_iter,loss,batch_size=batch_size)
model_eval['model'] = candidate_model
model_eval['score'] = objective_function(model=candidate_model, data=train_iter, loss=loss)
model_eval['operation'] = candidate
model_data[model_name] = model_eval
min_name = None
min_score = None
for name in model_data:
score = model_data[name]['score']
if min_score is None:
min_score = score
min_name = name
elif min_score > score:
min_name = name
min_score = score
model,operation,score = model_data[min_name]['model'],model_data[min_name]['operation'],model_data[min_name]['score']
if model_score is None:
model_score = score
......
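The explicit minimum search over model_data at the end of the hunk above is behavior-equivalent to Python's built-in min with a key function; a more compact sketch of the same selection:

min_name = min(model_data, key=lambda n: model_data[n]['score'])
best = model_data[min_name]
model, operation, score = best['model'], best['operation'], best['score']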
@@ -27,7 +27,8 @@ class DefaultBlock(gluon.HybridBlock):
def __init__(self, units=20, activation='relu', **kwargs):
super(DefaultBlock, self).__init__(**kwargs)
self.ag = nn.Dense(units=units, activation=activation)
with self.name_scope():
self.ag = nn.Dense(units=units, activation=activation)
def hybrid_forward(self, F, x):
return self.ag(x)
@@ -50,7 +51,7 @@ class CandidateHull(gluon.HybridBlock):
<#else>
output_shape = None,
</#if>
inBlock=None,
input=None,
<#if inBlock.isPresent()>
<#if inBlock.get().isArtificial()>
input_shape =<#list inBlock.get().outputTypes as type>(${tc.join(type.dimensions, ",")})</#list>,
@@ -66,7 +67,6 @@ class CandidateHull(gluon.HybridBlock):
**kwargs):
super(CandidateHull, self).__init__(**kwargs)
# assert issubclass(type(output), gluon.HybridBlock), f'output should inherit from {gluon.HybridBlock} got {type(output)}'
self.name_ = name
self.names = []
self.stack = stack
@@ -78,6 +78,10 @@ class CandidateHull(gluon.HybridBlock):
self.input_shape = input_shape
self.model_shape = model_shape
self.classes = prod(list(self.model_shape))
if input:
self.input = input()
else:
self.input = None
if block_args is None:
body = {name + f'{i}': BuildingBlock() for i in range(self.stack)}
else:
@@ -87,17 +91,14 @@ class CandidateHull(gluon.HybridBlock):
val = body[name]
self.__setattr__(name=name, value=val)
self.names.append(name)
self.finalOut = nn.Dense(units=self.classes, activation=None, flatten=False)
self.body = body
if inBlock:
self.input = inBlock()
else:
self.input = None
if output:
self.out = output()
else:
self.out = None
self.finalOut = nn.Dense(units=self.classes, activation=None, flatten=False)
def approximate_rade(self):
"""
approximate the Rademacher complexity by the square root of the number of nodes within the candidate
@@ -106,7 +107,7 @@ class CandidateHull(gluon.HybridBlock):
oc = 0
for name in self.names:
oc += self.__getattribute__(name).count_nodes()
self.rade = log(oc)
self.rade = sqrt(oc)
return self.rade
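This hunk swaps the complexity proxy from the natural logarithm to the square root of the candidate's total node count. Since sqrt(n) grows much faster than ln(n), larger candidates now pay a visibly steeper price inside the (lamb * complexity + beta) * l1 term of AdaLoss. Schematically (a restatement of the code, not a formula from the repository):

r(h) \;=\; \sqrt{\sum_{b \in \mathrm{blocks}} n_b} \qquad \text{previously} \quad r(h) \;=\; \ln \sum_{b \in \mathrm{blocks}} n_b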
@@ -205,8 +206,7 @@ class Builder:
self.pre_stack = 1
self.step = 0
self.block_params = None
def get_candidates(self):
def get_candidates(self)->dict:
"""
:returns dict of two candidate networks keyed by name; the first has the same depth as the previous round, the other is one block deeper (depth+1)
"""
@@ -214,13 +214,14 @@ class Builder:
c0_name = f'can0r{self.round}'
c1_name = f'can1r{self.round}'
c0 = CandidateHull(name=c0_name, stack=self.pre_stack,
c0 = CandidateHull(name=c0_name,input=self.input,output=self.output, stack=self.pre_stack,
block_args=self.block_params,batch_size=self.batch_size)
c1 = CandidateHull(name=c1_name, stack=self.pre_stack + 1,
c1 = CandidateHull(name=c1_name,input=self.input,output=self.output ,stack=self.pre_stack + 1,
block_args=self.block_params,batch_size=self.batch_size)
self.step += 1
return c0, c1
return {c0.name_:c0, c1.name_:c1}
def update(self, up=1):
"""
......
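With get_candidates now returning a dict keyed by candidate name, the round loop in fit can iterate uniformly over any number of candidates instead of hard-coding the former c0/c1 pair; a sketch of the new call pattern, with cg and ctx as in fit:

candidates = cg.get_candidates()  # e.g. {'can0r0': same-depth hull, 'can1r0': one block deeper}
for name, candidate in candidates.items():
    candidate.initialize(ctx=ctx)
    candidate.hybridize()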