Commit e2013174 authored by lr119628

[update] logging

parent 6229f294
@@ -13,6 +13,7 @@ import sys
import inspect
from mxnet import gluon, autograd, nd
<#if tc.containsAdaNet()>
from typing import List
from mxnet.gluon.loss import Loss, SigmoidBCELoss
from mxnet.ndarray import add, concatenate
</#if>
@@ -294,7 +295,7 @@ class BLEU(mx.metric.EvalMetric):
return new_list
<#if tc.containsAdaNet()>
def objective_function(model, data, loss, gamma=.0001) -> float:
def objective_function(model, data, loss, gamma=.1) -> float:
"""
:param model: the model to evaluate
:param data: the data iterator used for evaluation
@@ -341,7 +342,7 @@ class CandidateTrainingloss(Loss):
batch_axis=0,
alpha=.07,
beta=0.0001,
gamma=.0001,
gamma=.1,
**kwargs):
"""
loss function which is used to train each candidate
@@ -351,15 +352,17 @@ class CandidateTrainingloss(Loss):
to the candidate weights
"""
super(CandidateTrainingloss, self).__init__(weight, batch_axis, **kwargs)
self.a = alpha
self.b = beta
self.g = gamma
self.a = alpha # weight for the Rademacher approximation
self.b = beta # constant offset added to the weighted Rademacher approximation
self.g = gamma # weight of the combined complexity (L1 norm and Rademacher approximation)
self.coreLoss = loss # the loss function is passed in already initialized by the template
self.model = candidate # candidate to be trained
# noinspection PyMethodOverriding
def hybrid_forward(self, F, x, label, *args, **kwargs):
l1 = calculate_l1(self.model.collect_params()) * self.g
l1 = calculate_l1(self.model.collect_params())
# calculate the regularization term reg
# reg = (alpha * r) + beta, where r is the Rademacher complexity approximation
@@ -371,7 +374,7 @@ class CandidateTrainingloss(Loss):
# calculate the actual loss and add the regularization term
l = self.coreLoss(x, label)
ad = F.multiply(F.ones(l.shape), reg_term)
ad = F.multiply(F.ones(l.shape), reg_term* self.g)
res = F.add(l, ad)
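For intuition, gamma now scales the combined complexity term rather than the L1 norm alone. A minimal scalar sketch, assuming reg_term adds the weighted Rademacher approximation and the L1 norm (this hunk does not show how the two are combined):

```python
def candidate_loss_sketch(core_loss, l1, r, alpha=.07, beta=.0001, gamma=.1):
    # reg = (alpha * r) + beta, r being the Rademacher complexity approximation
    reg = alpha * r + beta
    # assumption: the regularization term sums reg and the L1 norm
    reg_term = reg + l1
    # after this commit gamma weights the whole term, not just l1
    return core_loss + gamma * reg_term

# candidate_loss_sketch(core_loss=0.69, l1=0.3, r=0.05) -> ~0.72036
```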
@@ -383,7 +386,7 @@ class AdaLoss(Loss):
objective function of the whole model
"""
def __init__(self, weight=None, model=None, loss=SigmoidBCELoss, loss_args=(True,), batch_axis=0, lamb=0.0001,gamma=.0001,
def __init__(self, weight=None, model=None, loss=SigmoidBCELoss, loss_args=(True,), batch_axis=0, lamb=0.0001,gamma=.1,
beta=.0001,
**kwargs):
super(AdaLoss, self).__init__(weight, batch_axis, **kwargs)
@@ -396,19 +399,19 @@ class AdaLoss(Loss):
def hybrid_forward(self, F, x, label):
cl = self.coreLoss(x, label)
l1 = calculate_l1(self.model.out.collect_params())*self.g
l1 = calculate_l1(self.model.out.collect_params())
reg_term = F.sum(((self.lamb * self.c_complexities) + self.beta) * l1)
return F.add(cl, reg_term)
return F.add(cl, reg_term*self.g)
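The model-level objective is fully visible here, so a small standalone sketch (with made-up values) can illustrate it: the L1 norm of the output layer is weighted per candidate by (lamb * complexity + beta), summed, and, after this commit, scaled once by gamma.

```python
import numpy as np

def ada_loss_sketch(core_loss, l1_out, c_complexities, lamb=.0001, beta=.0001, gamma=.1):
    # reg_term = sum((lamb * c + beta) * l1_out) over the candidate complexities c
    reg_term = np.sum((lamb * np.asarray(c_complexities) + beta) * l1_out)
    return core_loss + gamma * reg_term

# ada_loss_sketch(0.69, l1_out=0.5, c_complexities=[0.2, 0.4]) -> ~0.690013
```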
def fitComponent(trainIter: mx.io.NDArrayIter, trainer: mx.gluon.Trainer, epochs: int, component: gluon.HybridBlock,
loss_class: gluon.loss, loss_params: dict,model_flag:bool,batch_size:int,log_period=100) -> None:
loss_class: gluon.loss, loss_params: dict,model_flag:bool,batch_size:int,log_period=100) -> List[float]:
"""
function trains a component of the generated model.
expects a component and a trainer instance with corresponding parameters; returns the per-batch average losses.
"""
loss_list = []
loss = loss_class(**loss_params)
for epoch in range(epochs):
trainIter.reset()
@@ -424,21 +427,23 @@ def fitComponent(trainIter: mx.io.NDArrayIter, trainer: mx.gluon.Trainer, epochs
error.backward()
trainer.step(data.shape[0], ignore_stale_grad=True)
if batch_i%log_period==0:
loss_avg = error.mean().asscalar()
logging.info("Epoch[%d] Batch[%d] training a %s avgLoss: %.5f" % (epoch, batch_i,'model'if model_flag else 'candidate',loss_avg))
# if batch_i%log_period==0:
loss_avg = error.mean().asscalar()
loss_list.append(loss_avg)
#logging.info("In Epoch[%d] trained %s with average Loss: %.5f" % (epoch, 'model'if model_flag else component.name_,loss_avg))
return loss_list
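Since fitComponent now returns the per-batch average losses instead of logging them, aggregation moves to the caller (as fit() does below with np.mean). A tiny self-contained illustration:

```python
import numpy as np

# hypothetical per-batch averages collected by fitComponent
loss_list = [0.92, 0.71, 0.55, 0.48]
print("average loss: %.5f" % np.mean(loss_list))  # average loss: 0.66500
```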
def train_candidate(candidate,epochs:int,optimizer:str,optimizer_params:dict,trainIter,loss:Loss,batch_size:int)->None:
def train_candidate(candidate,epochs:int,optimizer:str,optimizer_params:dict,trainIter,loss:Loss,batch_size:int)->List[float]:
candidate_trainer = get_trainer(optimizer,candidate.collect_params(),optimizer_params)
fitComponent(trainIter=trainIter, trainer=candidate_trainer, epochs=epochs, component=candidate,
return fitComponent(trainIter=trainIter, trainer=candidate_trainer, epochs=epochs, component=candidate,
loss_class=CandidateTrainingloss, loss_params={'loss': loss, 'candidate': candidate},model_flag=False,batch_size=batch_size)
def train_model(candidate,epochs:int,optimizer:str,optimizer_params:dict,trainIter,loss:Loss,batch_size:int)->None:
def train_model(candidate,epochs:int,optimizer:str,optimizer_params:dict,trainIter,loss:Loss,batch_size:int)->List[float]:
params = candidate.out.collect_params()
if candidate.finalout is not None:
params.update(candidate.finalout.collect_params())
model_trainer = get_trainer(optimizer, params, optimizer_params)
fitComponent(trainIter=trainIter, trainer=model_trainer, epochs=epochs, component=candidate,
return fitComponent(trainIter=trainIter, trainer=model_trainer, epochs=epochs, component=candidate,
loss_class=AdaLoss, loss_params={'loss': loss, 'model': candidate},model_flag=True,batch_size=batch_size)
@@ -464,7 +469,7 @@ def fit(loss: gluon.loss.Loss,
ctx=None,
logging=None
) -> gluon.HybridBlock:
logging.info(f"AdaNet: starting epochs:{epochs} batch_size:{batch_size} ...")
logging.info(f"AdaNet: starting with {epochs} epochs and batch_size:{batch_size} ...")
cg = dataClass.Builder(batch_size=batch_size)
model_template = dataClass.model_template
model_operations = {}
@@ -486,13 +491,13 @@ def fit(loss: gluon.loss.Loss,
for rnd in range(T):
# get new candidates
candidates = cg.get_candidates()
can_count = len(candidates)
model_data = {}
for name,candidate in candidates.items():
logging.info(f"working on candidate {name}")
model_eval = {}
candidate.initialize(ctx=ctx)
candidate.hybridize()
train_candidate(candidate,epochs,optimizer,optimizer_params,train_iter,loss,batch_size=batch_size)
candidate_loss = train_candidate(candidate,epochs,optimizer,optimizer_params,train_iter,loss,batch_size=batch_size)
model_name = name+ '_model'
# add the current candidate as operation
@@ -507,11 +512,14 @@ def fit(loss: gluon.loss.Loss,
candidate_model.finalout.initialize(ctx=ctx)
candidate_model.hybridize()
train_model(candidate_model,epochs,optimizer,optimizer_params,train_iter,loss,batch_size=batch_size)
model_loss = train_model(candidate_model,epochs,optimizer,optimizer_params,train_iter,loss,batch_size=batch_size)
objective_score = objective_function(model=candidate_model, data=train_iter, loss=loss)
model_eval['model'] = candidate_model
model_eval['score'] = objective_function(model=candidate_model, data=train_iter, loss=loss)
model_eval['score'] = objective_score
model_eval['operation'] = candidate
model_data[model_name] = model_eval
logging_msg = "candidate average loss:{:.5f} model average loss:{:.5f} objective score: {:.5f}".format(np.mean(candidate_loss),np.mean(model_loss),objective_score.asscalar())
logging.info(logging_msg)
min_name = None
min_score = None
@@ -545,7 +553,7 @@ def fit(loss: gluon.loss.Loss,
cg.update()
round_msg = 'AdaNet:round: {}/{} finished,'.format(rnd + 1, T)
improvement = (1 - (model_score / old_score).asscalar()) * 100
score_msg = 'current model score:{:.5f} improvement {:.5f}%'.format(model_score.asscalar(), improvement)
score_msg = 'current model score:{:.5f} improvement {:.5f}% current model node count:{}'.format(model_score.asscalar(), improvement,model.get_node_count())
logging.info(round_msg + score_msg)
return model
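As a quick sanity check of the improvement formula logged above, with hypothetical scores:

```python
# objective dropped from 0.50 to 0.40 between rounds
old_score, model_score = 0.50, 0.40
improvement = (1 - model_score / old_score) * 100
print("improvement %.5f%%" % improvement)  # improvement 20.00000%
```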
@@ -709,7 +717,9 @@ class ${tc.fileNameWithoutEnding}:
ctx=mx_context[0],
logging=logging
)
logging.info(self._networks[0])
logging.info(self._networks[${networkInstruction?index}])
logging.info(f"node count: {self._networks[${networkInstruction?index}].get_node_count()}")
if optimizer == "adamw":
trainers = [mx.gluon.Trainer(network.collect_params(), AdamW.AdamW(**optimizer_params)) for network in self._networks.values() if len(network.collect_params().values()) != 0]
else:
......
@@ -212,7 +212,13 @@ class Builder:
"""
:returns a dict of candidate networks with stack heights from 1 to pre_stack + 1
"""
candidates = {}
for i in range(self.pre_stack+1):
name = f'candidate{i}round{self.round}'
candidate = CandidateHull(name=name,input=self.input,output=self.output, stack=i+1,
block_args=self.block_params,batch_size=self.batch_size)
candidates.setdefault(name,candidate)
"""
c0_name = f'candidate0round{self.round}'
c1_name = f'candidate1round{self.round}'
@@ -224,7 +230,9 @@ class Builder:
self.step += 1
return {c0.name_:c0, c1.name_:c1}
"""
return candidates
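The loop above replaces the fixed pair of candidates with pre_stack + 1 candidates of increasing stack height. For example, with a hypothetical pre_stack = 2 in round 3 the generated names would be:

```python
pre_stack, rnd = 2, 3  # hypothetical builder state
names = [f'candidate{i}round{rnd}' for i in range(pre_stack + 1)]
print(names)  # ['candidate0round3', 'candidate1round3', 'candidate2round3']
```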
def update(self, up=1):
"""
:param up, increases the stack height by up
......