Aufgrund einer Störung des s3 Storage könnten in nächster Zeit folgende GitLab-Funktionen nicht zur Verfügung stehen: Container Registry, Job Artifacts. Wir bitten um Verständnis. Es wird mit Hochdruck an der Behebung des Problems gearbeitet. Weitere Informationen zur Störung des Object Storage finden Sie hier: https://maintenance.itc.rwth-aachen.de/ticket/status/messages/59-object-storage-pilot

Commit 9f516b40 authored by Christian Fuß
Browse files

adjusted network loss to be computed before applying ArgMax layer

parent dec0229d
Pipeline #181931 failed with stages
in 4 minutes and 15 seconds
......@@ -140,16 +140,16 @@ class ${tc.fileNameWithoutEnding}:
</#list>
with autograd.record():
<#include "pythonExecute.ftl">
<#include "pythonExecuteArgmax.ftl">
loss = \
<#list tc.architectureOutputs as output_name>
loss_function(${output_name}, ${output_name}label)<#sep> + \
</#list>
loss = 0
for element in lossList:
loss = loss + element
loss.backward()
for trainer in trainers:
trainer.step(batch_size)
......
<#list tc.getLayerVariableMembers("batch_size")?keys as member>
${member} = mx.nd.zeros((${tc.join(tc.getLayerVariableMembers("batch_size")[member], ", ")},), ctx=mx_context)
</#list>
<#list tc.architectureOutputSymbols as output>
${tc.getName(output)} = mx.nd.zeros((batch_size, ${tc.join(output.ioDeclaration.type.dimensions, ", ")},), ctx=mx_context)
</#list>
lossList = []
<#list tc.architecture.networkInstructions as networkInstruction>
<#if networkInstruction.isUnroll()>
<#list networkInstruction.toUnrollInstruction().resolvedBodies as resolvedBody>
${tc.join(tc.getStreamOutputNames(networkInstruction.body, resolvedBody), ", ")} = self._networks[${networkInstruction?index}](${tc.join(tc.getStreamInputNames(networkInstruction.body, resolvedBody), ", ")})
lossList.append(loss_function(${tc.getStreamOutputNames(networkInstruction.body, resolvedBody)[0]}, ${tc.getStreamOutputNames(networkInstruction.body, resolvedBody)[0]}label))
<#list resolvedBody.elements as element>
<#if element.name == "ArgMax">
${tc.join(tc.getStreamOutputNames(networkInstruction.body, resolvedBody), ", ")} = mx.nd.argmax(${tc.join(tc.getStreamOutputNames(networkInstruction.body, resolvedBody), ", ")}, axis=1)
</#if>
</#list>
</#list>
<#else>
<#if networkInstruction.body.isTrainable()>
${tc.join(tc.getStreamOutputNames(networkInstruction.body), ", ")} = self._networks[${networkInstruction?index}](${tc.join(tc.getStreamInputNames(networkInstruction.body), ", ")})
lossList.append(loss_function(${tc.getStreamOutputNames(networkInstruction.body, resolvedBody)[0]}, ${tc.getStreamOutputNames(networkInstruction.body, resolvedBody)[0]}label))
<#list networkInstruction.body.elements as element>
<#if element.name == "ArgMax">
${tc.join(tc.getStreamOutputNames(networkInstruction.body), ", ")} = mx.nd.argmax(${tc.join(tc.getStreamOutputNames(networkInstruction.body), ", ")}, axis=1)
</#if>
</#list>
<#else>
${tc.include(networkInstruction.body, "PYTHON_INLINE")}
</#if>
</#if>
</#list>
\ No newline at end of file
......@@ -138,13 +138,18 @@ class CNNSupervisedTrainer_Alexnet:
with autograd.record():
predictions_ = mx.nd.zeros((batch_size, 10,), ctx=mx_context)
lossList = []
predictions_ = self._networks[0](data_)
lossList.append(loss_function(predictions_, predictions_label))
loss = 0
for element in lossList:
loss = loss + element
loss = \
loss_function(predictions_, predictions_label)
loss.backward()
for trainer in trainers:
trainer.step(batch_size)
......
......@@ -138,13 +138,18 @@ class CNNSupervisedTrainer_CifarClassifierNetwork:
with autograd.record():
softmax_ = mx.nd.zeros((batch_size, 10,), ctx=mx_context)
lossList = []
softmax_ = self._networks[0](data_)
lossList.append(loss_function(softmax_, softmax_label))
loss = 0
for element in lossList:
loss = loss + element
loss = \
loss_function(softmax_, softmax_label)
loss.backward()
for trainer in trainers:
trainer.step(batch_size)
......
......@@ -138,13 +138,18 @@ class CNNSupervisedTrainer_VGG16:
with autograd.record():
predictions_ = mx.nd.zeros((batch_size, 1000,), ctx=mx_context)
lossList = []
predictions_ = self._networks[0](data_)
lossList.append(loss_function(predictions_, predictions_label))
loss = 0
for element in lossList:
loss = loss + element
loss = \
loss_function(predictions_, predictions_label)
loss.backward()
for trainer in trainers:
trainer.step(batch_size)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment