Commit bf659541 authored by panicPaul

Merge branch 'working' into dataLoaderFix

parents 3d5977ef bdfa7631
Pipeline #566618 passed in 1 minute and 18 seconds
@@ -9,7 +9,7 @@
<groupId>de.monticore.lang.monticar</groupId>
<artifactId>cnnarch-gluon-generator</artifactId>
<version>0.4.11-SNAPSHOT</version>
<version>0.4.12-SNAPSHOT</version>
<!-- == PROJECT DEPENDENCIES ============================================= -->
<properties>
......
@@ -5,7 +5,6 @@ import de.monticore.lang.monticar.cnntrain.annotations.RewardFunctionParameter;
import de.monticore.lang.monticar.generator.pythonwrapper.symbolservices.data.ComponentPortInformation;
import de.monticore.lang.monticar.generator.pythonwrapper.symbolservices.data.EmadlType;
import de.monticore.lang.monticar.generator.pythonwrapper.symbolservices.data.PortVariable;
import jdk.nashorn.internal.runtime.options.Option;
import java.util.List;
import java.util.Optional;
......
@@ -25,8 +25,25 @@ class CNNDataLoader_Alexnet:
for input_name in self._input_names_:
train_data[input_name] = train_h5[input_name]
data_mean[input_name + '_'] = nd.array(train_h5[input_name][:].mean(axis=0))
data_std[input_name + '_'] = nd.array(train_h5[input_name][:].std(axis=0) + 1e-5)
train_dataset = train_h5[input_name]
train_dataset_shape = train_data[input_name].shape
# slice_size limits the memory consumption, by only loading slices of size <500MB into memory
slice_size = min(train_dataset_shape[0] - 1, int(500e6 / (train_h5[input_name][0].size * \
train_h5[input_name][0].itemsize)))
num_slices = max(1, int(train_h5[input_name].shape[0] / slice_size))
mean = np.zeros(train_dataset_shape[1: ])
std = np.zeros(train_dataset_shape[1: ])
for i in range(int(train_dataset_shape[0] / slice_size)):
mean += train_dataset[i * slice_size: (i + 1) * slice_size].mean(axis=0) / num_slices
std += train_dataset[i * slice_size: (i + 1) * slice_size].std(axis=0) / num_slices
if slice_size > train_dataset_shape[0] - 1:
mean += train_dataset[num_slices * slice_size: ].mean(axis=0) / (slice_size - num_slices % slice_size)
std += train_dataset[num_slices * slice_size: ].std(axis=0) / (slice_size - num_slices % slice_size)
std += 1e-5
data_mean[input_name + '_'] = nd.array(mean)
data_std[input_name + '_'] = nd.array(std)
if 'images' in train_h5:
train_images = train_h5['images']
......
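Each of the generated CNNDataLoader_* classes receives the same change below: instead of loading the full HDF5 input array at once, the per-feature mean and standard deviation are accumulated over slices whose size is capped at roughly 500 MB, and the per-slice statistics are averaged. The following is a minimal standalone sketch of the same memory-bounded idea, not the generated code; the function name `sliced_mean_std`, the file name `train.h5`, and the dataset key `'data'` are illustrative. It aggregates sums and squared sums rather than averaging per-slice statistics, which gives the exact population mean and std (numpy's default, ddof=0) under the same memory budget.

```python
import numpy as np

def sliced_mean_std(dataset, byte_budget=int(500e6), eps=1e-5):
    """Return per-feature (mean, std) over axis 0 of an h5py Dataset of shape (N, ...)."""
    n = dataset.shape[0]
    bytes_per_sample = dataset[0].size * dataset[0].itemsize
    # Largest slice that stays under the byte budget, but at least one sample.
    slice_size = max(1, min(n, byte_budget // bytes_per_sample))
    total = np.zeros(dataset.shape[1:], dtype=np.float64)
    total_sq = np.zeros(dataset.shape[1:], dtype=np.float64)
    for start in range(0, n, slice_size):
        # Only this slice is materialized in memory; the final slice may be shorter.
        chunk = dataset[start:start + slice_size].astype(np.float64)
        total += chunk.sum(axis=0)
        total_sq += np.square(chunk).sum(axis=0)
    mean = total / n
    # Population std from aggregated sums; clamp tiny negatives caused by rounding.
    std = np.sqrt(np.maximum(total_sq / n - np.square(mean), 0.0)) + eps
    return mean, std

# Hypothetical usage with an HDF5 file and dataset key:
# import h5py
# with h5py.File('train.h5', 'r') as train_h5:
#     mean, std = sliced_mean_std(train_h5['data'])
```

Averaging per-slice means is exact only when all slices have the same length, and averaging per-slice standard deviations is generally an approximation; accumulating sums and squared sums avoids both caveats and also handles the trailing partial slice without a separate remainder branch, at the cost of one extra accumulator per feature.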
@@ -25,8 +25,25 @@ class CNNDataLoader_CifarClassifierNetwork:
for input_name in self._input_names_:
train_data[input_name] = train_h5[input_name]
data_mean[input_name + '_'] = nd.array(train_h5[input_name][:].mean(axis=0))
data_std[input_name + '_'] = nd.array(train_h5[input_name][:].std(axis=0) + 1e-5)
train_dataset = train_h5[input_name]
train_dataset_shape = train_data[input_name].shape
# slice_size limits the memory consumption, by only loading slices of size <500MB into memory
slice_size = min(train_dataset_shape[0] - 1, int(500e6 / (train_h5[input_name][0].size * \
train_h5[input_name][0].itemsize)))
num_slices = max(1, int(train_h5[input_name].shape[0] / slice_size))
mean = np.zeros(train_dataset_shape[1: ])
std = np.zeros(train_dataset_shape[1: ])
for i in range(int(train_dataset_shape[0] / slice_size)):
mean += train_dataset[i * slice_size: (i + 1) * slice_size].mean(axis=0) / num_slices
std += train_dataset[i * slice_size: (i + 1) * slice_size].std(axis=0) / num_slices
if slice_size > train_dataset_shape[0] - 1:
mean += train_dataset[num_slices * slice_size: ].mean(axis=0) / (slice_size - num_slices % slice_size)
std += train_dataset[num_slices * slice_size: ].std(axis=0) / (slice_size - num_slices % slice_size)
std += 1e-5
data_mean[input_name + '_'] = nd.array(mean)
data_std[input_name + '_'] = nd.array(std)
if 'images' in train_h5:
train_images = train_h5['images']
......
@@ -25,8 +25,25 @@ class CNNDataLoader_EpisodicMemoryNetwork:
for input_name in self._input_names_:
train_data[input_name] = train_h5[input_name]
data_mean[input_name + '_'] = nd.array(train_h5[input_name][:].mean(axis=0))
data_std[input_name + '_'] = nd.array(train_h5[input_name][:].std(axis=0) + 1e-5)
train_dataset = train_h5[input_name]
train_dataset_shape = train_data[input_name].shape
# slice_size limits the memory consumption, by only loading slices of size <500MB into memory
slice_size = min(train_dataset_shape[0] - 1, int(500e6 / (train_h5[input_name][0].size * \
train_h5[input_name][0].itemsize)))
num_slices = max(1, int(train_h5[input_name].shape[0] / slice_size))
mean = np.zeros(train_dataset_shape[1: ])
std = np.zeros(train_dataset_shape[1: ])
for i in range(int(train_dataset_shape[0] / slice_size)):
mean += train_dataset[i * slice_size: (i + 1) * slice_size].mean(axis=0) / num_slices
std += train_dataset[i * slice_size: (i + 1) * slice_size].std(axis=0) / num_slices
if slice_size > train_dataset_shape[0] - 1:
mean += train_dataset[num_slices * slice_size: ].mean(axis=0) / (slice_size - num_slices % slice_size)
std += train_dataset[num_slices * slice_size: ].std(axis=0) / (slice_size - num_slices % slice_size)
std += 1e-5
data_mean[input_name + '_'] = nd.array(mean)
data_std[input_name + '_'] = nd.array(std)
if 'images' in train_h5:
train_images = train_h5['images']
......
@@ -26,8 +26,25 @@ class CNNDataLoader_Invariant:
for input_name in self._input_names_:
train_data[input_name] = train_h5[input_name]
data_mean[input_name + '_'] = nd.array(train_h5[input_name][:].mean(axis=0))
data_std[input_name + '_'] = nd.array(train_h5[input_name][:].std(axis=0) + 1e-5)
train_dataset = train_h5[input_name]
train_dataset_shape = train_data[input_name].shape
# slice_size limits the memory consumption, by only loading slices of size <500MB into memory
slice_size = min(train_dataset_shape[0] - 1, int(500e6 / (train_h5[input_name][0].size * \
train_h5[input_name][0].itemsize)))
num_slices = max(1, int(train_h5[input_name].shape[0] / slice_size))
mean = np.zeros(train_dataset_shape[1: ])
std = np.zeros(train_dataset_shape[1: ])
for i in range(int(train_dataset_shape[0] / slice_size)):
mean += train_dataset[i * slice_size: (i + 1) * slice_size].mean(axis=0) / num_slices
std += train_dataset[i * slice_size: (i + 1) * slice_size].std(axis=0) / num_slices
if slice_size > train_dataset_shape[0] - 1:
mean += train_dataset[num_slices * slice_size: ].mean(axis=0) / (slice_size - num_slices % slice_size)
std += train_dataset[num_slices * slice_size: ].std(axis=0) / (slice_size - num_slices % slice_size)
std += 1e-5
data_mean[input_name + '_'] = nd.array(mean)
data_std[input_name + '_'] = nd.array(std)
if 'images' in train_h5:
train_images = train_h5['images']
......
@@ -23,13 +23,30 @@ class CNNDataLoader_LoadNetworkTest:
data_std = {}
train_images = {}
for input_name in self._input_names_:
train_data[input_name] = train_h5[input_name]
data_mean[input_name + '_'] = nd.array(train_h5[input_name][:].mean(axis=0))
data_std[input_name + '_'] = nd.array(train_h5[input_name][:].std(axis=0) + 1e-5)
if 'images' in train_h5:
train_images = train_h5['images']
for input_name in self._input_names_:
train_data[input_name] = train_h5[input_name]
train_dataset = train_h5[input_name]
train_dataset_shape = train_data[input_name].shape
# slice_size limits the memory consumption, by only loading slices of size <500MB into memory
slice_size = min(train_dataset_shape[0] - 1, int(500e6 / (train_h5[input_name][0].size * \
train_h5[input_name][0].itemsize)))
num_slices = max(1, int(train_h5[input_name].shape[0] / slice_size))
mean = np.zeros(train_dataset_shape[1: ])
std = np.zeros(train_dataset_shape[1: ])
for i in range(int(train_dataset_shape[0] / slice_size)):
mean += train_dataset[i * slice_size: (i + 1) * slice_size].mean(axis=0) / num_slices
std += train_dataset[i * slice_size: (i + 1) * slice_size].std(axis=0) / num_slices
if slice_size > train_dataset_shape[0] - 1:
mean += train_dataset[num_slices * slice_size: ].mean(axis=0) / (slice_size - num_slices % slice_size)
std += train_dataset[num_slices * slice_size: ].std(axis=0) / (slice_size - num_slices % slice_size)
std += 1e-5
data_mean[input_name + '_'] = nd.array(mean)
data_std[input_name + '_'] = nd.array(std)
if 'images' in train_h5:
train_images = train_h5['images']
train_label = {}
index = 0
......
@@ -26,8 +26,25 @@ class CNNDataLoader_MultipleStreams:
for input_name in self._input_names_:
train_data[input_name] = train_h5[input_name]
data_mean[input_name + '_'] = nd.array(train_h5[input_name][:].mean(axis=0))
data_std[input_name + '_'] = nd.array(train_h5[input_name][:].std(axis=0) + 1e-5)
train_dataset = train_h5[input_name]
train_dataset_shape = train_data[input_name].shape
# slice_size limits the memory consumption, by only loading slices of size <500MB into memory
slice_size = min(train_dataset_shape[0] - 1, int(500e6 / (train_h5[input_name][0].size * \
train_h5[input_name][0].itemsize)))
num_slices = max(1, int(train_h5[input_name].shape[0] / slice_size))
mean = np.zeros(train_dataset_shape[1: ])
std = np.zeros(train_dataset_shape[1: ])
for i in range(int(train_dataset_shape[0] / slice_size)):
mean += train_dataset[i * slice_size: (i + 1) * slice_size].mean(axis=0) / num_slices
std += train_dataset[i * slice_size: (i + 1) * slice_size].std(axis=0) / num_slices
if slice_size > train_dataset_shape[0] - 1:
mean += train_dataset[num_slices * slice_size: ].mean(axis=0) / (slice_size - num_slices % slice_size)
std += train_dataset[num_slices * slice_size: ].std(axis=0) / (slice_size - num_slices % slice_size)
std += 1e-5
data_mean[input_name + '_'] = nd.array(mean)
data_std[input_name + '_'] = nd.array(std)
if 'images' in train_h5:
train_images = train_h5['images']
......
@@ -26,8 +26,25 @@ class CNNDataLoader_RNNencdec:
for input_name in self._input_names_:
train_data[input_name] = train_h5[input_name]
data_mean[input_name + '_'] = nd.array(train_h5[input_name][:].mean(axis=0))
data_std[input_name + '_'] = nd.array(train_h5[input_name][:].std(axis=0) + 1e-5)
train_dataset = train_h5[input_name]
train_dataset_shape = train_data[input_name].shape
# slice_size limits the memory consumption, by only loading slices of size <500MB into memory
slice_size = min(train_dataset_shape[0] - 1, int(500e6 / (train_h5[input_name][0].size * \
train_h5[input_name][0].itemsize)))
num_slices = max(1, int(train_h5[input_name].shape[0] / slice_size))
mean = np.zeros(train_dataset_shape[1: ])
std = np.zeros(train_dataset_shape[1: ])
for i in range(int(train_dataset_shape[0] / slice_size)):
mean += train_dataset[i * slice_size: (i + 1) * slice_size].mean(axis=0) / num_slices
std += train_dataset[i * slice_size: (i + 1) * slice_size].std(axis=0) / num_slices
if slice_size > train_dataset_shape[0] - 1:
mean += train_dataset[num_slices * slice_size: ].mean(axis=0) / (slice_size - num_slices % slice_size)
std += train_dataset[num_slices * slice_size: ].std(axis=0) / (slice_size - num_slices % slice_size)
std += 1e-5
data_mean[input_name + '_'] = nd.array(mean)
data_std[input_name + '_'] = nd.array(std)
if 'images' in train_h5:
train_images = train_h5['images']
......
@@ -26,12 +26,30 @@ class CNNDataLoader_RNNsearch:
for input_name in self._input_names_:
train_data[input_name] = train_h5[input_name]
data_mean[input_name + '_'] = nd.array(train_h5[input_name][:].mean(axis=0))
data_std[input_name + '_'] = nd.array(train_h5[input_name][:].std(axis=0) + 1e-5)
train_dataset = train_h5[input_name]
train_dataset_shape = train_data[input_name].shape
# slice_size limits the memory consumption, by only loading slices of size <500MB into memory
slice_size = min(train_dataset_shape[0] - 1, int(500e6 / (train_h5[input_name][0].size * \
train_h5[input_name][0].itemsize)))
num_slices = max(1, int(train_h5[input_name].shape[0] / slice_size))
mean = np.zeros(train_dataset_shape[1: ])
std = np.zeros(train_dataset_shape[1: ])
for i in range(int(train_dataset_shape[0] / slice_size)):
mean += train_dataset[i * slice_size: (i + 1) * slice_size].mean(axis=0) / num_slices
std += train_dataset[i * slice_size: (i + 1) * slice_size].std(axis=0) / num_slices
if slice_size > train_dataset_shape[0] - 1:
mean += train_dataset[num_slices * slice_size: ].mean(axis=0) / (slice_size - num_slices % slice_size)
std += train_dataset[num_slices * slice_size: ].std(axis=0) / (slice_size - num_slices % slice_size)
std += 1e-5
data_mean[input_name + '_'] = nd.array(mean)
data_std[input_name + '_'] = nd.array(std)
if 'images' in train_h5:
train_images = train_h5['images']
train_label = {}
index = 0
for output_name in self._output_names_:
......
@@ -26,8 +26,25 @@ class CNNDataLoader_RNNtest:
for input_name in self._input_names_:
train_data[input_name] = train_h5[input_name]
data_mean[input_name + '_'] = nd.array(train_h5[input_name][:].mean(axis=0))
data_std[input_name + '_'] = nd.array(train_h5[input_name][:].std(axis=0) + 1e-5)
train_dataset = train_h5[input_name]
train_dataset_shape = train_data[input_name].shape
# slice_size limits the memory consumption, by only loading slices of size <500MB into memory
slice_size = min(train_dataset_shape[0] - 1, int(500e6 / (train_h5[input_name][0].size * \
train_h5[input_name][0].itemsize)))
num_slices = max(1, int(train_h5[input_name].shape[0] / slice_size))
mean = np.zeros(train_dataset_shape[1: ])
std = np.zeros(train_dataset_shape[1: ])
for i in range(int(train_dataset_shape[0] / slice_size)):
mean += train_dataset[i * slice_size: (i + 1) * slice_size].mean(axis=0) / num_slices
std += train_dataset[i * slice_size: (i + 1) * slice_size].std(axis=0) / num_slices
if slice_size > train_dataset_shape[0] - 1:
mean += train_dataset[num_slices * slice_size: ].mean(axis=0) / (slice_size - num_slices % slice_size)
std += train_dataset[num_slices * slice_size: ].std(axis=0) / (slice_size - num_slices % slice_size)
std += 1e-5
data_mean[input_name + '_'] = nd.array(mean)
data_std[input_name + '_'] = nd.array(std)
if 'images' in train_h5:
train_images = train_h5['images']
......
@@ -24,13 +24,30 @@ class CNNDataLoader_ResNeXt50:
data_std = {}
train_images = {}
for input_name in self._input_names_:
train_data[input_name] = train_h5[input_name]
data_mean[input_name + '_'] = nd.array(train_h5[input_name][:].mean(axis=0))
data_std[input_name + '_'] = nd.array(train_h5[input_name][:].std(axis=0) + 1e-5)
if 'images' in train_h5:
train_images = train_h5['images']
for input_name in self._input_names_:
train_data[input_name] = train_h5[input_name]
train_dataset = train_h5[input_name]
train_dataset_shape = train_data[input_name].shape
# slice_size limits the memory consumption, by only loading slices of size <500MB into memory
slice_size = min(train_dataset_shape[0] - 1, int(500e6 / (train_h5[input_name][0].size * \
train_h5[input_name][0].itemsize)))
num_slices = max(1, int(train_h5[input_name].shape[0] / slice_size))
mean = np.zeros(train_dataset_shape[1: ])
std = np.zeros(train_dataset_shape[1: ])
for i in range(int(train_dataset_shape[0] / slice_size)):
mean += train_dataset[i * slice_size: (i + 1) * slice_size].mean(axis=0) / num_slices
std += train_dataset[i * slice_size: (i + 1) * slice_size].std(axis=0) / num_slices
if slice_size > train_dataset_shape[0] - 1:
mean += train_dataset[num_slices * slice_size: ].mean(axis=0) / (slice_size - num_slices % slice_size)
std += train_dataset[num_slices * slice_size: ].std(axis=0) / (slice_size - num_slices % slice_size)
std += 1e-5
data_mean[input_name + '_'] = nd.array(mean)
data_std[input_name + '_'] = nd.array(std)
if 'images' in train_h5:
train_images = train_h5['images']
train_label = {}
index = 0
......
@@ -26,8 +26,25 @@ class CNNDataLoader_Show_attend_tell:
for input_name in self._input_names_:
train_data[input_name] = train_h5[input_name]
data_mean[input_name + '_'] = nd.array(train_h5[input_name][:].mean(axis=0))
data_std[input_name + '_'] = nd.array(train_h5[input_name][:].std(axis=0) + 1e-5)
train_dataset = train_h5[input_name]
train_dataset_shape = train_data[input_name].shape
# slice_size limits the memory consumption, by only loading slices of size <500MB into memory
slice_size = min(train_dataset_shape[0] - 1, int(500e6 / (train_h5[input_name][0].size * \
train_h5[input_name][0].itemsize)))
num_slices = max(1, int(train_h5[input_name].shape[0] / slice_size))
mean = np.zeros(train_dataset_shape[1: ])
std = np.zeros(train_dataset_shape[1: ])
for i in range(int(train_dataset_shape[0] / slice_size)):
mean += train_dataset[i * slice_size: (i + 1) * slice_size].mean(axis=0) / num_slices
std += train_dataset[i * slice_size: (i + 1) * slice_size].std(axis=0) / num_slices
if slice_size > train_dataset_shape[0] - 1:
mean += train_dataset[num_slices * slice_size: ].mean(axis=0) / (slice_size - num_slices % slice_size)
std += train_dataset[num_slices * slice_size: ].std(axis=0) / (slice_size - num_slices % slice_size)
std += 1e-5
data_mean[input_name + '_'] = nd.array(mean)
data_std[input_name + '_'] = nd.array(std)
if 'images' in train_h5:
train_images = train_h5['images']
......
@@ -26,11 +26,29 @@ class CNNDataLoader_ThreeInputCNN_M14:
for input_name in self._input_names_:
train_data[input_name] = train_h5[input_name]
data_mean[input_name + '_'] = nd.array(train_h5[input_name][:].mean(axis=0))
data_std[input_name + '_'] = nd.array(train_h5[input_name][:].std(axis=0) + 1e-5)
train_dataset = train_h5[input_name]
train_dataset_shape = train_data[input_name].shape
# slice_size limits the memory consumption, by only loading slices of size <500MB into memory
slice_size = min(train_dataset_shape[0] - 1, int(500e6 / (train_h5[input_name][0].size * \
train_h5[input_name][0].itemsize)))
num_slices = max(1, int(train_h5[input_name].shape[0] / slice_size))
mean = np.zeros(train_dataset_shape[1: ])
std = np.zeros(train_dataset_shape[1: ])
for i in range(int(train_dataset_shape[0] / slice_size)):
mean += train_dataset[i * slice_size: (i + 1) * slice_size].mean(axis=0) / num_slices
std += train_dataset[i * slice_size: (i + 1) * slice_size].std(axis=0) / num_slices
if slice_size > train_dataset_shape[0] - 1:
mean += train_dataset[num_slices * slice_size: ].mean(axis=0) / (slice_size - num_slices % slice_size)
std += train_dataset[num_slices * slice_size: ].std(axis=0) / (slice_size - num_slices % slice_size)
std += 1e-5
data_mean[input_name + '_'] = nd.array(mean)
data_std[input_name + '_'] = nd.array(std)
if 'images' in train_h5:
train_images = train_h5['images']
train_images = train_h5['images']
train_label = {}
index = 0
......
@@ -25,8 +25,25 @@ class CNNDataLoader_VGG16:
for input_name in self._input_names_:
train_data[input_name] = train_h5[input_name]
data_mean[input_name + '_'] = nd.array(train_h5[input_name][:].mean(axis=0))
data_std[input_name + '_'] = nd.array(train_h5[input_name][:].std(axis=0) + 1e-5)
train_dataset = train_h5[input_name]
train_dataset_shape = train_data[input_name].shape
# slice_size limits the memory consumption, by only loading slices of size <500MB into memory
slice_size = min(train_dataset_shape[0] - 1, int(500e6 / (train_h5[input_name][0].size * \
train_h5[input_name][0].itemsize)))
num_slices = max(1, int(train_h5[input_name].shape[0] / slice_size))
mean = np.zeros(train_dataset_shape[1: ])
std = np.zeros(train_dataset_shape[1: ])
for i in range(int(train_dataset_shape[0] / slice_size)):
mean += train_dataset[i * slice_size: (i + 1) * slice_size].mean(axis=0) / num_slices
std += train_dataset[i * slice_size: (i + 1) * slice_size].std(axis=0) / num_slices
if slice_size > train_dataset_shape[0] - 1:
mean += train_dataset[num_slices * slice_size: ].mean(axis=0) / (slice_size - num_slices % slice_size)
std += train_dataset[num_slices * slice_size: ].std(axis=0) / (slice_size - num_slices % slice_size)
std += 1e-5
data_mean[input_name + '_'] = nd.array(mean)
data_std[input_name + '_'] = nd.array(std)
if 'images' in train_h5:
train_images = train_h5['images']
......