Commit b4d68aa8 authored by Lukas Weber

make merge skip corrupted observables instead of failing

parent fc377147
@@ -12,6 +12,12 @@ namespace loadl {
 results merge(const std::vector<std::filesystem::path> &filenames, size_t rebinning_bin_length,
               size_t sample_skip) {
 	results res;
+	class merge_error : public std::runtime_error {
+	public:
+		merge_error(const std::string &msg) : std::runtime_error{msg} {
+		}
+	};
+
 	// This thing reads the complete time series of an observable which will
 	// probably make it the biggest memory user of load leveller. But since
@@ -25,37 +31,47 @@ results merge(const std::vector<std::filesystem::path> &filenames, size_t rebinn
 		iodump meas_file = iodump::open_readonly(filename);
 		auto g = meas_file.get_root();
 
 		for(const auto &obs_name : g) {
-			size_t vector_length;
+			size_t vector_length{};
+			size_t internal_bin_length{};
 			std::vector<double> samples;
 			auto obs_group = g.open_group(obs_name);
 
-			size_t sample_size = obs_group.get_extent("samples");
-			if(sample_size == 0) { // ignore empty observables
-				continue;
-			}
+			try {
+				size_t sample_size = obs_group.get_extent("samples");
+
-			if(res.observables.count(obs_name) == 0)
-				res.observables.emplace(obs_name, observable_result());
-			auto &obs = res.observables.at(obs_name);
-			obs.name = obs_name;
+				if(sample_size == 0) { // ignore empty observables
+					continue;
+				}
+
-			obs_group.read("bin_length", obs.internal_bin_length);
-			obs_group.read("vector_length", vector_length);
-			obs_group.read("samples", samples);
+				obs_group.read("bin_length", internal_bin_length);
+				obs_group.read("vector_length", vector_length);
+				obs_group.read("samples", samples);
+
-			if(sample_size % vector_length != 0) {
-				throw std::runtime_error{
-				    "merge: sample count is not an integer multiple of the vector length. Corrupt "
-				    "file?"};
-			}
+				if(vector_length == 0) {
+					throw merge_error{"zero vector_length"};
+				}
-			sample_size /= vector_length;
+				if(sample_size % vector_length != 0) {
+					throw merge_error{"sample size is not a multiple of vector_length"};
+				}
+
-			obs.total_sample_count += sample_size - std::min(sample_size, sample_skip);
-			obs.mean.resize(vector_length);
-			obs.error.resize(vector_length);
-			obs.autocorrelation_time.resize(vector_length);
+				if(res.observables.count(obs_name) == 0)
+					res.observables.emplace(obs_name, observable_result());
+				auto &obs = res.observables.at(obs_name);
+				obs.name = obs_name;
+				obs.internal_bin_length = internal_bin_length;
+
+				sample_size /= vector_length;
+				obs.total_sample_count += sample_size - std::min(sample_size, sample_skip);
+				obs.mean.resize(vector_length);
+				obs.error.resize(vector_length);
+				obs.autocorrelation_time.resize(vector_length);
+			} catch(const merge_error &e) {
+				std::cerr << fmt::format("merge: Observable {}:{} corrupted: {}. Skipping...\n", filename.string(), obs_name, e.what());
+			} catch(const iodump_exception &e) {
+				std::cerr << fmt::format("merge: {}. Skipping...\n", e.what());
+			}
 		}
 	}
......
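For reference, here is a minimal, self-contained sketch of the pattern the hunk above introduces: a locally defined exception type marks per-observable validation failures, and a try/catch inside the loop reports the bad entry and moves on instead of aborting the whole merge. The record names, the raw_observable struct, and the plain fprintf logging are illustrative stand-ins, not part of load leveller's actual iodump/fmt-based code.

#include <cstdio>
#include <map>
#include <stdexcept>
#include <string>
#include <vector>

// Toy stand-in for one observable group read from a measurement file.
struct raw_observable {
	std::vector<double> samples;
	size_t vector_length = 0;
};

int main() {
	// Local error type, analogous to merge_error above: it only marks
	// validation failures that should skip the current observable.
	class validation_error : public std::runtime_error {
	public:
		explicit validation_error(const std::string &msg) : std::runtime_error{msg} {}
	};

	// Hypothetical input: the "Corrupt" entry is intentionally broken.
	std::map<std::string, raw_observable> file = {
	    {"Energy", {{1.0, 2.0, 3.0, 4.0}, 2}},
	    {"Corrupt", {{1.0, 3.0, 3.0, 7.1337}, 0}}, // zero vector_length
	    {"Magnetization", {{0.5, 0.25, 0.125}, 3}},
	};

	std::map<std::string, size_t> sample_counts;
	for(const auto &[name, obs] : file) {
		try {
			if(obs.vector_length == 0) {
				throw validation_error{"zero vector_length"};
			}
			if(obs.samples.size() % obs.vector_length != 0) {
				throw validation_error{"sample size is not a multiple of vector_length"};
			}
			sample_counts[name] = obs.samples.size() / obs.vector_length;
		} catch(const validation_error &e) {
			// One corrupted observable no longer aborts the merge:
			// report it and continue with the next one.
			std::fprintf(stderr, "merge sketch: observable %s corrupted: %s. Skipping...\n",
			             name.c_str(), e.what());
		}
	}

	for(const auto &[name, count] : sample_counts) {
		std::printf("%s: %zu samples\n", name.c_str(), count);
	}
}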
@@ -26,6 +26,24 @@ proc = subprocess.run(['valgrind', args.silly_mc, 'single', param_file])
 if proc.returncode != 0:
     sys.exit(1)
+
+# inject some corrupt observables
+measfile = h5py.File(jobdir + '/task0001/run0001.meas.h5', 'a')
+
+corrupt = measfile.create_group('/Corrupt')
+corrupt['samples'] = [1,3,3,7.1337]
+corrupt['vector_length'] = [0]
+
+corrupt = measfile.create_group('/Corrupt2')
+corrupt['samples'] = [1,3,3,7.1337]
+corrupt['vector_length'] = [5]
+corrupt['bin_length'] = [5]
+measfile.close()
+
+proc = subprocess.run(['valgrind', args.silly_mc, 'merge', param_file])
+if proc.returncode != 0:
+    sys.exit(1)
+
 # read results and compare
 with open('silly_job.results.json', 'r') as f:
     results = json.load(f)
 with open(param_file, 'r') as f:
......