Commit 916000d0 authored by Lukas Weber

switch from yaml to json for speed reasons

parent fcf08d9b
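The speed rationale is easy to sanity-check, since a JSON document is itself valid YAML and both parsers accept the same input. A minimal timing sketch (not part of the commit; the document contents are made up):

    import json
    import timeit

    import yaml  # PyYAML; pure Python unless the libyaml C extension is installed

    # A stand-in for a generated parameter file with a few hundred tasks.
    doc = json.dumps({'jobname': 'test', 'tasks': {
        'task{:04d}'.format(i): {'T': 0.1 * i, 'sweeps': 10000}
        for i in range(200)}})

    # json.loads uses a fast C scanner, while yaml.safe_load has to walk a much
    # richer grammar, so it is typically markedly slower on the same input.
    print('json:', timeit.timeit(lambda: json.loads(doc), number=100))
    print('yaml:', timeit.timeit(lambda: yaml.safe_load(doc), number=100))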
@@ -9,7 +9,7 @@ project('load-leveller', 'c', 'cpp',
 )
 fmt_dep = dependency('fmt', fallback : ['fmt', 'fmt_dep'])
-yamlcpp_dep = dependency('yaml-cpp', version : '>=0.6.0', fallback : ['yaml-cpp', 'yamlcpp_dep'])
+json_dep = dependency('nlohmann_json', fallback : ['nlohmann_json', 'nlohmann_json_dep'])
 mpi_dep = dependency('mpi', language : 'cpp')
 # HDF5 is a pain
@@ -24,7 +24,7 @@ endif
 should_install = not meson.is_subproject()
-loadleveller_deps = [ fmt_dep, yamlcpp_dep, mpi_dep, hdf5_dep ]
+loadleveller_deps = [ fmt_dep, json_dep, mpi_dep, hdf5_dep ]
 subdir('src')
......
@@ -8,7 +8,7 @@ import os
 parser = argparse.ArgumentParser(description='Helper script for running and managing loadleveller Monte Carlo jobs.', usage='''loadl <command> <jobscript> [<args>]

-<jobscript> is an executable that prints the job parameter YAML-file to stdout. It is convenient to use the taskmaker python module for this purpose.
+<jobscript> is an executable that prints the job parameter JSON-file to stdout. It is convenient to use the taskmaker python module for this purpose.

 Possible commands and their shorthands are
     delete, d	delete all data related to a job
@@ -73,7 +73,7 @@ def run():
 def delete():
     import shutil
     datadir = '{}.data'.format(job.jobname)
-    results_file = '{}.results.yml'.format(job.jobname)
+    results_file = '{}.results.json'.format(job.jobname)
     if os.path.exists(datadir):
         print('$ rm -r {}'.format(datadir))
......
-import yaml
+import json
 import os
 import subprocess
 import errno

-try:
-    from yaml import CSafeLoader as SafeLoader
-except ImportError:
-    from yaml import SafeLoader
-
-'''Helpers for handling loadleveller jobfiles/scripts. For lack of a better idea, the job description files of loadleveller are actually executables that output a more verbose yaml parameter file to stdout. Use the taskmaker module to write the input scripts.'''
+'''Helpers for handling loadleveller jobfiles/scripts. For lack of a better idea, the job description files of loadleveller are actually executables that output a more verbose json parameter file to stdout. Use the taskmaker module to write the input scripts.'''

 class JobFileGenError(Exception):
     pass
@@ -24,7 +20,7 @@ class JobFile:
             raise JobFileGenError('Generation script "{}" had a non-zero return code. Treating as error.'.format(filename))
         try:
-            parsed_job = yaml.load(self.raw_jobfile, Loader=SafeLoader)
+            parsed_job = json.loads(self.raw_jobfile)
             self.__dict__.update(parsed_job)
         except Exception as e:
             raise JobFileGenError('Could not parse job generation script output: {}'.format(e))
@@ -37,10 +33,10 @@ class JobFile:
             except OSError as e:
                 if e.errno != errno.EEXIST:
                     raise
-            job_input_filename = os.path.join(datadir, 'parameters.yml')
+            job_input_filename = os.path.join(datadir, 'parameters.json')
             with open(job_input_filename, 'w') as f:
                 f.write(self.raw_jobfile)
         except Exception as e:
-            raise JobFileGenError('Could not write parameters.yml: {}'.format(e))
+            raise JobFileGenError('Could not write parameters.json: {}'.format(e))
         return job_input_filename
@@ -52,7 +52,7 @@ def job_need_merge(jobfile):
     result_mtime = 0
     try:
-        result_mtime = os.path.getmtime(jobfile.jobname+'.results.yml')
+        result_mtime = os.path.getmtime(jobfile.jobname+'.results.json')
     except FileNotFoundError:
         return True
......
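For illustration, a minimal jobscript compatible with the parsing above might look like the sketch below. Only `jobname` and the `tasks` map are taken from this diff (JobFile copies all top-level keys into attributes via `self.__dict__.update(parsed_job)`); the task names and parameters are made up, and the real schema is whatever the taskmaker module emits.

    #!/usr/bin/env python3
    # Hypothetical jobscript: an executable that prints the verbose JSON
    # parameter file to stdout, as jobfile.py expects.
    import json
    import sys

    job = {
        'jobname': 'ising_test',  # used to derive '{}.data' and '{}.results.json'
        'tasks': {
            'task0001': {'T': 1.0, 'sweeps': 10000},
            'task0002': {'T': 2.0, 'sweeps': 10000},
        },
    }

    json.dump(job, sys.stdout, indent=1)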
-import yaml
+import json
 import numpy as np
 import itertools

-try:
-    from yaml import CSafeLoader as SafeLoader
-except ImportError:
-    from yaml import SafeLoader
-
-'''This module can be used to easily extract Monte Carlo results from the *.results.yml file produced by the loadleveller library.'''
+'''This module can be used to easily extract Monte Carlo results from the *.results.json file produced by the loadleveller library.'''

 class Observable:
     def __init__(self, num_tasks):
@@ -21,7 +17,7 @@ class Observable:
 class MCArchive:
     def __init__(self, filename):
         with open(filename, 'r') as f:
-            doc = yaml.load(f, Loader=SafeLoader)
+            doc = json.load(f)

         param_names = set(sum([list(task['parameters'].keys()) for task in doc], []))
         observable_names = set(sum([list(task['results'].keys()) for task in doc], []))
@@ -36,9 +32,9 @@ class MCArchive:
             for obs, value in task['results'].items():
                 o = self.observables[obs]
-                o.rebinning_bin_length[i] = int(value.get('rebinning_bin_length',0))
-                o.rebinning_bin_count[i] = int(value.get('rebinning_bin_count',0))
-                o.autocorrelation_time[i] = float(value.get('autocorrelation_time',0))
+                o.rebinning_bin_length[i] = int(value.get('rebin_len',0))
+                o.rebinning_bin_count[i] = int(value.get('rebin_count',0))
+                o.autocorrelation_time[i] = float(value.get('autocorr_time',0))
                 o.mean[i] = np.array(value['mean'], dtype=float)
                 o.error[i] = np.array(value['error'], dtype=float)
......
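Tying the reader above to the writer below: the *.results.json file is a JSON array with one object per task, each carrying `task`, `parameters` and `results` keys (see results::write_json further down). A sketch of reading it without MCArchive, with a hypothetical file name:

    import json

    with open('myjob.results.json') as f:
        doc = json.load(f)  # a list with one object per task

    for task in doc:
        print(task['task'], task['parameters'])
        for obs, value in task['results'].items():
            # the same keys MCArchive reads above
            print(' ', obs, value['mean'], value['error'],
                  value.get('rebin_len', 0), value.get('autocorr_time', 0))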
 import sys
 import os
-import yaml
+import json
 import numpy

 try:
@@ -46,4 +47,4 @@ class TaskMaker:
             task_dict[k] = v
         jobfile_dict['tasks'][task_name] = task_dict

-        print(yaml.dump(jobfile_dict, Dumper=SafeDumper))
+        json.dump(jobfile_dict, sys.stdout, indent=1)
@@ -129,26 +129,32 @@ int jobinfo::read_dump_progress(int task_id) const {
 }

 void jobinfo::concatenate_results() {
-	std::ofstream cat_results{fmt::format("{}.results.yml", jobname)};
+	std::ofstream cat_results{fmt::format("{}.results.json", jobname)};
+	cat_results << "[";
 	for(size_t i = 0; i < task_names.size(); i++) {
-		std::ifstream res_file{taskdir(i) + "/results.yml"};
+		std::ifstream res_file{taskdir(i) + "/results.json"};
 		res_file.seekg(0, res_file.end);
 		size_t size = res_file.tellg();
 		res_file.seekg(0, res_file.beg);

 		std::vector<char> buf(size + 1, 0);
 		res_file.read(buf.data(), size);
-		cat_results << buf.data() << "\n";
+		cat_results << buf.data();
+		if(i < task_names.size()-1) {
+			cat_results << ",";
+		}
+		cat_results << "\n";
 	}
+	cat_results << "]\n";
 }

 void jobinfo::merge_task(int task_id, const std::vector<evalable> &evalables) {
 	std::vector<std::string> meas_files = list_run_files(taskdir(task_id), "meas\\.h5");
 	results results = merge(meas_files, evalables);

-	std::string result_filename = fmt::format("{}/results.yml", taskdir(task_id));
+	std::string result_filename = fmt::format("{}/results.json", taskdir(task_id));
 	const std::string &task_name = task_names.at(task_id);
-	results.write_yaml(result_filename, taskdir(task_id), jobfile["tasks"][task_name].get_yaml());
+	results.write_json(result_filename, taskdir(task_id), jobfile["tasks"][task_name].get_json());
 }

 void jobinfo::log(const std::string &message) {
......
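concatenate_results above splices the raw per-task results.json files into a single JSON array textually, adding a comma between elements instead of re-parsing them. The same logic rendered in Python, with made-up file names:

    import json

    task_files = ['task0001/results.json', 'task0002/results.json']  # hypothetical

    with open('myjob.results.json', 'w') as out:
        out.write('[')
        for i, fn in enumerate(task_files):
            with open(fn) as f:
                out.write(f.read())
            if i < len(task_files) - 1:
                out.write(',')  # comma between elements, none after the last
            out.write('\n')
        out.write(']\n')

    # The spliced file must parse as a single JSON array:
    with open('myjob.results.json') as f:
        assert isinstance(json.load(f), list)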
#include "parser.h"
#include <fstream>
namespace loadl {
parser::iterator::iterator(std::string filename, YAML::Node::iterator it)
parser::iterator::iterator(std::string filename, json::iterator it)
: filename_{std::move(filename)}, it_{std::move(it)} {}
std::pair<std::string, parser> parser::iterator::operator*() {
try {
return std::make_pair(it_->first.as<std::string>(), parser{it_->second, filename_});
} catch(YAML::Exception &e) {
throw std::runtime_error(
fmt::format("YAML: {}: dereferencing map key failed: {}. Maybe it was not a string?",
filename_, e.what()));
}
return std::make_pair(it_.key(), parser{it_.value(), filename_});
}
static std::runtime_error non_map_error(const std::string &filename) {
return std::runtime_error(
fmt::format("YAML: {}: trying to dereference non-map node.", filename));
fmt::format("json: {}: trying to dereference non-map node.", filename));
}
static std::runtime_error key_error(const std::string &filename, const std::string &key) {
return std::runtime_error(
fmt::format("YAML: {}: could not find required key '{}'", filename, key));
fmt::format("json: {}: could not find required key '{}'", filename, key));
}
parser::iterator parser::iterator::operator++() {
@@ -33,17 +28,23 @@ bool parser::iterator::operator!=(const iterator &b) {
 	return it_ != b.it_;
 }

-parser::parser(const YAML::Node &node, const std::string &filename)
-    : content_{node}, filename_{filename} {
-	if(!content_.IsMap()) {
+parser::parser(const json &node, const std::string &filename)
+    : content_(node), filename_{filename} {
+	if(!content_.is_object()) {
 		throw non_map_error(filename);
 	}
 }

-parser::parser(const std::string &filename) : parser{YAML::LoadFile(filename), filename} {}
+parser::parser(const std::string &filename) : filename_{filename} {
+	std::ifstream f(filename);
+	f >> content_;
+
+	if(!content_.is_object()) {
+		throw non_map_error(filename);
+	}
+}

 parser::iterator parser::begin() {
-	if(!content_.IsMap()) {
+	if(!content_.is_object()) {
 		throw non_map_error(filename_);
 	}
@@ -55,35 +56,31 @@ parser::iterator parser::end() {
 }

 bool parser::defined(const std::string &key) const {
-	if(!content_.IsMap()) {
+	if(!content_.is_object()) {
 		return false;
 	}
-	return content_[key].IsDefined();
+	return content_.find(key) != content_.end();
 }

 parser parser::operator[](const std::string &key) {
-	if(!content_.IsMap()) {
+	if(!content_.is_object()) {
 		throw non_map_error(filename_);
 	}

 	auto node = content_[key];
-	if(!node.IsDefined()) {
+	if(node.is_null()) {
 		throw key_error(filename_, key);
 	}

-	if(!node.IsMap()) {
+	if(!node.is_object()) {
 		throw std::runtime_error(fmt::format(
-		    "YAML: {}: Found key '{}', but it has a scalar value. Was expecting it to be a map",
+		    "json: {}: Found key '{}', but it has a scalar value. Was expecting it to be a map",
 		    filename_, key));
 	}

-	try {
-		return parser{node, filename_};
-	} catch(YAML::Exception &) {
-		throw key_error(filename_, key);
-	}
+	return parser{node, filename_};
 }

-const YAML::Node &parser::get_yaml() {
+const json &parser::get_json() const {
 	return content_;
 }
 }
 #pragma once

 #include <fmt/format.h>
-#include <yaml-cpp/yaml.h>
+#include <nlohmann/json.hpp>

 namespace loadl {
@@ -9,22 +9,24 @@ namespace loadl {
 // For simplicity it does not support advanced yaml features such as complex-typed
 // keys in maps.

+using json = nlohmann::json;
+
 class parser {
 private:
-	YAML::Node content_;
+	json content_;
 	const std::string filename_;

 	// fake parser based on a subnode
-	parser(const YAML::Node &node, const std::string &filename);
+	parser(const json &node, const std::string &filename);

 public:
 	class iterator {
 	private:
 		std::string filename_;
-		YAML::Node::iterator it_;
+		json::iterator it_;

 	public:
-		iterator(std::string filename, YAML::Node::iterator it);
+		iterator(std::string filename, json::iterator it);
 		std::pair<std::string, parser> operator*();
 		iterator operator++();
 		bool operator!=(const iterator &b);
@@ -34,16 +36,17 @@ public:
 	template<typename T>
 	T get(const std::string &key) const {
-		if(!content_[key]) {
+		auto v = content_.find(key);
+		if(v == content_.end()) {
 			throw std::runtime_error(
-			    fmt::format("YAML: {}: required key '{}' not found.", filename_, key));
+			    fmt::format("json: {}: required key '{}' not found.", filename_, key));
 		}

-		return content_[key].as<T>();
+		return *v;
 	}

 	template<typename T>
 	auto get(const std::string &key, T default_val) const {
-		return content_[key].as<T>(default_val);
+		return content_.value<T>(key, default_val);
 	}

 	// is key defined?
@@ -55,6 +58,6 @@ public:
 	// This gives access to the underlying yaml-cpp api. Only use it if you absolutely need to.
 	// This function is needed to dump the task settings into the result file for example.
-	const YAML::Node &get_yaml();
+	const json &get_json() const;
 };
 }
#include "results.h"
#include <algorithm>
#include <fstream>
#include <yaml-cpp/yaml.h>
#include <nlohmann/json.hpp>
namespace loadl {
void results::write_yaml(const std::string &filename, const std::string &taskdir,
const YAML::Node &params) {
YAML::Emitter out;
out << YAML::BeginSeq;
out << YAML::BeginMap;
out << YAML::Key << "task" << YAML::Value << taskdir;
out << YAML::Key << "parameters" << YAML::Value << params;
out << YAML::Key << "results" << YAML::Value << YAML::BeginMap;
void results::write_json(const std::string &filename, const std::string &taskdir,
const nlohmann::json &params) {
using json = nlohmann::json;
json obs_list;
for(auto &[obs_name, obs] : observables) {
out << YAML::Key << obs_name;
if(obs.internal_bin_length == 0) {
out << YAML::Comment("evalable");
}
out << YAML::Value << YAML::BeginMap;
out << YAML::Key << "rebinning_bin_length" << YAML::Value << obs.rebinning_bin_length;
out << YAML::Key << "rebinning_bin_count" << YAML::Value << obs.rebinning_bin_count;
out << YAML::Key << "internal_bin_length" << YAML::Value << obs.internal_bin_length;
double max_auto_time = 0;
if(obs.autocorrelation_time.size() > 0) {
max_auto_time =
*std::max_element(obs.autocorrelation_time.begin(), obs.autocorrelation_time.end());
}
out << YAML::Key << "autocorrelation_time" << YAML::Value << max_auto_time;
out << YAML::Key << "mean" << YAML::Value << obs.mean;
out << YAML::Key << "error" << YAML::Value << obs.error;
out << YAML::EndMap;
obs_list[obs_name] = {
{"rebin_len", obs.rebinning_bin_length},
{"rebin_count", obs.rebinning_bin_count},
{"internal_bin_len", obs.internal_bin_length},
{"autocorr_time", max_auto_time},
{"mean", obs.mean},
{"error", obs.error},
};
}
out << YAML::EndMap;
out << YAML::EndMap;
out << YAML::EndSeq;
nlohmann::json out = {
{"task", taskdir},
{"parameters", params},
{"results", obs_list}
};
std::ofstream file(filename);
file << out.c_str();
file << out.dump(1);
}
}
 #pragma once

 #include <map>
 #include <vector>
-
-namespace YAML {
-class Node;
-}
+#include <nlohmann/json.hpp>

 namespace loadl {
@@ -35,8 +32,8 @@ struct observable_result {
 struct results {
 	std::map<std::string, observable_result> observables;

-	// writes out the results in a yaml file.
-	void write_yaml(const std::string &filename, const std::string &taskdir,
-	                const YAML::Node &params);
+	// writes out the results in a json file.
+	void write_json(const std::string &filename, const std::string &taskdir,
+	                const nlohmann::json &params);
 };
 }
@@ -264,18 +264,14 @@ void runner_pt_master::checkpoint_read() {
 	}
 }

-void runner_pt_master::write_params_yaml() {
-	using namespace YAML;
-	Emitter params;
-	params << BeginMap;
+void runner_pt_master::write_params_json() {
+	nlohmann::json params;
 	for(auto c : pt_chains_) {
-		params << Key << fmt::format("chain{:04d}", c.id);
-		params << Value << Flow << c.params;
+		params[fmt::format("chain{:04d}", c.id)] = c.params;
 	}
-	params << EndMap;

-	std::ofstream file{job_.jobdir() + "/pt_optimized_params.yml"};
-	file << params.c_str() << "\n";
+	std::ofstream file{job_.jobdir() + "/pt_optimized_params.json"};
+	file << params.dump(1) << "\n";
 }

 void runner_pt_master::write_param_optimization_stats() {
@@ -321,7 +317,7 @@ void runner_pt_master::checkpoint_write() {
 	}

 	if(po_config_.enabled) {
-		write_params_yaml();
+		write_params_json();
 	}
 }
......
@@ -76,7 +76,7 @@ private:
 	void construct_pt_chains();
 	void checkpoint_write();
 	void checkpoint_read();
-	void write_params_yaml();
+	void write_params_json();
 	void write_param_optimization_stats();
 	int schedule_chain_run();
......
+[wrap-file]
+directory = nlohmann_json-3.4.0
+lead_directory_missing = true
+source_url = https://github.com/nlohmann/json/releases/download/v3.4.0/include.zip
+source_filename = nlohmann_json-3.4.0.zip
+source_hash = bfec46fc0cee01c509cf064d2254517e7fa80d1e7647fea37cf81d97c5682bdc
+patch_url = https://wrapdb.mesonbuild.com/v1/projects/nlohmann_json/3.4.0/2/get_zip
+patch_filename = nlohmann_json-3.4.0-2-wrap.zip
+patch_hash = ce65c4827a8c19e36539ca48d713c5c327f9788ef99d578a996bc136a8619d75
-[wrap-git]
-directory = yaml-cpp
-url = https://github.com/lukas-weber/yaml-cpp.git
-revision = master