From 916000d080211108b37adbbdd0fadbc25f32e815 Mon Sep 17 00:00:00 2001 From: Lukas Weber Date: Mon, 2 Sep 2019 15:51:39 +0200 Subject: [PATCH] switch from yaml to json for speed reasons --- meson.build | 4 +-- python/loadl | 4 +-- python/loadleveller/jobfile.py | 14 ++++----- python/loadleveller/jobstatus.py | 2 +- python/loadleveller/mcextract.py | 16 ++++------ python/loadleveller/taskmaker.py | 3 +- src/jobinfo.cpp | 16 ++++++---- src/parser.cpp | 51 +++++++++++++++----------------- src/parser.h | 23 +++++++------- src/results.cpp | 49 +++++++++++++++--------------- src/results.h | 11 +++---- src/runner_pt.cpp | 16 ++++------ src/runner_pt.h | 2 +- subprojects/nlohmann_json.wrap | 11 +++++++ subprojects/yaml-cpp.wrap | 4 --- 15 files changed, 112 insertions(+), 114 deletions(-) create mode 100644 subprojects/nlohmann_json.wrap delete mode 100644 subprojects/yaml-cpp.wrap diff --git a/meson.build b/meson.build index 86ab74c..bd678c9 100644 --- a/meson.build +++ b/meson.build @@ -9,7 +9,7 @@ project('load-leveller', 'c', 'cpp', ) fmt_dep = dependency('fmt', fallback : ['fmt', 'fmt_dep']) -yamlcpp_dep = dependency('yaml-cpp', version : '>=0.6.0', fallback : ['yaml-cpp', 'yamlcpp_dep']) +json_dep = dependency('nlohmann_json', fallback : ['nlohmann_json', 'nlohmann_json_dep']) mpi_dep = dependency('mpi', language : 'cpp') # HDF5 is a pain @@ -24,7 +24,7 @@ endif should_install = not meson.is_subproject() -loadleveller_deps = [ fmt_dep, yamlcpp_dep, mpi_dep, hdf5_dep ] +loadleveller_deps = [ fmt_dep, json_dep, mpi_dep, hdf5_dep ] subdir('src') diff --git a/python/loadl b/python/loadl index a582100..c0c0484 100755 --- a/python/loadl +++ b/python/loadl @@ -8,7 +8,7 @@ import os parser = argparse.ArgumentParser(description='Helper script for running and managing loadleveller Monte Carlo jobs.', usage='''loadl [] - is an executable that prints the job parameter YAML-file to stdout. It is convenient to use the taskmaker python module for this purpose. 
+ is an executable that prints the job parameter JSON-file to stdout. It is convenient to use the taskmaker python module for this purpose. Possible commands and their shorthands are delete, d delete all data related to a job @@ -73,7 +73,7 @@ def run(): def delete(): import shutil datadir = '{}.data'.format(job.jobname) - results_file = '{}.results.yml'.format(job.jobname) + results_file = '{}.results.json'.format(job.jobname) if os.path.exists(datadir): print('$ rm -r {}'.format(datadir)) diff --git a/python/loadleveller/jobfile.py b/python/loadleveller/jobfile.py index eb37bb0..ac8c277 100644 --- a/python/loadleveller/jobfile.py +++ b/python/loadleveller/jobfile.py @@ -1,14 +1,10 @@ -import yaml +import json import os import subprocess import errno -try: - from yaml import CSafeLoader as SafeLoader -except ImportError: - from yaml import SafeLoader -'''Helpers for handling loadleveller jobfiles/scripts. For lack of a better idea, the job description files of loadleveller are actually executables that output a more verbose yaml parameter file to stdout. Use the taskmaker module to write the input scripts.''' +'''Helpers for handling loadleveller jobfiles/scripts. For lack of a better idea, the job description files of loadleveller are actually executables that output a more verbose json parameter file to stdout. Use the taskmaker module to write the input scripts.''' class JobFileGenError(Exception): pass @@ -24,7 +20,7 @@ class JobFile: raise JobFileGenError('Generation script "{}" had a non-zero return code. 
Treating as error.'.format(filename)) try: - parsed_job = yaml.load(self.raw_jobfile, Loader=SafeLoader) + parsed_job = json.loads(self.raw_jobfile) self.__dict__.update(parsed_job) except Exception as e: raise JobFileGenError('Could not parse job generation script output: {}'.format(e)) @@ -37,10 +33,10 @@ class JobFile: except OSError as e: if e.errno != errno.EEXIST: raise - job_input_filename = os.path.join(datadir, 'parameters.yml') + job_input_filename = os.path.join(datadir, 'parameters.json') with open(job_input_filename, 'w') as f: f.write(self.raw_jobfile) except Exception as e: - raise JobFileGenError('Could not write parameters.yml: {}'.format(e)) + raise JobFileGenError('Could not write parameters.json: {}'.format(e)) return job_input_filename diff --git a/python/loadleveller/jobstatus.py b/python/loadleveller/jobstatus.py index bef687b..ec2b76b 100644 --- a/python/loadleveller/jobstatus.py +++ b/python/loadleveller/jobstatus.py @@ -52,7 +52,7 @@ def job_need_merge(jobfile): result_mtime = 0 try: - result_mtime = os.path.getmtime(jobfile.jobname+'.results.yml') + result_mtime = os.path.getmtime(jobfile.jobname+'.results.json') except FileNotFoundError: return True diff --git a/python/loadleveller/mcextract.py b/python/loadleveller/mcextract.py index 9c4c8fe..c21f0b2 100644 --- a/python/loadleveller/mcextract.py +++ b/python/loadleveller/mcextract.py @@ -1,13 +1,9 @@ -import yaml +import json import numpy as np import itertools -try: - from yaml import CSafeLoader as SafeLoader -except ImportError: - from yaml import SafeLoader -'''This module can be used to easily extract Monte Carlo results from the *.results.yml file produced by the loadleveller library.''' +'''This module can be used to easily extract Monte Carlo results from the *.results.json file produced by the loadleveller library.''' class Observable: def __init__(self, num_tasks): @@ -21,7 +17,7 @@ class Observable: class MCArchive: def __init__(self, filename): with open(filename, 'r') as f: 
- doc = yaml.load(f, Loader=SafeLoader) + doc = json.load(f) param_names = set(sum([list(task['parameters'].keys()) for task in doc], [])) observable_names = set(sum([list(task['results'].keys()) for task in doc], [])) @@ -36,9 +32,9 @@ class MCArchive: for obs, value in task['results'].items(): o = self.observables[obs] - o.rebinning_bin_length[i] = int(value.get('rebinning_bin_length',0)) - o.rebinning_bin_count[i] = int(value.get('rebinning_bin_count',0)) - o.autocorrelation_time[i] = float(value.get('autocorrelation_time',0)) + o.rebinning_bin_length[i] = int(value.get('rebin_len',0)) + o.rebinning_bin_count[i] = int(value.get('rebin_count',0)) + o.autocorrelation_time[i] = float(value.get('autocorr_time',0)) o.mean[i] = np.array(value['mean'], dtype=float) o.error[i] = np.array(value['error'], dtype=float) diff --git a/python/loadleveller/taskmaker.py b/python/loadleveller/taskmaker.py index 4e49fca..e93237e 100644 --- a/python/loadleveller/taskmaker.py +++ b/python/loadleveller/taskmaker.py @@ -1,6 +1,7 @@ import sys import os import yaml +import json import numpy try: @@ -46,4 +47,4 @@ class TaskMaker: task_dict[k] = v jobfile_dict['tasks'][task_name] = task_dict - print(yaml.dump(jobfile_dict, Dumper=SafeDumper)) + json.dump(jobfile_dict, sys.stdout, indent=1) diff --git a/src/jobinfo.cpp b/src/jobinfo.cpp index 37d8f33..dd551e5 100644 --- a/src/jobinfo.cpp +++ b/src/jobinfo.cpp @@ -129,26 +129,32 @@ int jobinfo::read_dump_progress(int task_id) const { } void jobinfo::concatenate_results() { - std::ofstream cat_results{fmt::format("{}.results.yml", jobname)}; + std::ofstream cat_results{fmt::format("{}.results.json", jobname)}; + cat_results << "["; for(size_t i = 0; i < task_names.size(); i++) { - std::ifstream res_file{taskdir(i) + "/results.yml"}; + std::ifstream res_file{taskdir(i) + "/results.json"}; res_file.seekg(0, res_file.end); size_t size = res_file.tellg(); res_file.seekg(0, res_file.beg); std::vector<char> buf(size + 1, 0); res_file.read(buf.data(), 
size); - cat_results << buf.data() << "\n"; + cat_results << buf.data(); + if(i < task_names.size()-1) { + cat_results << ","; + } + cat_results << "\n"; } + cat_results << "]\n"; } void jobinfo::merge_task(int task_id, const std::vector<evalable> &evalables) { std::vector<std::string> meas_files = list_run_files(taskdir(task_id), "meas\\.h5"); results results = merge(meas_files, evalables); - std::string result_filename = fmt::format("{}/results.yml", taskdir(task_id)); + std::string result_filename = fmt::format("{}/results.json", taskdir(task_id)); const std::string &task_name = task_names.at(task_id); - results.write_yaml(result_filename, taskdir(task_id), jobfile["tasks"][task_name].get_yaml()); + results.write_json(result_filename, taskdir(task_id), jobfile["tasks"][task_name].get_json()); } void jobinfo::log(const std::string &message) { diff --git a/src/parser.cpp b/src/parser.cpp index dfec39c..de72c13 100644 --- a/src/parser.cpp +++ b/src/parser.cpp @@ -1,28 +1,23 @@ #include "parser.h" +#include <fstream> namespace loadl { -parser::iterator::iterator(std::string filename, YAML::Node::iterator it) +parser::iterator::iterator(std::string filename, json::iterator it) : filename_{std::move(filename)}, it_{std::move(it)} {} std::pair<std::string, parser> parser::iterator::operator*() { - try { - return std::make_pair(it_->first.as<std::string>(), parser{it_->second, filename_}); - } catch(YAML::Exception &e) { - throw std::runtime_error( - fmt::format("YAML: {}: dereferencing map key failed: {}. 
Maybe it was not a string?", filename_, e.what())); - } + return std::make_pair(it_.key(), parser{it_.value(), filename_}); } static std::runtime_error non_map_error(const std::string &filename) { return std::runtime_error( - fmt::format("YAML: {}: trying to dereference non-map node.", filename)); + fmt::format("json: {}: trying to dereference non-map node.", filename)); } static std::runtime_error key_error(const std::string &filename, const std::string &key) { return std::runtime_error( - fmt::format("YAML: {}: could not find required key '{}'", filename, key)); + fmt::format("json: {}: could not find required key '{}'", filename, key)); } parser::iterator parser::iterator::operator++() { @@ -33,17 +28,23 @@ bool parser::iterator::operator!=(const iterator &b) { return it_ != b.it_; } -parser::parser(const YAML::Node &node, const std::string &filename) - : content_{node}, filename_(filename) { - if(!content_.IsMap()) { +parser::parser(const json &node, const std::string &filename) + : content_(node), filename_(filename) { + if(!content_.is_object()) { throw non_map_error(filename); } } -parser::parser(const std::string &filename) : parser{YAML::LoadFile(filename), filename} {} +parser::parser(const std::string &filename) : filename_(filename) { + std::ifstream f(filename); + f >> content_; + if(!content_.is_object()) { + throw non_map_error(filename); + } +} parser::iterator parser::begin() { - if(!content_.IsMap()) { + if(!content_.is_object()) { throw non_map_error(filename_); } @@ -55,35 +56,31 @@ parser::iterator parser::end() { } bool parser::defined(const std::string &key) const { - if(!content_.IsMap()) { + if(!content_.is_object()) { return false; } - return content_[key].IsDefined(); + return content_.find(key) != content_.end(); } parser parser::operator[](const std::string &key) { - if(!content_.IsMap()) { + if(!content_.is_object()) { throw non_map_error(filename_); } auto node = content_[key]; - if(!node.IsDefined()) { + if(node.is_null()) { throw 
key_error(filename_, key); } - if(!node.IsMap()) { + if(!node.is_object()) { throw std::runtime_error(fmt::format( - "YAML: {}: Found key '{}', but it has a scalar value. Was expecting it to be a map", + "json: {}: Found key '{}', but it has a scalar value. Was expecting it to be a map", filename_, key)); } - try { - return parser{node, filename_}; - } catch(YAML::Exception &) { - throw key_error(filename_, key); - } + return parser{node, filename_}; } -const YAML::Node &parser::get_yaml() { +const json &parser::get_json() const { return content_; } } diff --git a/src/parser.h b/src/parser.h index f1fafb6..1278914 100644 --- a/src/parser.h +++ b/src/parser.h @@ -1,7 +1,7 @@ #pragma once #include -#include +#include namespace loadl { @@ -9,22 +9,24 @@ namespace loadl { // For simplicity it does not support advanced yaml features such as complex-typed // keys in maps. +using json = nlohmann::json; + class parser { private: - YAML::Node content_; + json content_; const std::string filename_; // fake parser based on a subnode - parser(const YAML::Node &node, const std::string &filename); + parser(const json &node, const std::string &filename); public: class iterator { private: std::string filename_; - YAML::Node::iterator it_; + json::iterator it_; public: - iterator(std::string filename, YAML::Node::iterator it); + iterator(std::string filename, json::iterator it); std::pair operator*(); iterator operator++(); bool operator!=(const iterator &b); @@ -34,16 +36,17 @@ public: template T get(const std::string &key) const { - if(!content_[key]) { + auto v = content_.find(key); + if(v == content_.end()) { throw std::runtime_error( - fmt::format("YAML: {}: required key '{}' not found.", filename_, key)); + fmt::format("json: {}: required key '{}' not found.", filename_, key)); } - return content_[key].as(); + return *v; } template auto get(const std::string &key, T default_val) const { - return content_[key].as(default_val); + return content_.value(key, default_val); } // is 
key defined? @@ -55,6 +58,6 @@ public: // This gives access to the underlying yaml-cpp api. Only use it if you absolutely need to. // This function is needed to dump the task settings into the result file for example. - const YAML::Node &get_yaml(); + const json &get_json() const; }; } diff --git a/src/results.cpp b/src/results.cpp index 330df85..1569f4d 100644 --- a/src/results.cpp +++ b/src/results.cpp @@ -1,42 +1,41 @@ #include "results.h" #include #include -#include +#include namespace loadl { -void results::write_yaml(const std::string &filename, const std::string &taskdir, - const YAML::Node ¶ms) { - YAML::Emitter out; - out << YAML::BeginSeq; - out << YAML::BeginMap; - out << YAML::Key << "task" << YAML::Value << taskdir; - out << YAML::Key << "parameters" << YAML::Value << params; - out << YAML::Key << "results" << YAML::Value << YAML::BeginMap; +void results::write_json(const std::string &filename, const std::string &taskdir, + const nlohmann::json ¶ms) { + using json = nlohmann::json; + + json obs_list; + for(auto &[obs_name, obs] : observables) { - out << YAML::Key << obs_name; - if(obs.internal_bin_length == 0) { - out << YAML::Comment("evalable"); - } - out << YAML::Value << YAML::BeginMap; - out << YAML::Key << "rebinning_bin_length" << YAML::Value << obs.rebinning_bin_length; - out << YAML::Key << "rebinning_bin_count" << YAML::Value << obs.rebinning_bin_count; - out << YAML::Key << "internal_bin_length" << YAML::Value << obs.internal_bin_length; double max_auto_time = 0; if(obs.autocorrelation_time.size() > 0) { max_auto_time = *std::max_element(obs.autocorrelation_time.begin(), obs.autocorrelation_time.end()); } - out << YAML::Key << "autocorrelation_time" << YAML::Value << max_auto_time; - out << YAML::Key << "mean" << YAML::Value << obs.mean; - out << YAML::Key << "error" << YAML::Value << obs.error; - out << YAML::EndMap; + + obs_list[obs_name] = { + {"rebin_len", obs.rebinning_bin_length}, + {"rebin_count", obs.rebinning_bin_count}, + 
{"internal_bin_len", obs.internal_bin_length}, + {"autocorr_time", max_auto_time}, + {"mean", obs.mean}, + {"error", obs.error}, + }; + } - out << YAML::EndMap; - out << YAML::EndMap; - out << YAML::EndSeq; + + nlohmann::json out = { + {"task", taskdir}, + {"parameters", params}, + {"results", obs_list} + }; std::ofstream file(filename); - file << out.c_str(); + file << out.dump(1); } } diff --git a/src/results.h b/src/results.h index 3d08273..53889d7 100644 --- a/src/results.h +++ b/src/results.h @@ -1,10 +1,7 @@ #pragma once #include #include - -namespace YAML { -class Node; -} +#include namespace loadl { @@ -35,8 +32,8 @@ struct observable_result { struct results { std::map observables; - // writes out the results in a yaml file. - void write_yaml(const std::string &filename, const std::string &taskdir, - const YAML::Node ¶ms); + // writes out the results in a json file. + void write_json(const std::string &filename, const std::string &taskdir, + const nlohmann::json ¶ms); }; } diff --git a/src/runner_pt.cpp b/src/runner_pt.cpp index da4f713..17b6951 100644 --- a/src/runner_pt.cpp +++ b/src/runner_pt.cpp @@ -264,18 +264,14 @@ void runner_pt_master::checkpoint_read() { } } -void runner_pt_master::write_params_yaml() { - using namespace YAML; - Emitter params; - params << BeginMap; +void runner_pt_master::write_params_json() { + nlohmann::json params; for(auto c : pt_chains_) { - params << Key << fmt::format("chain{:04d}", c.id); - params << Value << Flow << c.params; + params[fmt::format("chain{:04d}", c.id)] = c.params; } - params << EndMap; - std::ofstream file{job_.jobdir() + "/pt_optimized_params.yml"}; - file << params.c_str() << "\n"; + std::ofstream file{job_.jobdir() + "/pt_optimized_params.json"}; + file << params.dump(1) << "\n"; } void runner_pt_master::write_param_optimization_stats() { @@ -321,7 +317,7 @@ void runner_pt_master::checkpoint_write() { } if(po_config_.enabled) { - write_params_yaml(); + write_params_json(); } } diff --git 
a/src/runner_pt.h b/src/runner_pt.h index 8aba9fe..bcc93d2 100644 --- a/src/runner_pt.h +++ b/src/runner_pt.h @@ -76,7 +76,7 @@ private: void construct_pt_chains(); void checkpoint_write(); void checkpoint_read(); - void write_params_yaml(); + void write_params_json(); void write_param_optimization_stats(); int schedule_chain_run(); diff --git a/subprojects/nlohmann_json.wrap b/subprojects/nlohmann_json.wrap new file mode 100644 index 0000000..55e7de3 --- /dev/null +++ b/subprojects/nlohmann_json.wrap @@ -0,0 +1,11 @@ +[wrap-file] +directory = nlohmann_json-3.4.0 +lead_directory_missing = true + +source_url = https://github.com/nlohmann/json/releases/download/v3.4.0/include.zip +source_filename = nlohmann_json-3.4.0.zip +source_hash = bfec46fc0cee01c509cf064d2254517e7fa80d1e7647fea37cf81d97c5682bdc + +patch_url = https://wrapdb.mesonbuild.com/v1/projects/nlohmann_json/3.4.0/2/get_zip +patch_filename = nlohmann_json-3.4.0-2-wrap.zip +patch_hash = ce65c4827a8c19e36539ca48d713c5c327f9788ef99d578a996bc136a8619d75 diff --git a/subprojects/yaml-cpp.wrap b/subprojects/yaml-cpp.wrap deleted file mode 100644 index 7f24d59..0000000 --- a/subprojects/yaml-cpp.wrap +++ /dev/null @@ -1,4 +0,0 @@ -[wrap-git] -directory = yaml-cpp -url = https://github.com/lukas-weber/yaml-cpp.git -revision = master -- GitLab