From 61aae46a10673e46af971f4d4d6e017a54244d41 Mon Sep 17 00:00:00 2001 From: laochailan Date: Thu, 30 Apr 2020 10:09:22 +0200 Subject: [PATCH] add intel_mkl rng backend MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit it doesn’t seem to be faster than internal_mersenne still. but maybe in an actually simulation for some cases it is? --- meson.build | 15 +++++- meson_options.txt | 4 +- python/loadl | 2 +- src/config.h.in | 1 + src/meson.build | 9 ++-- src/random.cpp | 20 -------- src/random.h | 70 +++++--------------------- src/{ => random}/MersenneTwister.h | 0 src/random/intel_mkl.h | 79 ++++++++++++++++++++++++++++++ src/random/internal_mt.h | 40 +++++++++++++++ src/random/stl_random.h | 45 +++++++++++++++++ test/bench_rng.cpp | 30 ++++++++++++ test/meson.build | 3 ++ 13 files changed, 232 insertions(+), 86 deletions(-) delete mode 100644 src/random.cpp rename src/{ => random}/MersenneTwister.h (100%) create mode 100644 src/random/intel_mkl.h create mode 100644 src/random/internal_mt.h create mode 100644 src/random/stl_random.h create mode 100644 test/bench_rng.cpp diff --git a/meson.build b/meson.build index bd678c9..8460c43 100644 --- a/meson.build +++ b/meson.build @@ -22,10 +22,21 @@ if not hdf5_dep.found() or get_option('force_hdf5_fallback') hdf5_dep = hdf5_proj.get_variable('hdf5_dep') endif -should_install = not meson.is_subproject() - loadleveller_deps = [ fmt_dep, json_dep, mpi_dep, hdf5_dep ] +# intel mkl +mkl_dep = dependency('mkl-dynamic-ilp64-seq', required : false) +if not mkl_dep.found() + mkl_dep = dependency('mkl-static-ilp64-seq', required : (get_option('rng_backend') == 'intel_mkl')) +endif + +if mkl_dep.found() + loadleveller_deps += mkl_dep +endif + + +should_install = not meson.is_subproject() + subdir('src') if not meson.is_subproject() diff --git a/meson_options.txt b/meson_options.txt index 5a394a9..9a03b23 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ -1,3 +1,3 @@ -option('rng_backend', type : 'combo', choices : ['internal_mersenne', 'stl_mt19937', 'stl_mt19937_64'], description : 'The backend used for the random number generator provided by loadleveller') +option('rng_backend', type : 'combo', choices : ['internal_mersenne', 'stl_mt19937', 'intel_mkl'], description : 'The backend used for the random number generator provided by loadleveller') -option('force_hdf5_fallback', type : 'boolean', value : false, description : 'Use the hdf5.wrap even if an installed versionwas found.') +option('force_hdf5_fallback', type : 'boolean', value : false, description : 'Use the hdf5.wrap even if an installed version was found.') diff --git a/python/loadl b/python/loadl index 5242e35..56b04e7 100755 --- a/python/loadl +++ b/python/loadl @@ -99,7 +99,7 @@ def merge(): sys.exit(1) cmd = '{} merge "{}"'.format(job.jobconfig['mc_binary'], job_input_filename) print('$ '+cmd) - subprocess.run(cmd, shell=True) + subprocess.run(cmd, shell=True, check=True) def status(): from loadleveller import jobstatus diff --git a/src/config.h.in b/src/config.h.in index d198f09..9239490 100644 --- a/src/config.h.in +++ b/src/config.h.in @@ -1,3 +1,4 @@ #pragma once #define RNG_BACKEND @rng_backend@ +#define HAVE_INTEL_MKL @have_intel_mkl@ diff --git a/src/meson.build b/src/meson.build index 51dec70..cfa647f 100644 --- a/src/meson.build +++ b/src/meson.build @@ -1,11 +1,12 @@ rng_names = { 'stl_mt19937' : 'rng_stl', - 'stl_mt19937_64' : 'rng_stl', 'internal_mersenne' : 'rng_internal_mersenne', + 'intel_mkl' : 'rng_intel_mkl', } conf_data = configuration_data() conf_data.set('rng_backend', rng_names[get_option('rng_backend')]) +conf_data.set('have_intel_mkl', mkl_dep.found()) configure_file(input : 'config.h.in', output : 'config.h', install : should_install, @@ -22,7 +23,6 @@ loadleveller_sources = files([ 'merger.cpp', 'observable.cpp', 'parser.cpp', - 'random.cpp', 'results.cpp', 'runner.cpp', 'runner_pt.cpp', @@ -39,10 +39,13 @@ loadleveller_headers = files([ 'mc.h', 'measurements.h', 'merger.h', - 'MersenneTwister.h', 'observable.h', 'parser.h', 'random.h', + 'random/MersenneTwister.h', + 'random/stl_random.h', + 'random/intel_mkl.h', + 'random/internal_mt.h', 'results.h', 'runner.h', 'runner_pt.h', diff --git a/src/random.cpp b/src/random.cpp deleted file mode 100644 index f5e9d14..0000000 --- a/src/random.cpp +++ /dev/null @@ -1,20 +0,0 @@ -#include "random.h" - -namespace loadl { - -void rng_internal_mersenne::set_seed(uint64_t seed) { - mtrand_.seed(seed); -} - -void rng_internal_mersenne::backend_checkpoint_write(const iodump::group &d) { - std::vector rand_state; - mtrand_.save(rand_state); - d.write("state", rand_state); -} - -void rng_internal_mersenne::backend_checkpoint_read(const iodump::group &d) { - std::vector rand_state; - d.read("state", rand_state); - mtrand_.load(rand_state); -} -} diff --git a/src/random.h b/src/random.h index cc94dfe..4ca14f2 100644 --- a/src/random.h +++ b/src/random.h @@ -1,17 +1,25 @@ #pragma once #include "config.h" - -#include "MersenneTwister.h" -#include "iodump.h" +#include #include -#include +#include "iodump.h" #include // A whole bunch of template magic to make the random backend modular. // We don’t want a vtable on such a performance critical object, otherwise I // would make it settable at runtime. + +#include "random/stl_random.h" + +#include "random/internal_mt.h" + +#ifdef HAVE_INTEL_MKL +#include "random/intel_mkl.h" +#endif + + namespace loadl { template @@ -78,60 +86,6 @@ public: double rand_double(); // in [0,1] }; -// based on a dinosaur code in the MersenneTwister.h header -class rng_internal_mersenne { -private: - MTRand mtrand_; - -public: - void backend_checkpoint_write(const iodump::group &dump_file); - void backend_checkpoint_read(const iodump::group &dump_file); - void set_seed(uint64_t seed); - - double random_double() { - return mtrand_.randDblExc(1); - } - - int random_integer(int bound) { - return mtrand_.randInt(bound - 1); - } -}; - -// based on the c++ stl implementation -template -class rng_stl { -public: - void backend_checkpoint_write(const iodump::group &dump_file) { - std::stringstream buf; - buf << generator_; - buf << real_dist_; - dump_file.write("state", buf.str()); - } - void backend_checkpoint_read(const iodump::group &dump_file) { - std::string state; - dump_file.read("state", state); - std::stringstream buf(state); - buf >> generator_; - buf >> real_dist_; - } - - void set_seed(uint64_t seed) { - generator_.seed(seed); - } - - double random_double() { - return real_dist_(generator_); - } - - uint32_t random_integer(uint32_t bound) { - std::uniform_int_distribution int_dist(0, bound); - return int_dist(generator_); - } - -private: - stl_engine generator_; - std::uniform_real_distribution real_dist_{0, 1}; -}; // RNG_BACKEND is a macro set by the build system. If you add backends and you can help it, // avoid using huge blocks of #ifdefs as it will lead to dead code nobody uses for 10 years. diff --git a/src/MersenneTwister.h b/src/random/MersenneTwister.h similarity index 100% rename from src/MersenneTwister.h rename to src/random/MersenneTwister.h diff --git a/src/random/intel_mkl.h b/src/random/intel_mkl.h new file mode 100644 index 0000000..983cd7b --- /dev/null +++ b/src/random/intel_mkl.h @@ -0,0 +1,79 @@ +#pragma once + +#include +#include +#include "iodump.h" + +namespace loadl { + +class rng_intel_mkl { +private: + static const int buffer_size_ = 10000; + + std::array buffer_; + int buffer_idx_{buffer_size_}; + + VSLStreamStatePtr random_stream_{}; + + void refill_buffer() { + vdRngUniform(VSL_RNG_METHOD_UNIFORM_STD, random_stream_, buffer_size_, buffer_.data(), 0, 1); + } +public: + void backend_checkpoint_write(const iodump::group &dump_file) { + std::vector save_buf(vslGetStreamSize(random_stream_)); + int rc = vslSaveStreamM(random_stream_, save_buf.data()); + if(rc != VSL_ERROR_OK) { + throw std::runtime_error{fmt::format("Could not write Intel MKL RNG: error code {}", rc)}; + } + dump_file.write("state", save_buf); + dump_file.write("buffer", std::vector(buffer_.begin(), buffer_.end())); + dump_file.write("buffer_idx", buffer_idx_); + } + + void backend_checkpoint_read(const iodump::group &dump_file) { + if(random_stream_ != nullptr) { + vslDeleteStream(&random_stream_); + } + std::vector load_buf; + dump_file.read("state", load_buf); + int rc = vslLoadStreamM(&random_stream_, load_buf.data()); + if(rc != VSL_ERROR_OK) { + throw std::runtime_error{fmt::format("Could not load Intel MKL RNG: error code {}", rc)}; + } + + std::vector tmpbuffer; + dump_file.read("buffer", tmpbuffer); + assert(tmpbuffer.size() == buffer_.size()); + std::copy(tmpbuffer.begin(), tmpbuffer.end(), buffer_.begin()); + dump_file.read("buffer_idx", buffer_idx_); + } + + void set_seed(uint64_t seed) { + if(random_stream_ != nullptr) { + vslDeleteStream(&random_stream_); + } + int rc = vslNewStream(&random_stream_, VSL_BRNG_SFMT19937, seed); + if(rc != VSL_ERROR_OK) { + throw std::runtime_error{fmt::format("Could not initialize Intel MKL RNG: error code {}", rc)}; + } + } + + double random_double() { + if(buffer_idx_ >= buffer_size_) { + refill_buffer(); + buffer_idx_ = 0; + } + return buffer_[buffer_idx_++]; + } + + + uint32_t random_integer(uint32_t bound) { + return bound*random_double(); // TODO: use the optimized integer distribution at the cost of a second buffer? + } + + ~rng_intel_mkl() { + vslDeleteStream(&random_stream_); + } +}; + +} diff --git a/src/random/internal_mt.h b/src/random/internal_mt.h new file mode 100644 index 0000000..b40a4a4 --- /dev/null +++ b/src/random/internal_mt.h @@ -0,0 +1,40 @@ +#pragma once + + +#include "iodump.h" +#include "MersenneTwister.h" + +namespace loadl { + +// based on a dinosaur code in the MersenneTwister.h header +class rng_internal_mersenne { +private: + MTRand mtrand_; + +public: + void backend_checkpoint_write(const iodump::group &dump_file) { + std::vector rand_state; + mtrand_.save(rand_state); + dump_file.write("state", rand_state); + } + + void backend_checkpoint_read(const iodump::group &dump_file) { + std::vector rand_state; + dump_file.read("state", rand_state); + mtrand_.load(rand_state); + } + void set_seed(uint64_t seed) { + mtrand_.seed(seed); + } + + + double random_double() { + return mtrand_.randDblExc(1); + } + + int random_integer(int bound) { + return mtrand_.randInt(bound - 1); + } +}; + +} diff --git a/src/random/stl_random.h b/src/random/stl_random.h new file mode 100644 index 0000000..689faf3 --- /dev/null +++ b/src/random/stl_random.h @@ -0,0 +1,45 @@ +#pragma once + +#include +#include +#include "iodump.h" + +namespace loadl { + +// based on the c++ stl implementation +template +class rng_stl { +public: + void backend_checkpoint_write(const iodump::group &dump_file) { + std::stringstream buf; + buf << generator_; + buf << real_dist_; + dump_file.write("state", buf.str()); + } + void backend_checkpoint_read(const iodump::group &dump_file) { + std::string state; + dump_file.read("state", state); + std::stringstream buf(state); + buf >> generator_; + buf >> real_dist_; + } + + void set_seed(uint64_t seed) { + generator_.seed(seed); + } + + double random_double() { + return real_dist_(generator_); + } + + uint32_t random_integer(uint32_t bound) { + std::uniform_int_distribution int_dist(0, bound); + return int_dist(generator_); + } + +private: + stl_engine generator_; + std::uniform_real_distribution real_dist_{0, 1}; +}; + +} diff --git a/test/bench_rng.cpp b/test/bench_rng.cpp new file mode 100644 index 0000000..8ad1abd --- /dev/null +++ b/test/bench_rng.cpp @@ -0,0 +1,30 @@ +#include +#include + +int main(int, char **) { + const uint64_t nsamples = 200000000; + + loadl::random_number_generator rng(123456789); + + double sum = 0; + for(uint64_t i = 0; i < nsamples; i++) { + sum += rng.random_double(); + sum += rng.random_double(); + sum += rng.random_double(); + sum += rng.random_double(); + sum += rng.random_double(); + sum += rng.random_double(); + sum += rng.random_double(); + sum += rng.random_double(); + sum += rng.random_double(); + sum += rng.random_double(); + sum += rng.random_double(); + sum += rng.random_double(); + sum += rng.random_double(); + sum += rng.random_double(); + sum += rng.random_double(); + sum += rng.random_double(); + } + + std::cout << sum/(16*nsamples); +} diff --git a/test/meson.build b/test/meson.build index 7d1a9c0..d971ac9 100644 --- a/test/meson.build +++ b/test/meson.build @@ -7,4 +7,7 @@ t1 = executable('tests', test('tests', t1) +bench_rng = executable('bench_rng', ['bench_rng.cpp'], dependencies : loadleveller_dep) +benchmark('rng', bench_rng) + subdir('silly_mc') -- GitLab