Commit 61aae46a authored by laochailan's avatar laochailan

add intel_mkl rng backend

it doesn’t seem to be faster than internal_mersenne still. but maybe in an actually simulation for some cases it is?
parent e6560aac
...@@ -22,10 +22,21 @@ if not hdf5_dep.found() or get_option('force_hdf5_fallback') ...@@ -22,10 +22,21 @@ if not hdf5_dep.found() or get_option('force_hdf5_fallback')
hdf5_dep = hdf5_proj.get_variable('hdf5_dep') hdf5_dep = hdf5_proj.get_variable('hdf5_dep')
endif endif
should_install = not meson.is_subproject()
loadleveller_deps = [ fmt_dep, json_dep, mpi_dep, hdf5_dep ] loadleveller_deps = [ fmt_dep, json_dep, mpi_dep, hdf5_dep ]
# intel mkl
mkl_dep = dependency('mkl-dynamic-ilp64-seq', required : false)
if not mkl_dep.found()
mkl_dep = dependency('mkl-static-ilp64-seq', required : (get_option('rng_backend') == 'intel_mkl'))
endif
if mkl_dep.found()
loadleveller_deps += mkl_dep
endif
should_install = not meson.is_subproject()
subdir('src') subdir('src')
if not meson.is_subproject() if not meson.is_subproject()
......
option('rng_backend', type : 'combo', choices : ['internal_mersenne', 'stl_mt19937', 'stl_mt19937_64'], description : 'The backend used for the random number generator provided by loadleveller') option('rng_backend', type : 'combo', choices : ['internal_mersenne', 'stl_mt19937', 'intel_mkl'], description : 'The backend used for the random number generator provided by loadleveller')
option('force_hdf5_fallback', type : 'boolean', value : false, description : 'Use the hdf5.wrap even if an installed versionwas found.') option('force_hdf5_fallback', type : 'boolean', value : false, description : 'Use the hdf5.wrap even if an installed version was found.')
...@@ -99,7 +99,7 @@ def merge(): ...@@ -99,7 +99,7 @@ def merge():
sys.exit(1) sys.exit(1)
cmd = '{} merge "{}"'.format(job.jobconfig['mc_binary'], job_input_filename) cmd = '{} merge "{}"'.format(job.jobconfig['mc_binary'], job_input_filename)
print('$ '+cmd) print('$ '+cmd)
subprocess.run(cmd, shell=True) subprocess.run(cmd, shell=True, check=True)
def status(): def status():
from loadleveller import jobstatus from loadleveller import jobstatus
......
#pragma once #pragma once
#define RNG_BACKEND @rng_backend@ #define RNG_BACKEND @rng_backend@
#define HAVE_INTEL_MKL @have_intel_mkl@
rng_names = { rng_names = {
'stl_mt19937' : 'rng_stl<std::mt19937>', 'stl_mt19937' : 'rng_stl<std::mt19937>',
'stl_mt19937_64' : 'rng_stl<std::mt19937_64>',
'internal_mersenne' : 'rng_internal_mersenne', 'internal_mersenne' : 'rng_internal_mersenne',
'intel_mkl' : 'rng_intel_mkl',
} }
conf_data = configuration_data() conf_data = configuration_data()
conf_data.set('rng_backend', rng_names[get_option('rng_backend')]) conf_data.set('rng_backend', rng_names[get_option('rng_backend')])
conf_data.set('have_intel_mkl', mkl_dep.found())
configure_file(input : 'config.h.in', configure_file(input : 'config.h.in',
output : 'config.h', output : 'config.h',
install : should_install, install : should_install,
...@@ -22,7 +23,6 @@ loadleveller_sources = files([ ...@@ -22,7 +23,6 @@ loadleveller_sources = files([
'merger.cpp', 'merger.cpp',
'observable.cpp', 'observable.cpp',
'parser.cpp', 'parser.cpp',
'random.cpp',
'results.cpp', 'results.cpp',
'runner.cpp', 'runner.cpp',
'runner_pt.cpp', 'runner_pt.cpp',
...@@ -39,10 +39,13 @@ loadleveller_headers = files([ ...@@ -39,10 +39,13 @@ loadleveller_headers = files([
'mc.h', 'mc.h',
'measurements.h', 'measurements.h',
'merger.h', 'merger.h',
'MersenneTwister.h',
'observable.h', 'observable.h',
'parser.h', 'parser.h',
'random.h', 'random.h',
'random/MersenneTwister.h',
'random/stl_random.h',
'random/intel_mkl.h',
'random/internal_mt.h',
'results.h', 'results.h',
'runner.h', 'runner.h',
'runner_pt.h', 'runner_pt.h',
......
#include "random.h"
namespace loadl {
void rng_internal_mersenne::set_seed(uint64_t seed) {
mtrand_.seed(seed);
}
void rng_internal_mersenne::backend_checkpoint_write(const iodump::group &d) {
std::vector<uint64_t> rand_state;
mtrand_.save(rand_state);
d.write("state", rand_state);
}
void rng_internal_mersenne::backend_checkpoint_read(const iodump::group &d) {
std::vector<uint64_t> rand_state;
d.read("state", rand_state);
mtrand_.load(rand_state);
}
}
#pragma once #pragma once
#include "config.h" #include "config.h"
#include <mpi.h>
#include "MersenneTwister.h"
#include "iodump.h"
#include <random> #include <random>
#include <sstream> #include "iodump.h"
#include <typeinfo> #include <typeinfo>
// A whole bunch of template magic to make the random backend modular. // A whole bunch of template magic to make the random backend modular.
// We don’t want a vtable on such a performance critical object, otherwise I // We don’t want a vtable on such a performance critical object, otherwise I
// would make it settable at runtime. // would make it settable at runtime.
#include "random/stl_random.h"
#include "random/internal_mt.h"
#ifdef HAVE_INTEL_MKL
#include "random/intel_mkl.h"
#endif
namespace loadl { namespace loadl {
template<class base> template<class base>
...@@ -78,60 +86,6 @@ public: ...@@ -78,60 +86,6 @@ public:
double rand_double(); // in [0,1] double rand_double(); // in [0,1]
}; };
// based on a dinosaur code in the MersenneTwister.h header
class rng_internal_mersenne {
private:
MTRand mtrand_;
public:
void backend_checkpoint_write(const iodump::group &dump_file);
void backend_checkpoint_read(const iodump::group &dump_file);
void set_seed(uint64_t seed);
double random_double() {
return mtrand_.randDblExc(1);
}
int random_integer(int bound) {
return mtrand_.randInt(bound - 1);
}
};
// based on the c++ stl implementation
template<typename stl_engine>
class rng_stl {
public:
void backend_checkpoint_write(const iodump::group &dump_file) {
std::stringstream buf;
buf << generator_;
buf << real_dist_;
dump_file.write("state", buf.str());
}
void backend_checkpoint_read(const iodump::group &dump_file) {
std::string state;
dump_file.read("state", state);
std::stringstream buf(state);
buf >> generator_;
buf >> real_dist_;
}
void set_seed(uint64_t seed) {
generator_.seed(seed);
}
double random_double() {
return real_dist_(generator_);
}
uint32_t random_integer(uint32_t bound) {
std::uniform_int_distribution<uint32_t> int_dist(0, bound);
return int_dist(generator_);
}
private:
stl_engine generator_;
std::uniform_real_distribution<double> real_dist_{0, 1};
};
// RNG_BACKEND is a macro set by the build system. If you add backends and you can help it, // RNG_BACKEND is a macro set by the build system. If you add backends and you can help it,
// avoid using huge blocks of #ifdefs as it will lead to dead code nobody uses for 10 years. // avoid using huge blocks of #ifdefs as it will lead to dead code nobody uses for 10 years.
......
#pragma once
#include <mkl_vsl.h>
#include <vector>
#include "iodump.h"
namespace loadl {
class rng_intel_mkl {
private:
static const int buffer_size_ = 10000;
std::array<double, buffer_size_> buffer_;
int buffer_idx_{buffer_size_};
VSLStreamStatePtr random_stream_{};
void refill_buffer() {
vdRngUniform(VSL_RNG_METHOD_UNIFORM_STD, random_stream_, buffer_size_, buffer_.data(), 0, 1);
}
public:
void backend_checkpoint_write(const iodump::group &dump_file) {
std::vector<char> save_buf(vslGetStreamSize(random_stream_));
int rc = vslSaveStreamM(random_stream_, save_buf.data());
if(rc != VSL_ERROR_OK) {
throw std::runtime_error{fmt::format("Could not write Intel MKL RNG: error code {}", rc)};
}
dump_file.write("state", save_buf);
dump_file.write("buffer", std::vector<double>(buffer_.begin(), buffer_.end()));
dump_file.write("buffer_idx", buffer_idx_);
}
void backend_checkpoint_read(const iodump::group &dump_file) {
if(random_stream_ != nullptr) {
vslDeleteStream(&random_stream_);
}
std::vector<char> load_buf;
dump_file.read("state", load_buf);
int rc = vslLoadStreamM(&random_stream_, load_buf.data());
if(rc != VSL_ERROR_OK) {
throw std::runtime_error{fmt::format("Could not load Intel MKL RNG: error code {}", rc)};
}
std::vector<double> tmpbuffer;
dump_file.read("buffer", tmpbuffer);
assert(tmpbuffer.size() == buffer_.size());
std::copy(tmpbuffer.begin(), tmpbuffer.end(), buffer_.begin());
dump_file.read("buffer_idx", buffer_idx_);
}
void set_seed(uint64_t seed) {
if(random_stream_ != nullptr) {
vslDeleteStream(&random_stream_);
}
int rc = vslNewStream(&random_stream_, VSL_BRNG_SFMT19937, seed);
if(rc != VSL_ERROR_OK) {
throw std::runtime_error{fmt::format("Could not initialize Intel MKL RNG: error code {}", rc)};
}
}
double random_double() {
if(buffer_idx_ >= buffer_size_) {
refill_buffer();
buffer_idx_ = 0;
}
return buffer_[buffer_idx_++];
}
uint32_t random_integer(uint32_t bound) {
return bound*random_double(); // TODO: use the optimized integer distribution at the cost of a second buffer?
}
~rng_intel_mkl() {
vslDeleteStream(&random_stream_);
}
};
}
#pragma once
#include "iodump.h"
#include "MersenneTwister.h"
namespace loadl {
// based on a dinosaur code in the MersenneTwister.h header
class rng_internal_mersenne {
private:
MTRand mtrand_;
public:
void backend_checkpoint_write(const iodump::group &dump_file) {
std::vector<uint64_t> rand_state;
mtrand_.save(rand_state);
dump_file.write("state", rand_state);
}
void backend_checkpoint_read(const iodump::group &dump_file) {
std::vector<uint64_t> rand_state;
dump_file.read("state", rand_state);
mtrand_.load(rand_state);
}
void set_seed(uint64_t seed) {
mtrand_.seed(seed);
}
double random_double() {
return mtrand_.randDblExc(1);
}
int random_integer(int bound) {
return mtrand_.randInt(bound - 1);
}
};
}
#pragma once
#include <sstream>
#include <random>
#include "iodump.h"
namespace loadl {
// based on the c++ stl implementation
template<typename stl_engine>
class rng_stl {
public:
void backend_checkpoint_write(const iodump::group &dump_file) {
std::stringstream buf;
buf << generator_;
buf << real_dist_;
dump_file.write("state", buf.str());
}
void backend_checkpoint_read(const iodump::group &dump_file) {
std::string state;
dump_file.read("state", state);
std::stringstream buf(state);
buf >> generator_;
buf >> real_dist_;
}
void set_seed(uint64_t seed) {
generator_.seed(seed);
}
double random_double() {
return real_dist_(generator_);
}
uint32_t random_integer(uint32_t bound) {
std::uniform_int_distribution<uint32_t> int_dist(0, bound);
return int_dist(generator_);
}
private:
stl_engine generator_;
std::uniform_real_distribution<double> real_dist_{0, 1};
};
}
#include <loadleveller/random.h>
#include <iostream>
int main(int, char **) {
const uint64_t nsamples = 200000000;
loadl::random_number_generator rng(123456789);
double sum = 0;
for(uint64_t i = 0; i < nsamples; i++) {
sum += rng.random_double();
sum += rng.random_double();
sum += rng.random_double();
sum += rng.random_double();
sum += rng.random_double();
sum += rng.random_double();
sum += rng.random_double();
sum += rng.random_double();
sum += rng.random_double();
sum += rng.random_double();
sum += rng.random_double();
sum += rng.random_double();
sum += rng.random_double();
sum += rng.random_double();
sum += rng.random_double();
sum += rng.random_double();
}
std::cout << sum/(16*nsamples);
}
...@@ -7,4 +7,7 @@ t1 = executable('tests', ...@@ -7,4 +7,7 @@ t1 = executable('tests',
test('tests', t1) test('tests', t1)
bench_rng = executable('bench_rng', ['bench_rng.cpp'], dependencies : loadleveller_dep)
benchmark('rng', bench_rng)
subdir('silly_mc') subdir('silly_mc')
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment