Aufgrund einer Wartung wird GitLab am 28.09. zwischen 10:00 und 11:00 Uhr kurzzeitig nicht zur Verfügung stehen. / Due to maintenance, GitLab will be temporarily unavailable on 28.09. between 10:00 and 11:00 am.

Commit 4f73681a authored by Lukas Weber

remove binsize limitations and fix bugs

parent fc8d34fd
...@@ -20,8 +20,8 @@ void mc::_init() { ...@@ -20,8 +20,8 @@ void mc::_init() {
// simple profiling support: measure the time spent for sweeps/measurements etc // simple profiling support: measure the time spent for sweeps/measurements etc
measure.add_observable("_ll_checkpoint_read_time", 1); measure.add_observable("_ll_checkpoint_read_time", 1);
measure.add_observable("_ll_checkpoint_write_time", 1); measure.add_observable("_ll_checkpoint_write_time", 1);
measure.add_observable("_ll_measurement_time", pt_mode_ ? pt_sweeps_per_global_update_ : 1000); measure.add_observable("_ll_measurement_time", 1000);
measure.add_observable("_ll_sweep_time", pt_mode_ ? pt_sweeps_per_global_update_ : 1000); measure.add_observable("_ll_sweep_time", 1000);
if(pt_mode_) { if(pt_mode_) {
if(param.get<bool>("pt_statistics", false)) { if(param.get<bool>("pt_statistics", false)) {
...@@ -69,23 +69,8 @@ void mc::_do_update() { ...@@ -69,23 +69,8 @@ void mc::_do_update() {
} }
} }
void mc::_pt_update_param(const std::string& param_name, double new_param, const std::string &new_dir) { void mc::_pt_update_param(int target_rank, const std::string& param_name, double new_param) {
// take over the bins of the new target dir measure.mpi_sendrecv(target_rank);
{
iodump dump_file = iodump::open_readonly(new_dir + ".dump.h5");
measure.checkpoint_read(dump_file.get_root().open_group("measurements"));
}
auto unclean = measure.is_unclean();
if(unclean) {
throw std::runtime_error(
fmt::format("Unclean observable: {}\nIn parallel tempering mode you have to choose the "
"binsize for all observables so that it is commensurate with "
"pt_sweeps_per_global_update (so that all bins are empty once it happens). "
"If you don’t like this limitation, implement it properly.",
*unclean));
}
pt_update_param(param_name, new_param); pt_update_param(param_name, new_param);
} }
...@@ -102,20 +87,16 @@ double mc::_pt_weight_ratio(const std::string& param_name, double new_param) { ...@@ -102,20 +87,16 @@ double mc::_pt_weight_ratio(const std::string& param_name, double new_param) {
return wr; return wr;
} }
void mc::measurements_write(const std::string &dir) { void mc::_write(const std::string &dir) {
struct timespec tstart, tend;
clock_gettime(CLOCK_MONOTONIC_RAW, &tstart);
// blocks limit scopes of the dump file handles to ensure they are closed at the right time. // blocks limit scopes of the dump file handles to ensure they are closed at the right time.
{ {
iodump meas_file = iodump::open_readwrite(dir + ".meas.h5"); iodump meas_file = iodump::open_readwrite(dir + ".meas.h5");
auto g = meas_file.get_root(); auto g = meas_file.get_root();
measure.samples_write(g); measure.samples_write(g);
} }
}
void mc::_write(const std::string &dir) {
struct timespec tstart, tend;
clock_gettime(CLOCK_MONOTONIC_RAW, &tstart);
measurements_write(dir);
{ {
iodump dump_file = iodump::create(dir + ".dump.h5.tmp"); iodump dump_file = iodump::create(dir + ".dump.h5.tmp");
......
...@@ -55,13 +55,11 @@ public: ...@@ -55,13 +55,11 @@ public:
void _write(const std::string &dir); void _write(const std::string &dir);
bool _read(const std::string &dir); bool _read(const std::string &dir);
void measurements_write(const std::string &dir);
void _write_output(const std::string &filename); void _write_output(const std::string &filename);
void _do_update(); void _do_update();
void _do_measurement(); void _do_measurement();
void _pt_update_param(const std::string& param_name, double new_param, const std::string &new_dir); void _pt_update_param(int target_rank, const std::string& param_name, double new_param);
double _pt_weight_ratio(const std::string& param_name, double new_param); double _pt_weight_ratio(const std::string& param_name, double new_param);
void pt_measure_statistics(); void pt_measure_statistics();
......
#include "measurements.h" #include "measurements.h"
#include <fmt/format.h> #include <fmt/format.h>
#include <mpi.h>
namespace loadl { namespace loadl {
bool measurements::observable_name_is_legal(const std::string &obs_name) { bool measurements::observable_name_is_legal(const std::string &obs_name) {
...@@ -31,7 +31,7 @@ void measurements::checkpoint_write(const iodump::group &dump_file) { ...@@ -31,7 +31,7 @@ void measurements::checkpoint_write(const iodump::group &dump_file) {
void measurements::checkpoint_read(const iodump::group &dump_file) { void measurements::checkpoint_read(const iodump::group &dump_file) {
for(const auto &obs_name : dump_file) { for(const auto &obs_name : dump_file) {
add_observable(obs_name); add_observable(obs_name);
observables_.at(obs_name).checkpoint_read(dump_file.open_group(obs_name)); observables_.at(obs_name).checkpoint_read(obs_name, dump_file.open_group(obs_name));
} }
} }
...@@ -42,12 +42,49 @@ void measurements::samples_write(const iodump::group &meas_file) { ...@@ -42,12 +42,49 @@ void measurements::samples_write(const iodump::group &meas_file) {
} }
} }
std::optional<std::string> measurements::is_unclean() const { void measurements::mpi_sendrecv(int target_rank) {
for(const auto &obs : observables_) { int rank;
if(!obs.second.is_clean()) { MPI_Comm_rank(MPI_COMM_WORLD, &rank);
return obs.first;
if(rank == target_rank) {
return;
}
if(mpi_checked_targets_.count(target_rank) == 0) {
if(rank < target_rank) {
unsigned long obscount = observables_.size();
MPI_Send(&obscount, 1, MPI_UNSIGNED_LONG, target_rank, 0, MPI_COMM_WORLD);
for(auto& [name, obs] : observables_) {
(void)obs;
int size = name.size()+1;
MPI_Send(&size, 1, MPI_INT, target_rank, 0, MPI_COMM_WORLD);
MPI_Send(name.c_str(), size, MPI_CHAR, target_rank, 0, MPI_COMM_WORLD);
}
} else {
unsigned long obscount;
MPI_Recv(&obscount, 1, MPI_UNSIGNED_LONG, target_rank, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
if(obscount != observables_.size()) {
throw std::runtime_error{fmt::format("ranks {}&{} have to contain identical sets of registered observables. But they contain different amounts of observables! {} != {}.", target_rank, rank, obscount, observables_.size())};
}
for(auto& [name, obs] : observables_) {
(void)obs;
int size;
MPI_Recv(&size, 1, MPI_INT, target_rank, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
std::vector<char> buf(size);
MPI_Recv(buf.data(), size, MPI_CHAR, target_rank, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
if(std::string{buf.data()} != name) {
throw std::runtime_error{fmt::format("ranks {}&{} have to contain identical sets of registered observables. Found '{}' != '{}'.", target_rank, rank, name, std::string{buf.data()})};
}
}
} }
mpi_checked_targets_.insert(target_rank);
}
for(auto& [name, obs] : observables_) {
(void)name;
obs.mpi_sendrecv(target_rank);
} }
return std::nullopt;
} }
} }
...@@ -6,6 +6,7 @@ ...@@ -6,6 +6,7 @@
#include <string> #include <string>
#include <valarray> #include <valarray>
#include <vector> #include <vector>
#include <set>
namespace loadl { namespace loadl {
...@@ -26,11 +27,11 @@ public: ...@@ -26,11 +27,11 @@ public:
// should be opened in read/write mode. // should be opened in read/write mode.
void samples_write(const iodump::group &meas_file); void samples_write(const iodump::group &meas_file);
// returns nullopt if all observables are clean, // switches the content of the measurement buffers with the target_rank
// otherwise the name of a non-empty observable // both ranks must have the same set of observables!
std::optional<std::string> is_unclean() const; void mpi_sendrecv(int target_rank);
private: private:
std::set<int> mpi_checked_targets_;
std::map<std::string, observable> observables_; std::map<std::string, observable> observables_;
}; };
......
#include "observable.h" #include "observable.h"
#include <fmt/format.h> #include <mpi.h>
#include <iostream>
namespace loadl { namespace loadl {
observable::observable(std::string name, size_t bin_length, size_t vector_length) observable::observable(std::string name, size_t bin_length, size_t vector_length)
...@@ -23,7 +22,6 @@ void observable::checkpoint_write(const iodump::group &dump_file) const { ...@@ -23,7 +22,6 @@ void observable::checkpoint_write(const iodump::group &dump_file) const {
// Another sanity check: the samples_ array should contain one partial bin. // Another sanity check: the samples_ array should contain one partial bin.
assert(samples_.size() == vector_length_); assert(samples_.size() == vector_length_);
dump_file.write("name", name_);
dump_file.write("vector_length", vector_length_); dump_file.write("vector_length", vector_length_);
dump_file.write("bin_length", bin_length_); dump_file.write("bin_length", bin_length_);
dump_file.write("current_bin_filling", current_bin_filling_); dump_file.write("current_bin_filling", current_bin_filling_);
...@@ -48,8 +46,8 @@ void observable::measurement_write(const iodump::group &meas_file) { ...@@ -48,8 +46,8 @@ void observable::measurement_write(const iodump::group &meas_file) {
current_bin_ = 0; current_bin_ = 0;
} }
void observable::checkpoint_read(const iodump::group &d) { void observable::checkpoint_read(const std::string& name, const iodump::group &d) {
d.read("name", name_); name_ = name;
d.read("vector_length", vector_length_); d.read("vector_length", vector_length_);
d.read("bin_length", bin_length_); d.read("bin_length", bin_length_);
d.read("current_bin_filling", current_bin_filling_); d.read("current_bin_filling", current_bin_filling_);
...@@ -57,11 +55,22 @@ void observable::checkpoint_read(const iodump::group &d) { ...@@ -57,11 +55,22 @@ void observable::checkpoint_read(const iodump::group &d) {
current_bin_ = 0; current_bin_ = 0;
} }
bool observable::is_clean() const { void observable::mpi_sendrecv(int target_rank) {
if(current_bin_filling_ != 0) { const int msg_size = 4;
std::cout << current_bin_filling_ << "\n"; int rank;
} MPI_Comm_rank(MPI_COMM_WORLD, &rank);
return current_bin_ == 0 && current_bin_filling_ == 0;
unsigned long msg[msg_size] = {current_bin_, vector_length_, bin_length_, current_bin_filling_};
MPI_Sendrecv_replace(msg, msg_size, MPI_UNSIGNED_LONG, target_rank, 0, target_rank, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
current_bin_ = msg[0];
vector_length_ = msg[1];
bin_length_ = msg[2];
current_bin_filling_ = msg[3];
std::vector<double> recvbuf((current_bin_+1)*vector_length_);
MPI_Sendrecv(samples_.data(), samples_.size(), MPI_DOUBLE, target_rank, 0, recvbuf.data(), recvbuf.size(), MPI_DOUBLE, target_rank, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
samples_ = recvbuf;
} }
} }
...@@ -25,11 +25,11 @@ public: ...@@ -25,11 +25,11 @@ public:
// This will empty the cache of already completed bins // This will empty the cache of already completed bins
void measurement_write(const iodump::group &meas_file); void measurement_write(const iodump::group &meas_file);
void checkpoint_read(const iodump::group &dump_file); void checkpoint_read(const std::string& name, const iodump::group &dump_file);
// true if there are no samples in the bin
bool is_clean() const;
// switch copy with target rank.
// useful for parallel tempering mode
void mpi_sendrecv(int target_rank);
private: private:
static const size_t initial_bin_length = 1000; static const size_t initial_bin_length = 1000;
......
This diff is collapsed.
...@@ -9,18 +9,18 @@ struct pt_chain { ...@@ -9,18 +9,18 @@ struct pt_chain {
int id{}; int id{};
std::vector<int> task_ids; std::vector<int> task_ids;
std::vector<double> params; std::vector<double> params;
std::vector<int> nup_histogram;
std::vector<int> ndown_histogram;
int sweeps{-1}; int sweeps{-1};
int target_sweeps{-1}; int target_sweeps{-1};
int target_thermalization{-1}; int target_thermalization{-1};
int scheduled_runs{};
// parameter optimization
std::vector<int> nup_histogram;
std::vector<int> ndown_histogram;
int entries_before_optimization{0}; int entries_before_optimization{0};
int histogram_entries{}; int histogram_entries{};
int scheduled_runs{};
bool is_done(); bool is_done();
void checkpoint_read(const iodump::group &g); void checkpoint_read(const iodump::group &g);
void checkpoint_write(const iodump::group &g); void checkpoint_write(const iodump::group &g);
...@@ -36,12 +36,14 @@ private: ...@@ -36,12 +36,14 @@ private:
public: public:
int id{}; int id{};
int run_id{}; int run_id{};
bool swap_odd{};
pt_chain_run(const pt_chain &chain, int run_id); pt_chain_run(const pt_chain &chain, int run_id);
static pt_chain_run checkpoint_read(const iodump::group &g); static pt_chain_run checkpoint_read(const iodump::group &g);
void checkpoint_write(const iodump::group &g); void checkpoint_write(const iodump::group &g);
std::vector<int> rank_to_pos; std::vector<int> rank_to_pos;
std::vector<int> switch_partners;
std::vector<double> weight_ratios; std::vector<double> weight_ratios;
std::vector<int> last_visited; std::vector<int> last_visited;
...@@ -57,7 +59,6 @@ private: ...@@ -57,7 +59,6 @@ private:
double time_last_checkpoint_{0}; double time_last_checkpoint_{0};
bool use_param_optimization_{}; bool use_param_optimization_{};
bool pt_swap_odd_{};
std::vector<pt_chain> pt_chains_; std::vector<pt_chain> pt_chains_;
std::vector<pt_chain_run> pt_chain_runs_; std::vector<pt_chain_run> pt_chain_runs_;
int chain_len_; int chain_len_;
...@@ -108,8 +109,8 @@ private: ...@@ -108,8 +109,8 @@ private:
void pt_global_update(); void pt_global_update();
bool is_checkpoint_time(); int negotiate_timeout();
bool time_is_up();
void send_status(int status); void send_status(int status);
int recv_action(); int recv_action();
void checkpoint_write(); void checkpoint_write();
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment