// runner_pt.h
#pragma once
#include "runner.h"

#include <map>
#include <memory>
#include <tuple>
#include <vector>

namespace loadl {

// Bookkeeping record for one pt chain.
//
// Only the data layout and the member function signatures are declared here;
// the member functions are defined elsewhere.
// NOTE(review): "pt" presumably stands for parallel tempering — confirm.
struct pt_chain {
	int id{};
	std::vector<int> task_ids;
	std::vector<double> params;

	// These three counters start at -1.
	// NOTE(review): -1 presumably marks "not set yet" — confirm against the implementation.
	int sweeps{-1};
	int target_sweeps{-1};
	int target_thermalization{-1};
	int scheduled_runs{};

	// parameter optimization
	std::vector<int> nup_histogram;
	std::vector<int> ndown_histogram;
	int entries_before_optimization{0};

	std::vector<double> rejection_rates;
	// Braced initializer list: the vector starts out holding the two elements 0 and 0.
	std::vector<int> rejection_rate_entries{0,0};

	bool is_done();
	// Checkpoint (de)serialization through an iodump group handle.
	void checkpoint_read(const iodump::group &g);
	void checkpoint_write(const iodump::group &g);

	void clear_histograms();
	int histogram_entries();
	// Returns a pair of doubles.
	// NOTE(review): the meaning of the two values is not visible in this header — see the definition.
	std::tuple<double, double> optimize_params();
};

struct pt_chain_run {
private:
	pt_chain_run() = default;
38

39
public:
Lukas Weber's avatar
ehhhh  
Lukas Weber committed
40 41
	int id{};
	int run_id{};
42
	bool swap_odd{};
Lukas Weber's avatar
Lukas Weber committed
43

44
	std::vector<int> rank_to_pos;
45
	std::vector<int> last_visited;
46
	std::vector<int> switch_partners;
Lukas Weber's avatar
ehhhh  
Lukas Weber committed
47
	std::vector<double> weight_ratios;
48

49 50 51
	pt_chain_run(const pt_chain &chain, int run_id);
	static pt_chain_run checkpoint_read(const iodump::group &g);
	void checkpoint_write(const iodump::group &g);
52 53

	void clear_histograms();
54 55 56 57 58 59 60 61 62 63 64
};

// Declaration of the pt runner start function; the definition lives elsewhere.
// Takes the job description by value, the mc factory by const reference and
// an argc/argv pair.
// NOTE(review): the int result is presumably a process exit status — confirm against the definition.
int runner_pt_start(jobinfo job, const mc_factory &mccreator, int argc, char **argv);

class runner_pt_master {
private:
	jobinfo job_;
	int num_active_ranks_{0};

	double time_last_checkpoint_{0};

65 66 67 68 69 70 71
	// parameter optimization
	struct {
		bool enabled{};
		int nsamples_initial{};
		double nsamples_growth{};
	} po_config_;

72 73 74 75 76
	std::vector<pt_chain> pt_chains_;
	std::vector<pt_chain_run> pt_chain_runs_;
	int chain_len_;
	std::unique_ptr<random_number_generator> rng_;

77
	std::map<int, int> rank_to_chain_run_;
78 79
	int current_chain_id_{-1};

80 81
	measurements pt_meas_;

82 83 84
	void construct_pt_chains();
	void checkpoint_write();
	void checkpoint_read();
85
	void write_params_yaml();
86
	void write_param_optimization_stats();
87 88

	int schedule_chain_run();
89
	void pt_global_update(pt_chain &chain, pt_chain_run &chain_run);
Lukas Weber's avatar
Lukas Weber committed
90
	void pt_param_optimization(pt_chain &chain, pt_chain_run &chain_run);
91 92 93

	void react();
	void send_action(int action, int destination);
94
	int assign_new_chain(int rank_section);
95 96 97 98 99 100 101 102 103 104 105 106 107

public:
	runner_pt_master(jobinfo job);
	void start();
};

class runner_pt_slave {
private:
	jobinfo job_;

	mc_factory mccreator_;
	std::unique_ptr<mc> sys_;

108 109 110
	MPI_Comm chain_comm_;
	int chain_rank_{};

111 112 113 114 115 116 117 118 119 120
	double time_last_checkpoint_{0};
	double time_start_{0};

	int rank_{};
	int sweeps_since_last_query_{};
	int sweeps_before_communication_{};
	int sweeps_per_global_update_{};
	int task_id_{-1};
	int run_id_{-1};

121 122
	double current_param_{};

123 124
	void pt_global_update();

125 126
	int negotiate_timeout();

127 128 129 130 131 132 133 134 135 136 137 138
	void send_status(int status);
	int recv_action();
	void checkpoint_write();
	void merge_measurements();
	bool accept_new_chain();
	int what_is_next(int status);

public:
	runner_pt_slave(jobinfo job, mc_factory mccreator);
	void start();
};
}