// runner_pt.h
#pragma once
#include "runner.h"

#include <map>
#include <memory>
#include <tuple>
#include <vector>

namespace loadl {

// Bookkeeping record for one chain of the pt runner: an id, the task ids and
// parameters attached to it, sweep counters, and two histograms used by the
// parameter optimization. The member functions are only declared in this
// struct; their definitions are not part of this header.
struct pt_chain {
	int id{};
	std::vector<int> task_ids;
	// NOTE(review): presumably one parameter per entry of task_ids -- confirm
	// against the code that fills both vectors.
	std::vector<double> params;

	// Sweep counters; each one starts at -1 until it is assigned elsewhere.
	int sweeps{-1};
	int target_sweeps{-1};
	int target_thermalization{-1};

	int scheduled_runs{};

	// parameter optimization
	std::vector<int> nup_histogram;
	std::vector<int> ndown_histogram;
	int entries_before_optimization{0};

	bool is_done();

	// Checkpoint (de)serialization through an iodump group.
	void checkpoint_read(const iodump::group &g);
	void checkpoint_write(const iodump::group &g);

	void clear_histograms();
	int histogram_entries();
	// Takes a relaxation factor and returns a pair of doubles; the meaning of
	// the two values is defined by the implementation, not by this header.
	std::tuple<double, double> optimize_params(double relaxation_fac);
};

struct pt_chain_run {
private:
	pt_chain_run() = default;
35

36
public:
Lukas Weber's avatar
ehhhh  
Lukas Weber committed
37 38
	int id{};
	int run_id{};
39
	bool swap_odd{};
Lukas Weber's avatar
Lukas Weber committed
40

41
	std::vector<int> rank_to_pos;
42
	std::vector<int> last_visited;
43
	std::vector<int> switch_partners;
Lukas Weber's avatar
ehhhh  
Lukas Weber committed
44
	std::vector<double> weight_ratios;
45

46 47 48
	pt_chain_run(const pt_chain &chain, int run_id);
	static pt_chain_run checkpoint_read(const iodump::group &g);
	void checkpoint_write(const iodump::group &g);
49 50

	void clear_histograms();
51 52 53 54 55 56 57 58 59 60 61
};

// Free function of the pt runner taking the job description, the factory for
// mc objects and the program's command-line arguments. Only declared here.
// NOTE(review): presumably the entry point that runs the master or a slave
// depending on the MPI rank, returning an exit status -- confirm against the
// definition.
int runner_pt_start(jobinfo job, const mc_factory &mccreator, int argc, char **argv);

class runner_pt_master {
private:
	jobinfo job_;
	int num_active_ranks_{0};

	double time_last_checkpoint_{0};

62 63 64 65 66 67 68 69
	// parameter optimization
	struct {
		bool enabled{};
		int nsamples_initial{};
		double nsamples_growth{};
		double relaxation_fac{};
	} po_config_;

70 71 72 73 74
	std::vector<pt_chain> pt_chains_;
	std::vector<pt_chain_run> pt_chain_runs_;
	int chain_len_;
	std::unique_ptr<random_number_generator> rng_;

75
	std::map<int, int> rank_to_chain_run_;
76 77
	int current_chain_id_{-1};

78 79
	measurements pt_meas_;

80 81 82
	void construct_pt_chains();
	void checkpoint_write();
	void checkpoint_read();
83
	void write_params_yaml();
84
	void write_param_optimization_stats();
85 86

	int schedule_chain_run();
87
	void pt_global_update(pt_chain &chain, pt_chain_run &chain_run);
Lukas Weber's avatar
Lukas Weber committed
88
	void pt_param_optimization(pt_chain &chain, pt_chain_run &chain_run);
89 90 91

	void react();
	void send_action(int action, int destination);
92
	int assign_new_chain(int rank_section);
93 94 95 96 97 98 99 100 101 102 103 104 105

public:
	runner_pt_master(jobinfo job);
	void start();
};

class runner_pt_slave {
private:
	jobinfo job_;

	mc_factory mccreator_;
	std::unique_ptr<mc> sys_;

106 107 108
	MPI_Comm chain_comm_;
	int chain_rank_{};

109 110 111 112 113 114 115 116 117 118
	double time_last_checkpoint_{0};
	double time_start_{0};

	int rank_{};
	int sweeps_since_last_query_{};
	int sweeps_before_communication_{};
	int sweeps_per_global_update_{};
	int task_id_{-1};
	int run_id_{-1};

119 120
	double current_param_{};

121 122
	void pt_global_update();

123 124
	int negotiate_timeout();

125 126 127 128 129 130 131 132 133 134 135 136
	void send_status(int status);
	int recv_action();
	void checkpoint_write();
	void merge_measurements();
	bool accept_new_chain();
	int what_is_next(int status);

public:
	runner_pt_slave(jobinfo job, mc_factory mccreator);
	void start();
};
}