Track thermalization sweeps separately from measurement sweeps.

Summary of what this patch does:

  * mc::_write stores two datasets in the dump file: "thermalization_sweeps"
    (sweeps spent thermalizing, capped at the thermalization target) and
    "sweeps" (sweeps done after thermalization). In parallel tempering mode
    the thermalization target is multiplied by pt_sweeps_per_global_update_
    before the split. mc::_read adds the two datasets back together to
    restore sweep_.
  * The runners only advance their sweep counters once the system reports
    is_thermalized(), so runner_task and pt_chain no longer carry or add
    target_thermalization when deciding whether they are done or how many
    sweeps remain (remaining = target_sweeps - sweeps).
  * jobstatus.py reads both datasets and requests a restart based on
    measurement sweeps only.
  * Parallel tempering consistency errors are prefixed with the chain id.
  * pt_sweeps_per_global_update defaults to 1 in mc::mc.

NOTE(review): indentation and blank context lines below were reconstructed
from a whitespace-collapsed copy of this patch. Hunk line counts are
consistent, but confirm against the tree or apply with
`git apply --ignore-whitespace`.

diff --git a/python/loadleveller/jobstatus.py b/python/loadleveller/jobstatus.py
index b22e5dca08613165b350a11fe69ddfd8ae03d3a3..bef687b4fb020c23b73f323fa6c8a4ad79117483 100644
--- a/python/loadleveller/jobstatus.py
+++ b/python/loadleveller/jobstatus.py
@@ -31,15 +31,15 @@ class JobProgress:
 
             for runfile in glob.iglob('{}.data/{}/run*.dump.h5'.format(self.jobfile.jobname,task)):
                 tp.num_runs += 1
-
+                sweeps_per_global_update = 1
+                if 'parallel_tempering_parameter' in jobfile.jobconfig.keys():
+                    sweeps_per_global_update = jobfile.tasks[task].get('pt_sweeps_per_global_update',1)
                 with h5py.File(runfile, 'r') as f:
-                    sweeps = f['/sweeps'][0]//jobfile.tasks[task].get('pt_sweeps_per_global_update',1)
-
-                    tp.therm_sweeps += min(sweeps,tp.target_therm)
-                    tp.sweeps += max(0,sweeps - tp.target_therm)
+                    tp.sweeps += f['/sweeps'][0]//sweeps_per_global_update
+                    tp.therm_sweeps += f['/thermalization_sweeps'][0]//sweeps_per_global_update
 
 
-            if tp.therm_sweeps < tp.target_therm or tp.sweeps < tp.target_sweeps:
+            if tp.sweeps < tp.target_sweeps:
                 self.restart = True
 
             self.progress.append(tp)
diff --git a/src/mc.cpp b/src/mc.cpp
index 84ff277017fe5ef4fe1f088a231d3f5a42699dfd..9f093b74945ec0d70f9e502605b1199b56b02e90 100644
--- a/src/mc.cpp
+++ b/src/mc.cpp
@@ -3,7 +3,7 @@ namespace loadl {
 
 mc::mc(const parser &p) : param{p} {
     therm_ = p.get("thermalization");
-    pt_sweeps_per_global_update_ = p.get("pt_sweeps_per_global_update", -1);
+    pt_sweeps_per_global_update_ = p.get("pt_sweeps_per_global_update", 1);
 }
 
 void mc::write_output(const std::string &) {}
@@ -82,7 +82,12 @@ void mc::_write(const std::string &dir) {
         checkpoint_write(g.open_group("simulation"));
         measure.checkpoint_write(g.open_group("measurements"));
 
-        g.write("sweeps", sweep_);
+        int therm = therm_;
+        if(pt_mode_) {
+            therm *= pt_sweeps_per_global_update_;
+        }
+        g.write("thermalization_sweeps", std::min(sweep_,therm));
+        g.write("sweeps", std::max(0,sweep_-therm));
     }
 
     rename((dir + ".dump.h5.tmp").c_str(), (dir + ".dump.h5").c_str());
@@ -108,7 +113,10 @@ bool mc::_read(const std::string &dir) {
     measure.checkpoint_read(g.open_group("measurements"));
     checkpoint_read(g.open_group("simulation"));
 
-    g.read("sweeps", sweep_);
+    int sweeps, therm_sweeps;
+    g.read("thermalization_sweeps", therm_sweeps);
+    g.read("sweeps", sweeps);
+    sweep_ = sweeps + therm_sweeps;
 
     clock_gettime(CLOCK_MONOTONIC_RAW, &tend);
     measure.add("_ll_checkpoint_read_time",
diff --git a/src/runner.cpp b/src/runner.cpp
index d684e5c98d876db15d3c41792515fcc49277a27a..4d26ef2c53d178e8ead675595ff22d1b8d6e78c3 100644
--- a/src/runner.cpp
+++ b/src/runner.cpp
@@ -85,9 +85,7 @@ void runner_master::react() {
                 send_action(A_NEW_JOB, node);
                 tasks_[current_task_id_].scheduled_runs++;
                 int msg[3] = {current_task_id_, tasks_[current_task_id_].scheduled_runs,
-                              tasks_[current_task_id_].target_sweeps +
-                                  tasks_[current_task_id_].target_thermalization -
-                                  tasks_[current_task_id_].sweeps};
+                              tasks_[current_task_id_].target_sweeps - tasks_[current_task_id_].sweeps};
                 MPI_Send(&msg, sizeof(msg) / sizeof(msg[0]), MPI_INT, node, T_NEW_JOB, MPI_COMM_WORLD);
             }
         } else if(node_status == S_BUSY) {
@@ -126,11 +124,10 @@ void runner_master::read() {
         auto task = job_.jobfile["tasks"][job_.task_names[i]];
 
         int target_sweeps = task.get("sweeps");
-        int target_thermalization = task.get("thermalization");
         int sweeps = job_.read_dump_progress(i);
         int scheduled_runs = 0;
 
-        tasks_.emplace_back(target_sweeps, target_thermalization, sweeps, scheduled_runs);
+        tasks_.emplace_back(target_sweeps, sweeps, scheduled_runs);
     }
 }
 
@@ -163,10 +160,10 @@ void runner_slave::start() {
 
             while(sweeps_since_last_query_ < sweeps_before_communication_) {
                 sys_->_do_update();
-                sweeps_since_last_query_++;
 
                 if(sys_->is_thermalized()) {
                     sys_->_do_measurement();
+                    sweeps_since_last_query_++;
                 }
 
                 if(is_checkpoint_time() || time_is_up()) {
diff --git a/src/runner_pt.cpp b/src/runner_pt.cpp
index 3d847bf38a39b3242af561a9d8541706c409d140..21b5255ad62e912c59cd1270dfbbf68697766a10 100644
--- a/src/runner_pt.cpp
+++ b/src/runner_pt.cpp
@@ -124,7 +124,7 @@ std::tuple pt_chain::optimize_params() {
 }
 
 bool pt_chain::is_done() {
-    return sweeps >= target_sweeps + target_thermalization;
+    return sweeps >= target_sweeps;
 }
 
 int runner_pt_start(jobinfo job, const mc_factory &mccreator, int argc, char **argv) {
@@ -182,20 +182,20 @@ void runner_pt_master::construct_pt_chains() {
             chain.task_ids.at(chain_pos) = i;
 
             const char *pt_sweep_error =
-                "in parallel tempering mode, sweeps are measured in global updates and need to be the "
+                "chain {}: in parallel tempering mode, sweeps are measured in global updates and need to be the "
                 "same within each chain: {} = {} != {}";
 
             int target_sweeps = task.get("sweeps");
             if(chain.target_sweeps >= 0 && target_sweeps != chain.target_sweeps) {
                 throw std::runtime_error{
-                    fmt::format(pt_sweep_error, "target_sweeps", chain.target_sweeps, target_sweeps)};
+                    fmt::format(pt_sweep_error, chain.id, "target_sweeps", chain.target_sweeps, target_sweeps)};
             }
             chain.target_sweeps = target_sweeps;
 
             int target_thermalization = task.get("thermalization");
             if(chain.target_thermalization >= 0 &&
                target_thermalization != chain.target_thermalization) {
-                throw std::runtime_error{fmt::format(pt_sweep_error, "thermalization",
+                throw std::runtime_error{fmt::format(pt_sweep_error, chain.id, "thermalization",
                                                      chain.target_thermalization,
                                                      target_thermalization)};
             }
@@ -204,7 +204,7 @@ void runner_pt_master::construct_pt_chains() {
             int sweeps_per_global_update = task.get("pt_sweeps_per_global_update");
             int sweeps = job_.read_dump_progress(i) / sweeps_per_global_update;
             if(chain.sweeps >= 0 && sweeps != chain.sweeps) {
-                throw std::runtime_error{fmt::format(pt_sweep_error, "sweeps", chain.sweeps, sweeps)};
+                throw std::runtime_error{fmt::format(pt_sweep_error, chain.id, "sweeps", chain.sweeps, sweeps)};
             }
             chain.sweeps = sweeps;
         }
@@ -402,7 +402,7 @@ int runner_pt_master::assign_new_chain(int rank_section) {
             auto &chain = pt_chains_[chain_run.id];
             msg[0] = chain.task_ids[target];
             msg[1] = chain_run.run_id;
-            msg[2] = chain.target_sweeps + chain.target_thermalization - chain.sweeps;
+            msg[2] = chain.target_sweeps - chain.sweeps;
         } else {
             // this will prompt the slave to quit
             num_active_ranks_--;
@@ -593,7 +593,9 @@ void runner_pt_slave::start() {
 
                 if(sys_->sweep() % sweeps_per_global_update_ == 0) {
                     pt_global_update();
-                    sweeps_since_last_query_++;
+                    if(sys_->is_thermalized()) {
+                        sweeps_since_last_query_++;
+                    }
 
                     timeout = negotiate_timeout();
                     if(timeout != TR_CONTINUE) {
diff --git a/src/runner_single.cpp b/src/runner_single.cpp
index fc3e01cdde025a148ac80045b1d081cd40015637..8646f11a8cf898c9854cfcdfdbdb53d75f405c52 100644
--- a/src/runner_single.cpp
+++ b/src/runner_single.cpp
@@ -35,9 +35,9 @@ int runner_single::start() {
 
         while(!tasks_[task_id_].is_done() && !time_is_up()) {
             sys_->_do_update();
-            tasks_[task_id_].sweeps++;
             if(sys_->is_thermalized()) {
                 sys_->_do_measurement();
+                tasks_[task_id_].sweeps++;
             }
 
             if(is_checkpoint_time()) {
@@ -79,11 +79,10 @@ void runner_single::read() {
         auto task = job_.jobfile["tasks"][job_.task_names[i]];
 
         int target_sweeps = task.get("sweeps");
-        int target_thermalization = task.get("thermalization");
         int sweeps = 0;
         sweeps = job_.read_dump_progress(i);
 
-        tasks_.emplace_back(target_sweeps, target_thermalization, sweeps, 0);
+        tasks_.emplace_back(target_sweeps, sweeps, 0);
     }
 }
 
diff --git a/src/runner_task.cpp b/src/runner_task.cpp
index ba356ffe2f04e1e116e284a3f70fc9dd0527f667..100ed234fc1ea4e00066a28d617d9ab22e93ecfc 100644
--- a/src/runner_task.cpp
+++ b/src/runner_task.cpp
@@ -3,12 +3,12 @@
 
 namespace loadl {
 
-runner_task::runner_task(int target_sweeps, int target_thermalization, int sweeps,
+runner_task::runner_task(int target_sweeps, int sweeps,
                          int scheduled_runs)
-    : target_sweeps{target_sweeps}, target_thermalization{target_thermalization}, sweeps{sweeps},
+    : target_sweeps{target_sweeps}, sweeps{sweeps},
       scheduled_runs{scheduled_runs} {}
 
 bool runner_task::is_done() const {
-    return sweeps >= (target_sweeps + target_thermalization);
+    return sweeps >= target_sweeps;
 }
 }
diff --git a/src/runner_task.h b/src/runner_task.h
index a5a817dbc0a5c29169d67827af30d6fd81665d2c..2fa2ed80af5cfa582fdd3318878ce03a705b7db3 100644
--- a/src/runner_task.h
+++ b/src/runner_task.h
@@ -5,11 +5,10 @@ namespace loadl {
 // used by the runner
 struct runner_task {
     int target_sweeps;
-    int target_thermalization;
     int sweeps;
     int scheduled_runs;
 
     bool is_done() const;
-    runner_task(int target_sweeps, int target_thermalization, int sweeps, int scheduled_runs);
+    runner_task(int target_sweeps, int sweeps, int scheduled_runs);
 };
 }