Aufgrund einer Wartung wird GitLab am 17.08. zwischen 8:30 und 9:00 Uhr kurzzeitig nicht zur Verfügung stehen. / Due to maintenance, GitLab will be temporarily unavailable on 17.08. between 8:30 and 9:00 am.

Commit a5ba2e88 authored by Lukas Weber

fix oddities with done jobs

parent 7880d080
......@@ -50,7 +50,7 @@ def run():
# check age of the different files
binary_modtime = os.stat(job.jobconfig['mc_binary']).st_mtime
try:
- f = next(glob.iglob('{}.data/{}/*.h5'.format(job.jobname,job.tasks.keys()[-1]))) # only check one of the output files for speed
+ f = next(glob.iglob('{}.data/*/*.h5'.format(job.jobname))) # only check one of the output files for speed
data_modtime = os.stat(f).st_mtime
label = 'Warning' if args_run.force else 'Error'
......
......@@ -77,7 +77,6 @@ void runner_master::react() {
int node = stat.MPI_SOURCE;
if(node_status == S_IDLE) {
current_task_id_ = get_new_task_id(current_task_id_);
if(current_task_id_ < 0) {
send_action(A_EXIT, node);
num_active_ranks_--;
......@@ -85,7 +84,7 @@ void runner_master::react() {
send_action(A_NEW_JOB, node);
tasks_[current_task_id_].scheduled_runs++;
int msg[3] = {current_task_id_, tasks_[current_task_id_].scheduled_runs,
- tasks_[current_task_id_].target_sweeps - tasks_[current_task_id_].sweeps};
+ std::max(1,tasks_[current_task_id_].target_sweeps - tasks_[current_task_id_].sweeps)};
MPI_Send(&msg, sizeof(msg) / sizeof(msg[0]), MPI_INT, node, T_NEW_JOB, MPI_COMM_WORLD);
}
} else if(node_status == S_BUSY) {
......
......@@ -350,7 +350,8 @@ void runner_pt_master::start() {
MPI_Comm tmp;
MPI_Comm_split(MPI_COMM_WORLD, MPI_UNDEFINED, 0, &tmp);
- for(int rank_section = 0; rank_section < (num_active_ranks_ - 1) / chain_len_; rank_section++) {
+ int chain_count = (num_active_ranks_ - 1) / chain_len_;
+ for(int rank_section = 0; rank_section < chain_count; rank_section++) {
assign_new_chain(rank_section);
}
......@@ -394,7 +395,6 @@ int runner_pt_master::schedule_chain_run() {
int runner_pt_master::assign_new_chain(int rank_section) {
int chain_run_id = schedule_chain_run();
for(int target = 0; target < chain_len_; target++) {
int msg[3] = {-1, 0, 0};
if(chain_run_id >= 0) {
......@@ -402,7 +402,7 @@ int runner_pt_master::assign_new_chain(int rank_section) {
auto &chain = pt_chains_[chain_run.id];
msg[0] = chain.task_ids[target];
msg[1] = chain_run.run_id;
- msg[2] = chain.target_sweeps + chain.sweeps;
+ msg[2] = std::max(1, chain.target_sweeps - chain.sweeps);
} else {
// this will prompt the slave to quit
num_active_ranks_--;
......@@ -577,6 +577,7 @@ void runner_pt_slave::start() {
job_.jobfile["jobconfig"].defined("pt_parameter_optimization");
if(!accept_new_chain()) {
job_.log(fmt::format("rank {} exits: out of work", rank_));
return;
}
......
......@@ -9,6 +9,6 @@ runner_task::runner_task(int target_sweeps, int sweeps,
scheduled_runs{scheduled_runs} {}
bool runner_task::is_done() const {
- return sweeps > target_sweeps;
+ return sweeps >= target_sweeps;
}
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment