jobstatus.py 3.23 KB
Newer Older
Lukas Weber's avatar
Lukas Weber committed
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
#!/usr/bin/env python3

import glob
import h5py
import os
import yaml

"""
This module extracts progress information from job data.
"""

class TaskProgress:
    """Plain attribute container holding the progress of a single task.

    The class defines no attributes itself. JobProgress.__init__ assigns
    the following on every instance it creates:

    target_sweeps -- the task's 'sweeps' entry from the jobfile
    target_therm  -- the task's 'thermalization' entry from the jobfile
    sweeps        -- sum of '/sweeps' over all run dump files of the task
    therm_sweeps  -- sum of '/thermalization_sweeps' over those files
    num_runs      -- number of run dump files found for the task
    """

class JobProgress:
    """Collect the progress of every task of a job from its files on disk.

    For each task in ``jobfile.tasks`` the constructor scans
    ``<jobname>.data/<task>/run*.dump.h5`` and adds up the sweep counters
    stored in those files.

    Attributes:
        jobfile: the job description passed to the constructor. Only its
            ``jobname`` and ``tasks`` attributes are read.
        tasks: key view of ``jobfile.tasks`` (the task names).
        restart: True if at least one task has fewer sweeps on disk than
            its requested ``sweeps + thermalization``.
        progress: list with one TaskProgress per task, in the iteration
            order of ``tasks``.
    """

    def __init__(self, jobfile):
        """Read the dump files of all tasks and fill ``progress``.

        Args:
            jobfile: object with a ``jobname`` string and a ``tasks`` mapping
                from task name to a mapping containing the keys ``'sweeps'``
                and ``'thermalization'``.
        """
        self.jobfile = jobfile
        self.tasks = jobfile.tasks.keys()
        self.restart = False

        self.progress = []
        for task in self.tasks:
            tp = TaskProgress()

            tp.target_sweeps = jobfile.tasks[task]['sweeps']
            tp.target_therm = jobfile.tasks[task]['thermalization']

            tp.sweeps = 0
            tp.therm_sweeps = 0

            tp.num_runs = 0

            for runfile in self._run_files(task, 'dump'):
                tp.num_runs += 1

                with h5py.File(runfile, 'r') as f:
                    tp.sweeps += f['/sweeps'][0]
                    tp.therm_sweeps += f['/thermalization_sweeps'][0]

            if tp.sweeps < tp.target_sweeps + tp.target_therm:
                self.restart = True

            self.progress.append(tp)

    def _run_files(self, task, kind):
        """Iterate over the ``run*.<kind>.h5`` files of ``task``.

        The job name and the task name are escaped so that glob
        metacharacters in them (``*``, ``?``, ``[``) are matched literally,
        the same way ``needs_merge`` treats the job name as a literal path.
        """
        pattern = '{}.data/{}/run*.{}.h5'.format(
            glob.escape(str(self.jobfile.jobname)),
            glob.escape(str(task)),
            kind,
        )
        return glob.iglob(pattern)

    def needs_restart(self):
        """Return True if any task has not reached its target sweep count."""
        return self.restart

    def needs_merge(self):
        """Return True if the merged results are missing or out of date.

        The results file is ``<jobname>.results.yml``. It is out of date when
        any ``run*.meas.h5`` file of any task has a newer modification time.
        """
        try:
            result_mtime = os.path.getmtime(self.jobfile.jobname + '.results.yml')
        except FileNotFoundError:
            # No merged results exist yet.
            return True

        return any(
            os.path.getmtime(measfile) > result_mtime
            for task in self.tasks
            for measfile in self._run_files(task, 'meas')
        )
        
63 64
def print_status(jobfile, args):
    """Print the status of a job; exported as the loadl status command.

    Args:
        jobfile: job description object handed on to JobProgress.
        args: list of command line arguments. Recognised options are
            ``--need-restart`` and ``--need-merge``; they are mutually
            exclusive.

    Returns:
        With ``--need-restart``: True if the job needs a restart, else False.
        With ``--need-merge``: True if the results need merging, else False.
        Without options: None, after printing one progress line per task.

    Raises:
        SystemExit: with code -1 if both options are given or if a file is
            reported missing while the job data is read.
    """

    import argparse
    import sys

    parser = argparse.ArgumentParser(description='Prints the status and progress of a loadleveller Monte Carlo job.')
    parser.add_argument('--need-restart', action='store_true', help='Return 1 if the job is not completed yet and 0 otherwise.')
    parser.add_argument('--need-merge', action='store_true', help='Return 1 if the merged results are older than the raw data and 0 otherwise.')

    args = parser.parse_args(args)

    # Reject conflicting options before any job data is read from disk.
    if args.need_restart and args.need_merge:
        print("Error: only one option of '--need-restart' and '--need-merge' can appear at once", file=sys.stderr)
        sys.exit(-1)

    try:
        job_prog = JobProgress(jobfile)

        if args.need_restart:
            if job_prog.needs_restart():
                print('Needs restart!')
                return True
            print('Job completed.')
            return False

        if args.need_merge:
            if job_prog.needs_merge():
                print('Needs merge!')
                return True
            print('Job already merged.')
            return False

        for task, tp in zip(job_prog.tasks, job_prog.progress):
            therm_per_run = tp.therm_sweeps/tp.num_runs if tp.num_runs > 0 else 0
            print('{t}: {tp.num_runs} runs, {tp.sweeps}/{tp.target_sweeps} sweeps, {therm_per_run}/{tp.target_therm} thermalization'.format(t=task,tp=tp,therm_per_run=int(round(therm_per_run))))

    except FileNotFoundError as e:
        # The parsed arguments carry no 'jobfile' attribute, so take the
        # missing path from the exception; fall back to its text when the
        # exception does not provide a filename.
        missing = e.filename if e.filename is not None else e
        print("Error: file '{}' not found.".format(missing), file=sys.stderr)
        # Exit non-zero so callers never mistake a failure for a falsy result.
        sys.exit(-1)