jobstatus.py 3.37 KB
Newer Older
Lukas Weber's avatar
Lukas Weber committed
1 2 3 4 5 6 7 8 9 10 11 12 13
#!/usr/bin/env python3

import glob

"""
This module extracts progress information from job data.
"""

class TaskProgress:
    """Plain attribute holder for the progress of a single task.

    Instances start out empty; JobProgress.__init__ assigns
    target_sweeps, target_therm, sweeps, therm_sweeps and num_runs.
    """

class JobProgress:
    """Progress of every task of a job, gathered from its run dump files.

    Attributes set by the constructor:
        jobfile:  the job object that was passed in.
        tasks:    sorted list of the keys of jobfile.tasks.
        progress: one TaskProgress per entry of tasks, in the same order.
        restart:  True if at least one task is below its thermalization
                  or sweep target.
    """

    def __init__(self, jobfile):
        """Scan '<jobname>.data/<task>/run*.dump.h5' for each task.

        For every dump file the first entry of the '/sweeps' dataset is
        floor-divided by the task's 'pt_sweeps_per_global_update'
        (1 if that key is absent). Of the result, up to 'thermalization'
        sweeps are counted as thermalization, the rest as sweeps.
        """
        import h5py

        self.jobfile = jobfile
        self.tasks = sorted(jobfile.tasks.keys())
        self.restart = False
        self.progress = []

        for name in self.tasks:
            entry = TaskProgress()
            entry.target_sweeps = jobfile.tasks[name]['sweeps']
            entry.target_therm = jobfile.tasks[name]['thermalization']

            # Totals over all runs of this task.
            done_sweeps = 0
            done_therm = 0
            run_count = 0

            pattern = '{}.data/{}/run*.dump.h5'.format(self.jobfile.jobname, name)
            for dump_path in glob.iglob(pattern):
                run_count += 1

                with h5py.File(dump_path, 'r') as dump:
                    run_sweeps = dump['/sweeps'][0] // jobfile.tasks[name].get('pt_sweeps_per_global_update', 1)

                    done_therm += min(run_sweeps, entry.target_therm)
                    done_sweeps += max(0, run_sweeps - entry.target_therm)

            entry.sweeps = done_sweeps
            entry.therm_sweeps = done_therm
            entry.num_runs = run_count

            unfinished = (entry.therm_sweeps < entry.target_therm
                          or entry.sweeps < entry.target_sweeps)
            if unfinished:
                self.restart = True

            self.progress.append(entry)

    def need_restart(self):
        """Return the restart flag computed by the constructor."""
        return self.restart
        
Lukas Weber's avatar
Lukas Weber committed
50 51
def job_need_merge(jobfile):
    """Tell whether the merged results of a job are missing or out of date.

    Compares the modification time of '<jobname>.results.yml' with that of
    every '<jobname>.data/<task>/run*.meas.h5' file.

    Args:
        jobfile: object providing 'jobname' and an iterable 'tasks'.

    Returns:
        True if the results file does not exist or any measurement file is
        newer than it, False otherwise.
    """
    import os

    try:
        merged_at = os.path.getmtime(jobfile.jobname+'.results.yml')
    except FileNotFoundError:
        # Nothing has been merged so far.
        return True

    # any() stops at the first measurement file newer than the results.
    return any(
        os.path.getmtime(meas_path) > merged_at
        for task in jobfile.tasks
        for meas_path in glob.iglob('{}.data/{}/run*.meas.h5'.format(jobfile.jobname, task))
    )
Lukas Weber's avatar
Lukas Weber committed
65
        
66 67
def print_status(jobfile, args):
    """Print the status and progress of a job.

    This function is exported as the loadl status command.

    Args:
        jobfile: job object, handed on to JobProgress and job_need_merge.
        args: sequence of command line argument strings. Understands the
            mutually exclusive flags '--need-restart' and '--need-merge'.

    Returns:
        With '--need-merge': 0 if a merge is needed, 1 if not.
        With '--need-restart': 0 if a restart is needed, 1 if the job is
        completed.
        Without a flag: None, after printing one progress line per task.
        -1 if a file was reported missing while gathering the status.

    Raises:
        SystemExit: with code -1 when both flags are given; argparse also
            exits on arguments it cannot parse.
    """

    import argparse
    import sys

    parser = argparse.ArgumentParser(description='Prints the status and progress of a loadleveller Monte Carlo job.')
    # The help texts state the codes this function actually returns
    # (0 means "yes", as for a shell command).
    parser.add_argument('--need-restart', action='store_true', help='Return 0 if the job is not completed yet and 1 otherwise.')
    parser.add_argument('--need-merge', action='store_true', help='Return 0 if the merged results are older than the raw data and 1 otherwise.')

    args = parser.parse_args(args)

    # Must be checked before any flag is acted upon; otherwise the
    # '--need-merge' branch returns first and the conflict goes unnoticed.
    if args.need_restart and args.need_merge:
        print("Error: only one option of '--need-restart' and '--need-merge' can appear at once", file=sys.stderr)
        sys.exit(-1)

    try:
        if args.need_merge:
            if job_need_merge(jobfile):
                print('Needs merge!')
                return 0
            print('Job already merged.')
            return 1

        job_prog = JobProgress(jobfile)

        if args.need_restart:
            if job_prog.need_restart():
                print('Needs restart!')
                return 0
            print('Job completed.')
            return 1

        for task, tp in zip(job_prog.tasks, job_prog.progress):
            # therm_sweeps is summed over all runs; show the per-run average.
            therm_per_run = tp.therm_sweeps/tp.num_runs if tp.num_runs > 0 else 0
            print('{t}: {tp.num_runs} runs, {tp.sweeps:8d}/{tp.target_sweeps} sweeps, {therm_per_run:8d}/{tp.target_therm} thermalization'.format(t=task,tp=tp,therm_per_run=int(round(therm_per_run))))

    except FileNotFoundError as e:
        # The parsed arguments have no 'jobfile' attribute, so report the
        # path carried by the exception instead.
        print("Error: file '{}' not found.".format(e.filename), file=sys.stderr)
        return -1