jobstatus.py 3.22 KB
Newer Older
Lukas Weber's avatar
Lukas Weber committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
#!/usr/bin/env python3

import glob
import h5py
import os
import yaml

"""
This module extracts progress information from job data.
"""

class TaskProgress:
    """Plain attribute container holding the progress of a single task.

    The class defines no attributes itself. JobProgress.__init__ assigns
    target_sweeps, target_therm, sweeps, therm_sweeps and num_runs on
    each instance it creates.
    """

class JobProgress:
    """Progress of all tasks of a job, gathered from the job's run files.

    Attributes:
        jobfile: the job object passed to the constructor. Its ``jobname``
            and ``tasks`` attributes are read.
        tasks: the keys of ``jobfile.tasks``.
        progress: one TaskProgress per task, in the order of ``tasks``.
        restart: True if at least one task has not reached its sweep target.
    """

    def __init__(self, jobfile):
        """Scan the dump files of every task and accumulate their sweeps.

        Args:
            jobfile: object with a ``jobname`` attribute and a ``tasks``
                mapping whose values provide the 'sweeps' and
                'thermalization' entries.
        """
        self.jobfile = jobfile
        self.tasks = jobfile.tasks.keys()
        self.restart = False

        self.progress = []
        for task in self.tasks:
            tp = TaskProgress()

            tp.target_sweeps = jobfile.tasks[task]['sweeps']
            tp.target_therm = jobfile.tasks[task]['thermalization']

            tp.sweeps = 0
            tp.therm_sweeps = 0
            tp.num_runs = 0

            for runfile in self._run_files(task, 'dump'):
                tp.num_runs += 1

                # Only the first element of each dataset is read.
                with h5py.File(runfile, 'r') as f:
                    tp.sweeps += f['/sweeps'][0]
                    tp.therm_sweeps += f['/thermalization_sweeps'][0]

            # A task is unfinished while the sweeps summed over all of its
            # runs stay below target_sweeps + target_therm.
            if tp.sweeps < tp.target_sweeps + tp.target_therm:
                self.restart = True

            self.progress.append(tp)

    def _run_files(self, task, kind):
        """Iterate over the 'run*.<kind>.h5' files in the data directory of task.

        The job name and the task name are escaped so that glob
        metacharacters ('*', '?', '[') in them are matched literally
        instead of being interpreted as wildcards.
        """
        pattern = '{}.data/{}/run*.{}.h5'.format(
            glob.escape(str(self.jobfile.jobname)),
            glob.escape(str(task)),
            kind,
        )
        return glob.iglob(pattern)

    def needs_restart(self):
        """Return True if any task is still below its sweep target."""
        return self.restart

    def needs_merge(self):
        """Return True if the merged results are missing or out of date.

        The results file '<jobname>.results.yml' counts as out of date when
        any measurement file of any task has a newer modification time.
        """
        try:
            result_mtime = os.path.getmtime(self.jobfile.jobname + '.results.yml')
        except FileNotFoundError:
            # No results file has been written yet.
            return True

        for task in self.tasks:
            for measfile in self._run_files(task, 'meas'):
                if os.path.getmtime(measfile) > result_mtime:
                    return True

        return False
        
63
64
def print_status(jobfile, args):
    """Print the progress of a job, or report whether it needs a restart or merge.

    This function is exported as the loadl status command.

    Args:
        jobfile: job object handed to JobProgress.
        args: list of command-line argument strings for this command.
            If None, argparse falls back to sys.argv[1:].

    Returns:
        With '--need-restart' or '--need-merge': True if the respective
        action is needed, False otherwise. Without either option the
        per-task progress is printed and None is returned. None is also
        returned after a FileNotFoundError has been reported.

    Raises:
        SystemExit: if both '--need-restart' and '--need-merge' are given
            (exit status -1), or if argparse rejects the arguments.
    """
    import argparse
    import sys

    parser = argparse.ArgumentParser(description='Prints the status and progress of a loadleveller Monte Carlo job.')
    parser.add_argument('--need-restart', action='store_true', help='Return 1 if the job is not completed yet and 0 otherwise.')
    parser.add_argument('--need-merge', action='store_true', help='Return 1 if the merged results are older than the raw data and 0 otherwise.')

    # Parse the arguments handed in by the caller rather than re-reading
    # sys.argv, which may carry arguments that do not belong to this command.
    options = parser.parse_args(args)

    # Reject contradictory options before scanning any run files.
    if options.need_restart and options.need_merge:
        print("Error: only one option of '--need-restart' and '--need-merge' can appear at once", file=sys.stderr)
        sys.exit(-1)

    try:
        job_prog = JobProgress(jobfile)

        if options.need_restart:
            if job_prog.needs_restart():
                print('Needs restart!')
                return True
            print('Job completed.')
            return False

        if options.need_merge:
            if job_prog.needs_merge():
                print('Needs merge!')
                return True
            print('Job already merged.')
            return False

        for task, tp in zip(job_prog.tasks, job_prog.progress):
            # Average thermalization per run; a task without runs reports 0.
            therm_per_run = tp.therm_sweeps/tp.num_runs if tp.num_runs > 0 else 0
            print('{t}: {tp.num_runs} runs, {tp.sweeps}/{tp.target_sweeps} sweeps, {therm_per_run}/{tp.target_therm} thermalization'.format(t=task,tp=tp,therm_per_run=int(round(therm_per_run))))

    except FileNotFoundError:
        # The parsed options carry no 'jobfile' attribute, so the job is
        # identified through the job object itself.
        print("Error: jobfile '{}' not found.".format(getattr(jobfile, 'jobname', jobfile)), file=sys.stderr)