#!/usr/bin/env python3

import sys
import argparse
import subprocess
from loadleveller import jobfile
import os

# Top-level command line: `loadl <command> <jobscript> [<args>]`.
# Only the two positionals are declared here; everything else is collected in
# `leftover_args` and handed to the selected command (see run() and status()).
parser = argparse.ArgumentParser(description='Helper script for running and managing loadleveller Monte Carlo jobs.', usage='''loadl <command> <jobscript> [<args>]

<jobscript> is an executable that prints the job parameter YAML-file to stdout. It is convenient to use the taskmaker python module for this purpose.

Possible commands and their shorthands are
    delete, d  delete all data related to a job
    merge, m   merges results of an unfinished job into an output file
    run, r     runs the job
    status, s  print job completion information''')

parser.add_argument('command')
parser.add_argument('jobfile')

# parse_known_args() tolerates the command-specific options instead of
# rejecting them; they are parsed later by the command itself.
args, leftover_args = parser.parse_known_args()

# all paths are relative to the jobscript
jobdir = os.path.dirname(args.jobfile)
if jobdir:
    print('$ cd '+jobdir)
    os.chdir(jobdir)

# After the chdir above the jobscript is addressed by its basename only.
try:
    job = jobfile.JobFile(os.path.basename(args.jobfile))
except jobfile.JobFileGenError as e:
    print('Error: {}'.format(e))
    sys.exit(1)

def run():
    """Run the job, either in single core mode or through clusterutils.

    The options ``--single``, ``--force`` and ``--restart`` are parsed from
    the module-level ``leftover_args``.

    With ``--restart`` all existing job data is deleted first. Otherwise the
    modification time of the Monte Carlo binary is compared with that of one
    existing ``<jobname>.data/*/*.h5`` file; if the binary is newer, the
    script exits with status 1 unless ``--force`` was given, in which case
    only a warning is printed.

    In single core mode the binary is started directly and a non-zero exit
    status of the binary becomes the exit status of this script.
    """
    import glob
    from loadleveller import clusterutils

    run_parser = argparse.ArgumentParser(description='run a loadleveller job on a cluster or locally')

    run_parser.add_argument('-s','--single', action='store_true', help='run in the single core scheduler mode')
    run_parser.add_argument('-f', '--force', action='store_true', help='ignore warnings about possible job corruption')
    run_parser.add_argument('-r', '--restart', action='store_true', help='delete all existing job data before starting.')
    args_run = run_parser.parse_args(leftover_args)

    mc_binary = job.jobconfig['mc_binary']

    if args_run.restart:
        delete()
    else:
        # check age of the different files
        binary_modtime = os.stat(mc_binary).st_mtime

        # only check one of the output files for speed; None means that no
        # output exists yet, so there is nothing to compare against
        data_file = next(glob.iglob('{}.data/*/*.h5'.format(job.jobname)), None)
        if data_file is not None:
            data_modtime = os.stat(data_file).st_mtime

            label = 'Warning' if args_run.force else 'Error'
            if binary_modtime > data_modtime:
                print('{}: binary \'{}\' is newer than the checkpoint files.'.format(label, mc_binary))
                if not args_run.force:
                    print('Use \'--restart\' to start from a blank run or use \'--force\' to proceed if you are sure\nthe changes you made are compatible.')
                    sys.exit(1)

    job_input_filename = job.write_job_input_file()
    if args_run.single:
        print('$ {} single "{}"'.format(mc_binary, job_input_filename))
        # Pass an argument list instead of a shell string so that paths
        # containing spaces or quotes reach the binary unchanged.
        completed = subprocess.run([mc_binary, 'single', job_input_filename])
        if completed.returncode != 0:
            # do not report success when the simulation itself failed
            sys.exit(completed.returncode)
    else:
        clusterutils.run(job.jobname, job.jobconfig, [mc_binary, job_input_filename])

def delete():
    """Delete the data directory and the results file of the job.

    Removes ``<jobname>.data`` (recursively) and ``<jobname>.results.yml``
    from the current working directory if they exist, echoing the equivalent
    shell command for each removal. Paths that do not exist are skipped
    silently.
    """
    import shutil

    datadir = '{}.data'.format(job.jobname)
    results_file = '{}.results.yml'.format(job.jobname)

    if os.path.exists(datadir):
        print('$ rm -r {}'.format(datadir))
        shutil.rmtree(datadir)
    if os.path.exists(results_file):
        print('$ rm {}'.format(results_file))
        os.unlink(results_file)

def merge():
    """Invoke the Monte Carlo binary in ``merge`` mode for this job.

    Writes the job input file, echoes the command and runs
    ``<mc_binary> merge <job input file>``. A non-zero exit status of the
    binary becomes the exit status of this script.
    """
    job_input_filename = job.write_job_input_file()
    mc_binary = job.jobconfig['mc_binary']

    print('$ {} merge "{}"'.format(mc_binary, job_input_filename))
    # Pass an argument list instead of a shell string so that paths
    # containing spaces or quotes reach the binary unchanged.
    completed = subprocess.run([mc_binary, 'merge', job_input_filename])
    if completed.returncode != 0:
        # do not report success when the merge failed
        sys.exit(completed.returncode)

def status():
    """Print the job status and exit with the code reported by jobstatus.

    The remaining command line arguments (``leftover_args``) are forwarded
    unparsed to ``jobstatus.print_status``; its return value is used as the
    exit status of this script, so this function does not return.
    """
    from loadleveller import jobstatus

    rc = jobstatus.print_status(job, leftover_args)
    sys.exit(rc)


# Every command is reachable by its full name and by a one-letter shorthand.
commands = {
    'delete': delete, 'd': delete,
    'merge': merge, 'm': merge,
    'run': run, 'r': run,
    'status': status, 's': status,
}

handler = commands.get(args.command)
if handler is None:
    # Unrecognised command: show the usage text and signal failure.
    print('Unknown command \'{}\'.'.format(args.command))
    parser.print_help()
    sys.exit(1)
handler()