# clusterutils.py: run a job command locally via mpirun or submit it as a batch job on a known cluster.
import tempfile
import os
import sys
# Job script template for the claix18 cluster. The {placeholders} are filled
# in with str.format by generate_batchscript_claix18. Everything between the
# triple quotes ends up verbatim in the submitted script, so nothing but
# script text may appear inside it.
batchscript_claix18 = '''#!/usr/bin/env zsh

# Autogenerated jobscript targeting the claix18 cluster.

#SBATCH --job-name={jobname}
#SBATCH --time={walltime}
#SBATCH --mem-per-cpu={mem_per_cpu}
#SBATCH --ntasks={num_cores}
#SBATCH --export=NONE
#SBATCH --output={jobname}.data/stdout.log
#SBATCH --error={jobname}.data/stderr.log

{custom_cmds}
{mpirun} $FLAGS_MPI_BATCH {mc_cmd}
'''

# Shell command used to submit a batch script, keyed by system name. The
# {batchscript} placeholder receives the path of the script file.
batch_commands = {
    'claix18': 'sbatch {batchscript}'
}

# System names recognised by determine_system(); any other value is mapped
# to 'local' there.
valid_systems = ['local', 'claix18']

def generate_batchscript_claix18(cmd, jobname, jobconfig):
    """Render the claix18 job script for one job.

    Args:
        cmd: Sequence of strings making up the command line to run; the
            items are joined with single spaces.
        jobname: Name of the job. Used for ``--job-name`` and for the
            ``{jobname}.data`` directory that receives stdout/stderr logs.
        jobconfig: Mapping with the job settings. ``mc_walltime`` and
            ``num_cores`` are required. ``mpirun`` (default ``'mpirun'``),
            ``mem_per_cpu`` (default ``'2G'``), ``project`` and
            ``custom_cmds`` are optional.

    Returns:
        The text of the job script.

    Raises:
        Exception: If a required key is missing from ``jobconfig``.
    """
    # Lines inserted between the fixed #SBATCH header and the mpirun call.
    # The account directive goes first so it precedes any user commands.
    custom_cmds = ''
    if 'project' in jobconfig:
        custom_cmds += '#SBATCH --account={}\n'.format(jobconfig['project'])
    custom_cmds += jobconfig.get('custom_cmds', '')

    try:
        return batchscript_claix18.format(
            jobname=jobname,
            mpirun=jobconfig.get('mpirun', 'mpirun'),
            mem_per_cpu=jobconfig.get('mem_per_cpu', '2G'),
            walltime=jobconfig['mc_walltime'],
            num_cores=jobconfig['num_cores'],
            custom_cmds=custom_cmds,
            mc_cmd=' '.join(cmd)
        )
    except KeyError as e:
        # Turn the bare KeyError into a message that names the missing setting.
        raise Exception('Error: required key "{}" missing in jobconfig'.format(e.args[0]))


def generate_batchscript(sysinfo, *args):
    """Build the batch script for the given system.

    Args:
        sysinfo: Name of the target system. Only ``'claix18'`` is supported.
        *args: Positional arguments forwarded unchanged to the
            system-specific generator.

    Returns:
        Whatever the system-specific generator returns.

    Raises:
        Exception: If ``sysinfo`` names a system without a generator.
    """
    # Reject unsupported systems up front; the supported case falls through.
    if sysinfo != 'claix18':
        raise Exception('unknown system type {}'.format(sysinfo))
    return generate_batchscript_claix18(*args)

def determine_system():
    """Return the name of the system jobs should be launched on.

    The ``MCLL_SYSTEM_INFO`` environment variable takes precedence. When it
    is unset or empty, a hostname ending in ``.hpc.itc.rwth-aachen.de``
    selects ``'claix18'``. Any result that is not listed in
    ``valid_systems`` falls back to ``'local'``.

    Returns:
        One of the names in ``valid_systems``.
    """
    sysinfo = os.environ.get('MCLL_SYSTEM_INFO')

    # os.environ.get yields None (not '') for an unset variable, so test for
    # falsiness; comparing against '' would skip the hostname detection
    # whenever the variable is simply not set.
    if not sysinfo:
        import socket
        hostname = socket.gethostname()
        if hostname.endswith('.hpc.itc.rwth-aachen.de'):
            sysinfo = 'claix18'

    if sysinfo not in valid_systems:
        sysinfo = 'local'

    return sysinfo
    
def run(jobname, jobconfig, cmd):
    """Run ``cmd`` on the detected system.

    On ``'local'`` the command is started directly through mpirun. On any
    other system a batch script is generated, written to a temporary file and
    handed to that system's submit command from ``batch_commands``.

    Args:
        jobname: Name of the job, passed on to the batch script generator.
        jobconfig: Mapping with the job settings. ``num_cores`` is read for
            the local run; ``mpirun`` is optional and defaults to
            ``'mpirun'``. For batch runs it is passed to the generator.
        cmd: Sequence of strings making up the command line to run.
    """
    sysinfo = determine_system()

    print('running on system \'{}\''.format(sysinfo))

    if sysinfo == 'local':
        # Same default launcher as the batch path, so a jobconfig without an
        # explicit 'mpirun' entry works on both.
        mpicmd = '{} -n {} {}'.format(
            jobconfig.get('mpirun', 'mpirun'), jobconfig['num_cores'], ' '.join(cmd))
        print('$ '+mpicmd)
        os.system(mpicmd)
        return

    # Render the script before creating the temporary file, so a bad
    # jobconfig cannot leave an orphaned file behind.
    batchscript = generate_batchscript(sysinfo, cmd, jobname, jobconfig)
    print(batchscript)

    # delete=False: the file has to survive being closed so the submit
    # command can read it; it is removed explicitly below.
    f = tempfile.NamedTemporaryFile(mode='w', delete=False)
    bscriptname = f.name
    try:
        with f:
            f.write(batchscript)

        mpicmd = batch_commands[sysinfo].format(batchscript=bscriptname)
        print('$ '+mpicmd)
        os.system(mpicmd)
    finally:
        # Always clean up, even if writing or submitting raised.
        os.unlink(bscriptname)