diff --git a/image/Dockerfile b/image/Dockerfile
new file mode 100644
index 0000000000000000000000000000000000000000..acce48ad110551f8f3df10097abec117f3641b90
--- /dev/null
+++ b/image/Dockerfile
@@ -0,0 +1,49 @@
+FROM debian:buster
+
+ENV CHISEL_URL='https://github.com/jpillora/chisel/releases/download/v1.7.6/chisel_1.7.6_linux_amd64.gz'
+ENV CHISEL_PORT='8888'
+ENV CHISEL_BACKEND='http://localhost:8890'
+ENV CHISEL_AUTHFILE='/authfile'
+
+ENV JUPYTERHUB_SERVICE_PREFIX='/'
+ENV JUPYTERHUB_API_URL='https://jupyter.rwth-aachen.de/hub/api'
+
+RUN apt-get update && \
+    apt-get -y install \
+        ssh \
+        sshpass \
+        python \
+        supervisor \
+        openssh-server \
+        nfs-ganesha-vfs \
+        nfs-common \
+        dbus \
+        wget \
+        python3-pip \
+        socat
+
+RUN wget -O- ${CHISEL_URL} | gunzip > /usr/bin/chisel
+RUN chmod +x /usr/bin/chisel
+
+RUN pip3 install \
+        tornado \
+        requests \
+        jinja2
+
+ENV NB_USER=jovyan
+RUN useradd -ms /bin/bash ${NB_USER}
+
+ADD etc/ssh/sshd_config /etc/ssh
+ADD etc/supervisor/supervisord.conf /etc/supervisor
+ADD etc/ganesha/ganesha.conf /etc/ganesha
+#ADD etc/supervisor/conf.d/nfs.conf /etc/supervisor/conf.d
+
+RUN mkdir -p /run/sshd /run/rpc_pipefs/nfs /var/run/dbus/
+RUN printf '{}\n' > /authfile
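+# Note: the file written above is only a placeholder. server.py regenerates it
+# at container start with a JSON map of chisel credentials, e.g.:
+#   { "jovyan:<random password>": [""] }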
+ADD template.html.j2 server.py /
+RUN chmod +x /server.py
+
+CMD ["/usr/bin/supervisord", "-c", "/etc/supervisor/supervisord.conf"]
diff --git a/image/etc/ganesha/ganesha.conf b/image/etc/ganesha/ganesha.conf
new file mode 100644
index 0000000000000000000000000000000000000000..9eace4885f8d1d0f8fc2871b97b8582b6ab974eb
--- /dev/null
+++ b/image/etc/ganesha/ganesha.conf
@@ -0,0 +1,30 @@
+NFS_CORE_PARAM
+{
+    Protocols = 4;
+}
+
+EXPORT
+{
+    # Export Id (mandatory, each EXPORT must have a unique Export_Id)
+    Export_Id = 77;
+
+    # Exported path (mandatory)
+    Path = /home/jovyan;
+
+    # Pseudo Path (required for NFS v4)
+    Pseudo = /home/jovyan;
+
+    # Required for access (default is None)
+    # Could use CLIENT blocks instead
+    Access_Type = RW;
+
+    # Exporting FSAL
+    FSAL {
+        Name = VFS;
+    }
+}
+
+NFS_KRB5
+{
+    Active_krb5 = false;
+}
diff --git a/image/etc/ssh/sshd_config b/image/etc/ssh/sshd_config
new file mode 100644
index 0000000000000000000000000000000000000000..e50bfbfdcbe53791e25475b262f4a86e949fa769
--- /dev/null
+++ b/image/etc/ssh/sshd_config
@@ -0,0 +1,14 @@
+ListenAddress [::1]:2222
+ListenAddress 127.0.0.1:2222
+
+PermitEmptyPasswords yes
+
+ChallengeResponseAuthentication no
+
+UsePAM yes
+X11Forwarding yes
+PrintMotd no
+AcceptEnv LANG LC_*
+
+# override default of no subsystems
+Subsystem sftp /usr/lib/openssh/sftp-server
diff --git a/image/etc/supervisor/conf.d/nfs.conf b/image/etc/supervisor/conf.d/nfs.conf
new file mode 100644
index 0000000000000000000000000000000000000000..a0cd9bcc35115d68f9128e00c460f5f7cb01cecc
--- /dev/null
+++ b/image/etc/supervisor/conf.d/nfs.conf
@@ -0,0 +1,48 @@
+[program:ganesha]
+command = sh -c "sleep 5 && ganesha.nfsd -F -L /dev/stdout"
+autostart = true
+autorestart = true
+stdout_logfile = /dev/stdout
+stdout_logfile_maxbytes = 0
+stderr_logfile = /dev/stderr
+stderr_logfile_maxbytes = 0
+
+[program:rpcbind]
+priority = 100
+command = rpcbind -f
+autostart = true
+autorestart = true
+stdout_logfile = /dev/stdout
+stdout_logfile_maxbytes = 0
+stderr_logfile = /dev/stderr
+stderr_logfile_maxbytes = 0
+
+[program:rpcstatd]
+priority = 200
+command = rpc.statd -L -F
+autostart = true
+autorestart = true
+stdout_logfile = /dev/stdout
+stdout_logfile_maxbytes = 0
+stderr_logfile = /dev/stderr
+stderr_logfile_maxbytes = 0
+
+[program:rpc.idmapd]
+priority = 300
+command = rpc.idmapd -f
+autostart = true
+autorestart = true
+stdout_logfile = /dev/stdout
+stdout_logfile_maxbytes = 0
+stderr_logfile = /dev/stderr
+stderr_logfile_maxbytes = 0
+
+[program:dbus]
+priority = 300
+command = dbus-daemon --system --print-address --nofork
+autostart = true
+autorestart = true
+stdout_logfile = /dev/stdout
+stdout_logfile_maxbytes = 0
+stderr_logfile = /dev/stderr
+stderr_logfile_maxbytes = 0
diff --git a/image/etc/supervisor/conf.d/sshd.conf b/image/etc/supervisor/conf.d/sshd.conf
new file mode 100644
index 0000000000000000000000000000000000000000..a7f0e02f38ee72d1b1e802b97b8fd3a147707d38
--- /dev/null
+++ b/image/etc/supervisor/conf.d/sshd.conf
@@ -0,0 +1,8 @@
+[program:sshd]
+command = /usr/sbin/sshd -D -e -p 2222
+autostart = true
+autorestart = true
+stdout_logfile = /dev/stdout
+stdout_logfile_maxbytes = 0
+stderr_logfile = /dev/stderr
+stderr_logfile_maxbytes = 0
diff --git a/image/etc/supervisor/supervisord.conf b/image/etc/supervisor/supervisord.conf
new file mode 100644
index 0000000000000000000000000000000000000000..7c4a940c4e7ecbc57d3f0f94e7b8e243a83a80f4
--- /dev/null
+++ b/image/etc/supervisor/supervisord.conf
@@ -0,0 +1,38 @@
+[supervisord]
+logfile = /dev/null
+loglevel = info
+pidfile = /var/run/supervisord.pid
+nodaemon = true
+user = root
+
+[include]
+files = /etc/supervisor/conf.d/*.conf
+
+[program:server]
+priority = 300
+command = /server.py
+autostart = true
+autorestart = true
+stdout_logfile = /dev/stdout
+stdout_logfile_maxbytes = 0
+stderr_logfile = /dev/stderr
+stderr_logfile_maxbytes = 0
+
+[program:chisel]
+command = bash -c "chisel server --port %(ENV_CHISEL_PORT)s --reverse --authfile %(ENV_CHISEL_AUTHFILE)s --backend %(ENV_CHISEL_BACKEND)s 2>&1 | tee /chisel.log"
+autostart = true
+autorestart = true
+stdout_logfile = /dev/stdout
+stdout_logfile_maxbytes = 0
+stderr_logfile = /dev/stderr
+stderr_logfile_maxbytes = 0
+
+[program:sftp-server]
+command = socat TCP6-LISTEN:7777 EXEC:/usr/lib/openssh/sftp-server
+autostart = true
+autorestart = true
+stdout_logfile = /dev/stdout
+stdout_logfile_maxbytes = 0
+stderr_logfile = /dev/stderr
+stderr_logfile_maxbytes = 0
+
diff --git a/image/server.py b/image/server.py
new file mode 100755
index 0000000000000000000000000000000000000000..17d85d6f2d80a98b085ec896ecfca4f966af2bf2
--- /dev/null
+++ b/image/server.py
@@ -0,0 +1,187 @@
+#!/usr/bin/env python3
+
+import os
+import re
+import json
+import requests
+import secrets
+import string
+import socket
+import time
+import logging
+
+from jinja2 import Environment, FileSystemLoader, TemplateNotFound
+
+from http import HTTPStatus
+import tornado.ioloop
+import tornado.web
+
+JUPYTER_API_URL = os.environ.get('JUPYTERHUB_API_URL')
+JUPYTER_API_TOKEN = os.environ.get('JUPYTERHUB_API_TOKEN')
+JUPYTER_USERNAME = os.environ.get('JUPYTERHUB_USER')
+JUPYTERHUB_SERVICE_PREFIX = os.environ.get('JUPYTERHUB_SERVICE_PREFIX')
+
+CHISEL_AUTHFILE = os.environ.get('CHISEL_AUTHFILE')
+CHISEL_USERNAME = 'jovyan'
+CHISEL_PASSWORD = ''.join(secrets.choice(string.ascii_letters + string.digits) for _ in range(20))
+
+state = 'init'
+
+def chisel_get_fingerprint():
+    with open('/chisel.log', 'r') as f:
+        content = f.read()
+
+    m = re.search(r'Fingerprint ([a-zA-Z0-9=+/]+)', content)
+    if m:
+        return m.group(1)
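+
+# Note on how these credentials are consumed: the /api/v1 endpoint below hands
+# the chisel username/password and the server fingerprint to the client side
+# (e.g. the rwth-jupyter helper). A client would then open the reverse tunnel
+# roughly like this (illustrative only, flag names as of chisel 1.7.x):
+#
+#   chisel client --fingerprint <fingerprint> --auth jovyan:<password> \
+#       <public URL of this service> R:8890:localhost:<notebook port>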
+
+
+class TemplateRendering:
+
+    def render_template(self, template_name, **kwargs):
+        template_dirs = [
+            '/'
+        ]
+        env = Environment(loader=FileSystemLoader(template_dirs))
+
+        try:
+            template = env.get_template(template_name)
+        except TemplateNotFound:
+            raise TemplateNotFound(template_name)
+        return template.render(kwargs)
+
+
+class MainHandler(tornado.web.RequestHandler, TemplateRendering):
+
+    def get(self):
+        self.set_header('Connection', 'close')
+        self.request.connection.no_keep_alive = True
+
+        args = {
+            'hpc_username': 'your_hpc_username',
+            'prefix': JUPYTERHUB_SERVICE_PREFIX
+        }
+
+        response = self.render_template('template.html.j2', **args)
+
+        self.write(response)
+        self.set_status(HTTPStatus.OK)
+
+class APIHandler(tornado.web.RequestHandler):
+
+    def post(self):
+        self.set_header('Connection', 'close')
+        self.request.connection.no_keep_alive = True
+
+        token_hdr = self.request.headers.get('Authorization')
+        if not token_hdr:
+            self.send_error(HTTPStatus.BAD_REQUEST)
+            return
+        token = token_hdr.split()[-1]
+
+        # Check token
+        r = requests.get(JUPYTER_API_URL+'/user', headers={
+            'Authorization': 'token ' + token
+        })
+        if r.status_code != 200:
+            self.send_error(r.status_code)
+            return
+
+        try:
+            should_stop = self.get_argument('stop')
+            if should_stop:
+                global state
+                state = 'release'
+        except tornado.web.MissingArgumentError:
+            pass
+
+        response = {
+            'jupyter': {
+                'token': JUPYTER_API_TOKEN
+            },
+            'chisel': {
+                'username': CHISEL_USERNAME,
+                'password': CHISEL_PASSWORD,
+                'fingerprint': chisel_get_fingerprint()
+            }
+        }
+
+        self.write(response)
+        self.set_status(HTTPStatus.OK)
+
+
+def main():
+    global state
+
+    FORMAT = '%(asctime)-15s %(message)s'
+    logging.basicConfig(level=logging.INFO, format=FORMAT)
+
+    prefix = os.environ['JUPYTERHUB_SERVICE_PREFIX']
+
+    # Generate Chisel authentication details
+    with open(CHISEL_AUTHFILE + '.new', 'w') as f:
+        auth = {
+            f'{CHISEL_USERNAME}:{CHISEL_PASSWORD}': ['']
+        }
+        json.dump(auth, f)
+
+    os.replace(CHISEL_AUTHFILE + '.new', CHISEL_AUTHFILE)
+
+    ioloop = tornado.ioloop.IOLoop.current()
+
+    def check_interruption():
+        global state
+        if state != 'running':
+            logging.info('Stopping IO loop')
+            ioloop.stop()
+
+    tornado.ioloop.PeriodicCallback(check_interruption, 100).start()
+
+    while True:
+        if state == 'release':
+            logging.info('Released port. Start retrying in 10 sec..')
+            time.sleep(10)
+
+            state = 'released'
+
+        elif state == 'released':
+            sock = socket.socket(socket.AF_INET6, socket.SOCK_STREAM)
+            result = sock.connect_ex(('::1', 8890))
+            sock.close()
+
+            if result == 0: # connect succeeded, port still in use
+                logging.info('Port is occupied. Trying again in 1 sec..')
+                time.sleep(1)
+            else:
+                logging.info('Port is free. Starting..')
+                state = 'init'
+
+        elif state == 'init':
+            logging.info('Starting IO loop')
+            state = 'running'
+
+            app = tornado.web.Application([
+                (prefix, MainHandler),
+                (prefix+'api/v1', APIHandler),
+            ])
+
+            server = app.listen(8890)
+
+            ioloop.start()
+
+            server.stop()
+
+        elif state == 'terminating':
+            break
+
+
+if __name__ == '__main__':
+    main()
diff --git a/image/spawn.sh b/image/spawn.sh
new file mode 100644
index 0000000000000000000000000000000000000000..9e78fef6c848831314652ae624d79840b9a88416
--- /dev/null
+++ b/image/spawn.sh
@@ -0,0 +1,87 @@
+#!/bin/bash
+
+CLUSTER_LOGIN_NODE=login18-1.hpc.itc.rwth-aachen.de
+DIR=~/.jupyter-rwth-hpc
+
+if [ -f "${DIR}/settings.sh" ]; then
+    . ${DIR}/settings.sh
+fi
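+
+# settings.sh is expected to provide the HPC account used below; a minimal
+# example (CLUSTER_LOGIN_NODE from above may also be overridden here):
+#
+#   CLUSTER_USER=ab123456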
+
+# Run on cluster
+CRUN="ssh -l ${CLUSTER_USER} ${CLUSTER_LOGIN_NODE} --"
+
+# Unique ID for this Jupyter spawn.
+# Required for supporting multiple Jupyter jobs in SLURM
+SPAWN_ID=$(cat /dev/urandom | tr -dc 'a-zA-Z0-9' | fold -w 8 | head -n 1)
+
+# set a random port for the notebook, in case multiple notebooks are
+# on the same compute node.
+PORT=$(shuf -i 8000-8500 -n 1)
+
+function finish {
+    # Close tunnel
+    if [ -n "${SSH_PID}" ]; then
+        kill ${SSH_PID}
+    fi
+
+    # Cancel job
+    if [ -n "${JOB_ID}" ]; then
+        ${CRUN} scancel ${JOB_ID}
+    fi
+}
+trap finish EXIT
+
+# Queue job
+${CRUN} sbatch <<EOF
+#!/bin/bash
+
+#SBATCH --nodes=1
+#SBATCH --ntasks-per-node=1
+#SBATCH --mem-per-cpu=8G
+#SBATCH --time=1-0:00:00
+#SBATCH --job-name=jupyter-${SPAWN_ID}
+#SBATCH --output=jupyter-notebook-%J.log
+#SBATCH --partition=c16m
+
+module switch intel gcc
+module load python/3.6.0
+
+python3 -m pip install --user --upgrade pip
+python3 -m pip install --user --upgrade jupyterhub jupyterlab
+
+srun -n1 \$(python3.6 -m site --user-base)/bin/jupyterhub-singleuser --no-browser --ip=0.0.0.0 --port=${PORT}
+EOF
+
+# Wait until job is scheduled
+JOB_STATE="UNKNOWN"
+while [ "${JOB_STATE}" != "RUNNING" ]; do
+    # Check job status
+    JOB_STATUS=$(${CRUN} squeue -u ${CLUSTER_USER} -h -o \"%.i %j %T %N %R\" | grep ${SPAWN_ID})
+    if [ -z "${JOB_STATUS}" ]; then
+        break;
+    fi
+
+    read -r JOB_ID JOB_NAME JOB_STATE JOB_NODE JOB_REASON <<<"${JOB_STATUS}"
+    echo "Waiting for job ${JOB_ID} (${JOB_NAME}) on node ${JOB_NODE} to run. Current state: ${JOB_STATE} (${JOB_REASON})"
+    sleep 1
+done
+
+# Setup tunnel
+ssh -L"[::]:8888:${JOB_NODE}:${PORT}" -l ${CLUSTER_USER} ${CLUSTER_LOGIN_NODE} -N &
+SSH_PID=$!
+
+echo "Jupyter started.."
+echo
+echo "    Access at: http://localhost:8888"
+
+echo "Sleeping. Waiting for termination"
+while true; do
+    sleep inf &
+    wait $!
+    echo "Sleep over.." # shouldn't happen?
+done
diff --git a/image/template.html.j2 b/image/template.html.j2
new file mode 100644
index 0000000000000000000000000000000000000000..55896cbc4460765bbcf76daafbdfdb8482e0efce
--- /dev/null
+++ b/image/template.html.j2
@@ -0,0 +1,45 @@
+<!DOCTYPE html>
+<html lang="en">
+    <title>Remote RWTHjupyter Session</title>
+    <link href="https://cdnjs.cloudflare.com/ajax/libs/prism/1.23.0/themes/prism.css" rel="stylesheet" />
+    <body>
+        <h1>Run your RWTHjupyter session on a remote machine such as the RWTH HPC cluster</h1>
+        <p>Please run the following commands on one of the HPC frontend nodes:</p>
+        <pre>
+            <code class="language-bash">
+                # Log in to one of the HPC frontend nodes via your SSH key or password:
+                $ ssh -l "{{hpc_username}}" login18-1.hpc.itc.rwth-aachen.de
+
+                # Add your local Python scripts to the PATH
+                $ PATH=$HOME/.local/bin:$PATH
+                $ echo 'PATH=$HOME/.local/bin:$PATH' >> ~/.bashrc
+
+                # Install the Jupyter scripts
+                $ python3 -m pip install --user --upgrade pip
+                $ python3 -m pip install --user rwth-jupyter
+
+                # Connect to the RWTHjupyter cluster
+                # If you want to start another session later on, only this command is required
+                $ rwth-jupyter
+            </code>
+        </pre>
+        <script src="https://cdnjs.cloudflare.com/ajax/libs/prism/1.23.0/prism.min.js"></script>
+        <script src="https://cdnjs.cloudflare.com/ajax/libs/prism/1.23.0/components/prism-bash.min.js"></script>
+        <script>
+            function checkReady() {
+                console.log('Checking if session is ready...');
+                fetch('{{prefix}}api')
+                    .then(function(response) {
+                        if (response.ok) {
+                            console.log('Session is ready. Reloading..');
+                            window.location.reload(false);
+                        }
+                    });
+            }
+
+            window.onload = function() {
+                setInterval(checkReady, 1000);
+            }
+        </script>
+    </body>
+</html>