diff --git a/manual/cluster_job_GPU_and_sweep_training_template.job b/manual/cluster_job_GPU_and_sweep_training_template.job
new file mode 100644
index 0000000000000000000000000000000000000000..e25032fdc7523f16f289e1baf7dfff9191575229
--- /dev/null
+++ b/manual/cluster_job_GPU_and_sweep_training_template.job
@@ -0,0 +1,68 @@
+#!/usr/bin/zsh
+
+##############################################
+##### Batch script for the MRCNN training ####
+##############################################
+#### Template: replace every <Placeholder> and every "..." path segment
+#### with real values before submitting the job.
+
+#### CREATE SBATCH ENTRIES ####
+#### Paths and parameters must be adapted accordingly.
+
+#### Job name
+#SBATCH --job-name=<JobName>
+
+#### Path and name of the output file of the job execution
+#SBATCH --output=/home/<UserID>/.../<JobOutputFolderName>/%x_%J_output.txt
+
+#### Job runtime determined by testing jobs on the GPU node (see manual).
+#### Multiply the computing time per epoch resulting from the test by the number of epochs to be trained.
+#### Add a safety factor, e.g. multiply by 1.2.
+#SBATCH --time=0-00:00:00
+
+#### Memory requirement per GPU determined by testing jobs on the GPU node (see manual).
+#### Add a safety factor, e.g. multiply by 1.2.
+#### For example: resulting value is 5GB --> --mem-per-gpu=5G
+#SBATCH --mem-per-gpu=5G
+
+#### E-mail address
+#SBATCH --mail-user=<EmailAddress>
+
+#### E-mails to be received
+#SBATCH --mail-type=ALL
+
+#### Number of tasks to be performed
+#SBATCH --ntasks=1
+
+#### Number of GPUs required per node
+#SBATCH --gres=gpu:1
+
+#### Definition of the job array starting at 0.
+#### This parameter is only required if you want to perform several jobs in parallel
+#### from one job script, e.g. grid search via Weights and Biases sweep.
+#### In this example we perform a grid search with 6 jobs --> array=0-5
+#SBATCH --array=0-5
+
+#### CREATE TERMINAL ENTRIES ####
+#### Paths and parameters must be adapted accordingly.
+#### Paths are quoted so that a forgotten <Placeholder> is not parsed by the
+#### shell as a redirection. Environment activation and the directory change
+#### abort the job on failure so that training never starts in the wrong place.
+
+#### Loading the Cuda module
+module load cuda/10.0
+
+#### Export path in which Anaconda is located
+export PATH="$PATH:/home/<UserID>/anaconda3/bin"
+
+#### Activate environment
+source activate env_mrcnn_gpu || { echo "ERROR: could not activate environment env_mrcnn_gpu" >&2; exit 1; }
+
+#### Navigate to the path where the droplet.py script is located
+cd "/home/<UserID>/.../samples/droplet/" || { echo "ERROR: could not change to the droplet sample directory" >&2; exit 1; }
+
+#### Run MRCNN via Weights and Biases.
+#### The <SweepCode> is generated after a sweep is created at the Weights and Biases homepage.
+#### All training parameters are specified in the sweep configuration.
+#### --count 1 limits the agent started by each array task to a single run.
+wandb agent --count 1 "avt-droplet-detection/paper/<SweepCode>"