diff --git a/_includes/snippets_library/Birmingham_Baskerville_slurm/_config_options.yml b/_includes/snippets_library/Birmingham_Baskerville_slurm/_config_options.yml
new file mode 100644
index 00000000..e91ba0b3
--- /dev/null
+++ b/_includes/snippets_library/Birmingham_Baskerville_slurm/_config_options.yml
@@ -0,0 +1,68 @@
+#------------------------------------------------------------
+# Birmingham Baskerville Slurm: Jenny Wong
+#------------------------------------------------------------
+
+# Cluster host and scheduler options: the defaults come from
+# Graham at Compute Canada, running Slurm. Other options can
+# be found in the library of snippets,
+# `_includes/snippets_library`. To use one, replace options
+# below with those in `_config_options.yml` from the
+# library. E.g., to customise for Cirrus at EPCC, running
+# PBS, we could replace the options below with those from
+#
+# _includes/snippets_library/EPCC_Cirrus_pbs/_config_options.yml
+#
+# If your cluster is not represented in the library, please
+# copy an existing folder, rename it, and customize for your
+# installation. Remember to keep the leading slash on the
+# `snippets` variable below!
+
+snippets: "/snippets_library/Birmingham_Baskerville_slurm"
+
+local:
+  prompt: "[user@laptop ~]$"
+  bash_shebang: "#!/usr/bin/env bash"
+
+remote:
+  name: "Baskerville"
+  login: "login.baskerville.ac.uk"
+  host: "bask-pg0310u18a.cluster.baskerville.ac.uk"
+  node: "bask-pg"
+  location: "University of Birmingham, UK"
+  homedir: "/bask/homes/y/yourUsername"
+  user: "yourUsername"
+  prompt: "[yourUsername@bask-pg0310u18a ~]$"
+  bash_shebang: "#!/bin/bash"
+
+sched:
+  name: "Slurm"
+  submit:
+    name: "sbatch"
+    options: ""
+  queue:
+    debug: "devel"
+    testing: "normal"
+  status: "squeue"
+  flag:
+    user: "-u yourUsername"
+    interactive: ""
+    histdetail: "-l -j"
+    name: "-J"
+    time: "-t"
+    queue: "-p"
+  del: "scancel"
+  interactive: "srun"
+  info: "sinfo"
+  comment: "#SBATCH"
+  hist: "sacct -u $USER"
+
+episode_order:
+  - 10-hpc-intro
+  - 11-connecting
+  - 12-cluster
+  - 13-scheduler
+  - 14-modules
+  - 15-transferring-files
+  - 16-parallel
+  - 17-resources
+  - 18-responsibility
diff --git a/_includes/snippets_library/Birmingham_Baskerville_slurm/cluster/authenticity_of_host.snip b/_includes/snippets_library/Birmingham_Baskerville_slurm/cluster/authenticity_of_host.snip
new file mode 100644
index 00000000..3103151b
--- /dev/null
+++ b/_includes/snippets_library/Birmingham_Baskerville_slurm/cluster/authenticity_of_host.snip
@@ -0,0 +1,16 @@
+> ## The authenticity of host
+>
+> When logging in for the first time, you may be asked whether you trust the
+> server you are trying to connect to. If you typed the address correctly (i.e.
+> {{ site.login_host }}), it is safe to answer "yes" to the question at the
+> end of this message and permanently add this server to the trusted hosts.
+>
+> ~~~
+> $ ssh lola@{{ site.login_host }}
+> The authenticity of host '{{ site.login_host }}' can't be established.
+> RSA key fingerprint is SHA256:NwV2/9HMlLfj6hFmXTuA4UVievE/uq36K9EYa20CteI.
+> Are you sure you want to continue connecting (yes/no)?
yes +> Warning: Permanently added '{{ site.login_host }}' to the list of known hosts +> ~~~ +> {: .language-bash} +{: .callout} diff --git a/_includes/snippets_library/Birmingham_Baskerville_slurm/cluster/queue-info.snip b/_includes/snippets_library/Birmingham_Baskerville_slurm/cluster/queue-info.snip new file mode 100644 index 00000000..a1901e2a --- /dev/null +++ b/_includes/snippets_library/Birmingham_Baskerville_slurm/cluster/queue-info.snip @@ -0,0 +1,19 @@ +``` +PARTITION AVAIL TIMELIMIT NODES STATE NODELIST +normal* up 7-00:00:00 1 down* c5-12 +normal* up 7-00:00:00 87 mix c1-[7,13-15,19,22-24,28,31,33,3....... +normal* up 7-00:00:00 110 alloc c1-[1-6,8-9,11-12,16-18,20-21,........ +normal* up 7-00:00:00 1 idle c1-10 +bigmem up 14-00:00:0 1 drain c6-3 +bigmem up 14-00:00:0 10 mix c3-[29-31,53,56],c6-[1,4-7] +bigmem up 14-00:00:0 1 alloc c6-2 +bigmem up 14-00:00:0 24 idle c3-[32-52,54-55],c6-8 +accel up 14-00:00:0 3 mix c7-[1-2,8] +accel up 14-00:00:0 5 alloc c7-[3-7] +optimist up infinite 1 down* c5-12 +optimist up infinite 1 drain c6-3 +optimist up infinite 100 mix c1-[7,13-15,19,22-24,28,31,33,35,37... +optimist up infinite 116 alloc c1-[1-6,8-9,11-12,16-18,20-21,25-27... +optimist up infinite 25 idle c1-10,c3-[32-52,54-55],c6-8 +``` +{: .output} diff --git a/_includes/snippets_library/Birmingham_Baskerville_slurm/cluster/specific-node-info.snip b/_includes/snippets_library/Birmingham_Baskerville_slurm/cluster/specific-node-info.snip new file mode 100644 index 00000000..68f5c28c --- /dev/null +++ b/_includes/snippets_library/Birmingham_Baskerville_slurm/cluster/specific-node-info.snip @@ -0,0 +1,12 @@ +> ## Explore a Worker Node +> +> Finally, let's look at the resources available on the worker nodes where your +> jobs will actually run. Try running this command to see the name, CPUs and +> memory available on the worker nodes (the instructors will give you the ID of +> the compute node to use): +> +> ``` +> {{ site.host_prompt }} sinfo --node c3-12 -o "%n %c %m" +> ``` +> {: .language-bash} +{: .challenge} diff --git a/_includes/snippets_library/Birmingham_Baskerville_slurm/modules/available-modules.snip b/_includes/snippets_library/Birmingham_Baskerville_slurm/modules/available-modules.snip new file mode 100644 index 00000000..a14cb8fe --- /dev/null +++ b/_includes/snippets_library/Birmingham_Baskerville_slurm/modules/available-modules.snip @@ -0,0 +1,26 @@ +``` +---------------------- /cluster/modulefiles/all ------------------------- +4ti2/1.6.9-GCC-8.2.0-2.31.1 gmsh/4.5.6-foss-2019b-Python-3.7.4 +ABySS/2.0.2-gompi-2019a gnuplot/5.2.6-GCCcore-8.2.0 +AdapterRemoval/2.3.1-foss-2018b gnuplot/5.2.8-GCCcore-8.3.0 +AdapterRemoval/2.3.1-GCC-8.2.0-2.31.1 Go/1.13.1 +ADF/2019.103+StaticMKL gompi/2018b +AdmixTools/5.1-GCC-7.3.0-2.30 gompi/2019a +ADMIXTURE/1.3.0 gompi/2019b + +[removed most of the output here for clarity] + +----------------------- /cluster/modulefiles/external --------------------- + appusage/1.0 hpcx/2.4 hpcx/2.5 hpcx/2.6 + + Where: + S: Module is Sticky, requires --force to unload or purge + L: Module is loaded + Aliases: Aliases exist: foo/1.2.3 (1.2) means that "module load foo/1.2" + will load foo/1.2.3 + +Use "module spider" to find all possible modules. +Use "module keyword key1 key2 ..." to search for all possible modules matching +any of the "keys". 
+``` +{: .output} diff --git a/_includes/snippets_library/Birmingham_Baskerville_slurm/modules/missing-python.snip b/_includes/snippets_library/Birmingham_Baskerville_slurm/modules/missing-python.snip new file mode 100644 index 00000000..753a3271 --- /dev/null +++ b/_includes/snippets_library/Birmingham_Baskerville_slurm/modules/missing-python.snip @@ -0,0 +1,9 @@ +``` +/usr/bin/which:no python3 in +(/opt/software/slurm/16.05.9/bin: +/cvmfs/soft.computecanada.ca/easybuild/software/2017/Core/imkl/11.3.4.258/bin: +/opt/software/bin:/opt/puppetlabs/puppet/bin:/opt/software/slurm/current/bin: +/usr/local/bin:/usr/bin:/usr/local/sbin: +/usr/sbin:/home/yourUsername/.local/bin:/home/yourUsername/bin) +``` +{: .output} diff --git a/_includes/snippets_library/Birmingham_Baskerville_slurm/modules/module-load-python.snip b/_includes/snippets_library/Birmingham_Baskerville_slurm/modules/module-load-python.snip new file mode 100644 index 00000000..f461f07d --- /dev/null +++ b/_includes/snippets_library/Birmingham_Baskerville_slurm/modules/module-load-python.snip @@ -0,0 +1,5 @@ +``` +{{ site.host_prompt }} module load Python/3.7.2-GCCcore-8.2.0 +{{ site.host_prompt }} which python3 +``` +{: .language-bash} diff --git a/_includes/snippets_library/Birmingham_Baskerville_slurm/modules/python-executable-dir.snip b/_includes/snippets_library/Birmingham_Baskerville_slurm/modules/python-executable-dir.snip new file mode 100644 index 00000000..c0f68bf3 --- /dev/null +++ b/_includes/snippets_library/Birmingham_Baskerville_slurm/modules/python-executable-dir.snip @@ -0,0 +1,4 @@ +``` +/cluster/software/Python/3.7.2-GCCcore-8.2.0/bin/python +``` +{: .output} diff --git a/_includes/snippets_library/Birmingham_Baskerville_slurm/modules/python-ls-dir-command.snip b/_includes/snippets_library/Birmingham_Baskerville_slurm/modules/python-ls-dir-command.snip new file mode 100644 index 00000000..a5161d3c --- /dev/null +++ b/_includes/snippets_library/Birmingham_Baskerville_slurm/modules/python-ls-dir-command.snip @@ -0,0 +1,4 @@ +``` +{{ site.host_prompt }} ls /cluster/software/Python/3.8.2-GCCcore-9.3.0/bin +``` +{: .language-bash} diff --git a/_includes/snippets_library/Birmingham_Baskerville_slurm/modules/python-ls-dir-output.snip b/_includes/snippets_library/Birmingham_Baskerville_slurm/modules/python-ls-dir-output.snip new file mode 100644 index 00000000..d9a7ba2f --- /dev/null +++ b/_includes/snippets_library/Birmingham_Baskerville_slurm/modules/python-ls-dir-output.snip @@ -0,0 +1,19 @@ +``` +2to3 idle3.7 pytest rst2odt.py +2to3-3.7 isympy python rst2pseudoxml.py +chardetect netaddr python3 rst2s5.py +cygdb nosetests python3.7 rst2xetex.py +cython nosetests-3.7 python3.7-config rst2xml.py +cythonize pasteurize python3.7m rstpep2html.py +dijitso pbr python3.7m-config runxlrd.py +easy_install pip python3-config sphinx-apidoc +easy_install-3.7 pip3 pyvenv sphinx-autogen +f2py pip3.7 pyvenv-3.7 sphinx-build +f2py3 pybabel rst2html4.py sphinx-quickstart +f2py3.7 __pycache__ rst2html5.py tabulate +ffc pydoc3 rst2html.py virtualenv +ffc-3 pydoc3.7 rst2latex.py wheel +futurize pygmentize rst2man.py +idle3 py.test rst2odt_prepstyles.py +``` +{: .output} diff --git a/_includes/snippets_library/Birmingham_Baskerville_slurm/modules/python-module-path.snip b/_includes/snippets_library/Birmingham_Baskerville_slurm/modules/python-module-path.snip new file mode 100644 index 00000000..b336efc0 --- /dev/null +++ b/_includes/snippets_library/Birmingham_Baskerville_slurm/modules/python-module-path.snip @@ -0,0 +1,10 @@ +``` 
+/cluster/software/Python/3.8.2-GCCcore-9.3.0/bin:
+/cluster/software/XZ/5.2.5-GCCcore-9.3.0/bin:
+/cluster/software/bzip2/1.0.8-GCCcore-9.3.0/bin:
+/cluster/software/binutils/2.34-GCCcore-9.3.0/bin:
+/cluster/software/GCCcore/9.3.0/bin:/node/bin:/usr/local/bin:
+/usr/bin:/usr/local/sbin:/usr/sbin:/cluster/bin:
+/cluster/home/sabryr/.local/bin:/cluster/home/sabryr/bin
+```
+{: .output}
diff --git a/_includes/snippets_library/Birmingham_Baskerville_slurm/modules/software-dependencies.snip b/_includes/snippets_library/Birmingham_Baskerville_slurm/modules/software-dependencies.snip
new file mode 100644
index 00000000..7ea4b2fe
--- /dev/null
+++ b/_includes/snippets_library/Birmingham_Baskerville_slurm/modules/software-dependencies.snip
@@ -0,0 +1,93 @@
+To demonstrate, let's use `module list`. `module list` shows all loaded
+software modules.
+
+```
+{{ site.host_prompt }} module list
+```
+{: .language-bash}
+```
+
+Currently Loaded Modules:
+  1) StdEnv                       (S)   6) libreadline/8.0-GCCcore-8.2.0 (H)
+  2) GCCcore/8.2.0                      7) XZ/5.2.4-GCCcore-8.2.0        (H)
+  3) bzip2/1.0.6-GCCcore-8.2.0    (H)   8) GMP/6.1.2-GCCcore-8.2.0       (H)
+  4) zlib/1.2.11-GCCcore-8.2.0    (H)   9) libffi/3.2.1-GCCcore-8.2.0    (H)
+  5) ncurses/6.1-GCCcore-8.2.0    (H)  10) Python/3.7.2-GCCcore-8.2.0
+
+  Where:
+   S:  Module is Sticky, requires --force to unload or purge
+   H:  Hidden Module
+
+```
+{: .output}
+
+```
+{{ site.host_prompt }} module load Beast/2.5.2-GCC-8.2.0-2.31.1
+{{ site.host_prompt }} module list
+```
+{: .language-bash}
+
+```
+Currently Loaded Modules:
+  1) StdEnv                           (S)   9) libffi/3.2.1-GCCcore-8.2.0
+  2) GCCcore/8.2.0                         10) Python/3.7.2-GCCcore-8.2.0
+  3) bzip2/1.0.6-GCCcore-8.2.0        (H)  11) binutils/2.31.1-GCCcore-8.2.0
+  4) zlib/1.2.11-GCCcore-8.2.0        (H)  12) GCC/8.2.0-2.31.1
+  5) ncurses/6.1-GCCcore-8.2.0        (H)  13) Java/11.0.2
+  6) libreadline/8.0-GCCcore-8.2.0    (H)  14) beagle-lib/3.1.2-GCC-8.2.0-2.31.1
+  7) XZ/5.2.4-GCCcore-8.2.0           (H)  15) Beast/2.5.2-GCC-8.2.0-2.31.1
+  8) GMP/6.1.2-GCCcore-8.2.0          (H)
+
+  Where:
+   S:  Module is Sticky, requires --force to unload or purge
+   H:  Hidden Module
+
+```
+{: .output}
+
+So in this case, loading the `beast` module (a bioinformatics software
+package) also loaded `Java/11.0.2` and `beagle-lib/3.1.2-GCC-8.2.0-2.31.1`,
+among other dependencies. Let's try unloading the `beast` package.
+
+```
+{{ site.host_prompt }} module unload Beast/2.5.2-GCC-8.2.0-2.31.1
+{{ site.host_prompt }} module list
+```
+{: .language-bash}
+
+```
+Currently Loaded Modules:
+  1) StdEnv                           (S)   8) GMP/6.1.2-GCCcore-8.2.0        (H)
+  2) GCCcore/8.2.0                          9) libffi/3.2.1-GCCcore-8.2.0     (H)
+  3) bzip2/1.0.6-GCCcore-8.2.0        (H)  10) Python/3.7.2-GCCcore-8.2.0
+  4) zlib/1.2.11-GCCcore-8.2.0        (H)  11) binutils/2.31.1-GCCcore-8.2.0  (H)
+  5) ncurses/6.1-GCCcore-8.2.0        (H)  12) GCC/8.2.0-2.31.1
+  6) libreadline/8.0-GCCcore-8.2.0    (H)  13) Java/11.0.2
+  7) XZ/5.2.4-GCCcore-8.2.0           (H)  14) beagle-lib/3.1.2-GCC-8.2.0-2.31.1
+
+  Where:
+   S:  Module is Sticky, requires --force to unload or purge
+   H:  Hidden Module
+
+```
+{: .output}
+
+So using `module unload` "un-loads" a module. Note that, as the output above
+shows, the dependencies it pulled in (such as `Java/11.0.2` and `beagle-lib`)
+remain loaded. If we wanted to unload everything at once, we could run
+`module purge`.
+
+```
+{{ site.host_prompt }} module purge
+```
+{: .language-bash}
+```
+The following modules were not unloaded:
+  (Use "module --force purge" to unload all):
+
+  1) StdEnv
+```
+{: .output}
+
+Note that `module purge` is informative: it lets us know that all but a
+default set of sticky packages have been unloaded, and how to unload these
+too if we really want to.
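+
+If you find yourself reloading the same set of modules often, Lmod (the
+module system shown above) can store the currently loaded modules as a named
+collection. A minimal sketch, assuming collections are enabled at your site;
+the collection name `my_modules` below is just an example:
+
+```
+{{ site.host_prompt }} module save my_modules     # save the current module set
+{{ site.host_prompt }} module purge               # later: clear everything ...
+{{ site.host_prompt }} module restore my_modules  # ... and bring the saved set back
+{{ site.host_prompt }} module savelist            # list your saved collections
+```
+{: .language-bash}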
diff --git a/_includes/snippets_library/Birmingham_Baskerville_slurm/modules/wrong-gcc-version.snip b/_includes/snippets_library/Birmingham_Baskerville_slurm/modules/wrong-gcc-version.snip
new file mode 100644
index 00000000..23ee5df8
--- /dev/null
+++ b/_includes/snippets_library/Birmingham_Baskerville_slurm/modules/wrong-gcc-version.snip
@@ -0,0 +1,34 @@
+Let's take a closer look at the `gcc` module. GCC is an extremely widely used
+C/C++/Fortran compiler. A lot of software depends on the GCC version, and
+might not compile or run if the wrong version is loaded. In this case, there
+are a few different versions:
+
+`GCC/7.3.0-2.30  GCC/8.2.0-2.31.1  GCC/8.3.0  GCC/9.3.0`
+
+How do we load each copy, and which copy is the default?
+
+On {{ site.remote.name }} there are no default module versions, so we must use
+the full name to load one.
+
+```
+{{ site.host_prompt }} module load gcc
+```
+{: .language-bash}
+
+```
+Lmod has detected the following error:  The following module(s) are unknown:
+"gcc"
+
+Please check the spelling or version number. Also try "module spider ..."
+It is also possible your cache file is out-of-date; it may help to try:
+  $ module --ignore-cache load "gcc"
+```
+{: .output}
+
+To load a software module, we must specify the full module name:
+
+```
+{{ site.host_prompt }} module load GCC/8.2.0-2.31.1
+{{ site.host_prompt }} gcc --version
+```
+{: .language-bash}
diff --git a/_includes/snippets_library/Birmingham_Baskerville_slurm/parallel/four-tasks-jobscript.snip b/_includes/snippets_library/Birmingham_Baskerville_slurm/parallel/four-tasks-jobscript.snip
new file mode 100644
index 00000000..ac8effab
--- /dev/null
+++ b/_includes/snippets_library/Birmingham_Baskerville_slurm/parallel/four-tasks-jobscript.snip
@@ -0,0 +1,15 @@
+```
+{{ site.remote.bash_shebang }}
+{{ site.sched.comment }} {{ site.sched.flag.name }} parallel-pi
+{{ site.sched.comment }} {{ site.sched.flag.queue }} {{ site.sched.queue.testing }}
+{{ site.sched.comment }} -N 1
+{{ site.sched.comment }} -n 4
+{{ site.sched.comment }} --mem=3G
+
+# Load the computing environment we need
+module load python3
+
+# Execute the task
+mpiexec python pi.py 100000000
+```
+{: .language-bash}
diff --git a/_includes/snippets_library/Birmingham_Baskerville_slurm/parallel/one-task-with-memory-jobscript.snip b/_includes/snippets_library/Birmingham_Baskerville_slurm/parallel/one-task-with-memory-jobscript.snip
new file mode 100644
index 00000000..5838157f
--- /dev/null
+++ b/_includes/snippets_library/Birmingham_Baskerville_slurm/parallel/one-task-with-memory-jobscript.snip
@@ -0,0 +1,15 @@
+```
+{{ site.remote.bash_shebang }}
+{{ site.sched.comment }} {{ site.sched.flag.name }} serial-pi
+{{ site.sched.comment }} {{ site.sched.flag.queue }} {{ site.sched.queue.testing }}
+{{ site.sched.comment }} -N 1
+{{ site.sched.comment }} -n 1
+{{ site.sched.comment }} --mem=3G
+
+# Load the computing environment we need
+module load python3
+
+# Execute the task
+python pi.py 100000000
+```
+{: .language-bash}
diff --git a/_includes/snippets_library/Birmingham_Baskerville_slurm/resources/account-history.snip b/_includes/snippets_library/Birmingham_Baskerville_slurm/resources/account-history.snip
new file mode 100644
index 00000000..8bcac806
--- /dev/null
+++ b/_includes/snippets_library/Birmingham_Baskerville_slurm/resources/account-history.snip
@@ -0,0 +1,6 @@
+```
+       JobID    JobName  Partition    Account  AllocCPUS      State ExitCode
+------------ ---------- ---------- ---------- ---------- ---------- --------
+991167            Sxxxx     normal    nn9299k        128  COMPLETED      0:0
+```
+{: .output}
diff --git a/_includes/snippets_library/Birmingham_Baskerville_slurm/resources/bench.snip b/_includes/snippets_library/Birmingham_Baskerville_slurm/resources/bench.snip
new file mode 100644
index 00000000..a25fb9fb
--- /dev/null
+++ b/_includes/snippets_library/Birmingham_Baskerville_slurm/resources/bench.snip
@@ -0,0 +1,18 @@
+> ## Benchmarking `fastqc`
+>
+> Create a job that runs the following command in the same directory as the
+> `.fastq` files:
+>
+> ```
+> fastqc name_of_fastq_file
+> ```
+> {: .language-bash}
+>
+> The `fastqc` command is provided by the `fastqc` module. You'll need to
+> figure out a good amount of resources to request for this first "test run".
+> You might also want to have the scheduler email you to tell you when the job
+> is done.
+>
+> Hint: the job only needs 1 CPU and not too much memory or time. The trick is
+> figuring out just how much you'll need!
+{: .challenge}
diff --git a/_includes/snippets_library/Birmingham_Baskerville_slurm/resources/monitor-processes-top.snip b/_includes/snippets_library/Birmingham_Baskerville_slurm/resources/monitor-processes-top.snip
new file mode 100644
index 00000000..12685735
--- /dev/null
+++ b/_includes/snippets_library/Birmingham_Baskerville_slurm/resources/monitor-processes-top.snip
@@ -0,0 +1,19 @@
+```
+top - 21:00:19 up 3:07, 1 user, load average: 1.06, 1.05, 0.96
+Tasks: 311 total, 1 running, 222 sleeping, 0 stopped, 0 zombie
+%Cpu(s): 7.2 us, 3.2 sy, 0.0 ni, 89.0 id, 0.0 wa, 0.2 hi, 0.2 si, 0.0 st
+KiB Mem : 16303428 total, 8454704 free, 3194668 used, 4654056 buff/cache
+KiB Swap: 8220668 total, 8220668 free, 0 used. 11628168 avail Mem
+
+  PID USER      PR  NI    VIRT    RES    SHR S  %CPU %MEM     TIME+ COMMAND
+ 1693 jeff      20   0 4270580 346944 171372 S  29.8  2.1   9:31.89 gnome-shell
+ 3140 jeff      20   0 3142044 928972 389716 S  27.5  5.7  13:30.29 Web Content
+ 3057 jeff      20   0 3115900 521368 231288 S  18.9  3.2  10:27.71 firefox
+ 6007 jeff      20   0  813992 112336  75592 S   4.3  0.7   0:28.25 tilix
+ 1742 jeff      20   0  975080 164508 130624 S   2.0  1.0   3:29.83 Xwayland
+    1 root      20   0  230484  11924   7544 S   0.3  0.1   0:06.08 systemd
+   68 root      20   0       0      0      0 I   0.3  0.0   0:01.25 kworker/4:1
+ 2913 jeff      20   0  965620  47892  37432 S   0.3  0.3   0:11.76 code
+    2 root      20   0       0      0      0 S   0.0  0.0   0:00.02 kthreadd
+```
+{: .output}
diff --git a/_includes/snippets_library/Birmingham_Baskerville_slurm/resources/system-memory-free.snip b/_includes/snippets_library/Birmingham_Baskerville_slurm/resources/system-memory-free.snip
new file mode 100644
index 00000000..2f5a36ba
--- /dev/null
+++ b/_includes/snippets_library/Birmingham_Baskerville_slurm/resources/system-memory-free.snip
@@ -0,0 +1,6 @@
+```
+       total  used  free  shared  buff/cache  available
+Mem:    3.8G  1.5G  678M    327M        1.6G       1.6G
+Swap:   3.9G  170M  3.7G
+```
+{: .output}
diff --git a/_includes/snippets_library/Birmingham_Baskerville_slurm/scheduler/basic-job-script.snip b/_includes/snippets_library/Birmingham_Baskerville_slurm/scheduler/basic-job-script.snip
new file mode 100644
index 00000000..6c7a41d6
--- /dev/null
+++ b/_includes/snippets_library/Birmingham_Baskerville_slurm/scheduler/basic-job-script.snip
@@ -0,0 +1,4 @@
+```
+Submitted batch job 137860
+```
+{: .output}
diff --git a/_includes/snippets_library/Birmingham_Baskerville_slurm/scheduler/basic-job-status.snip b/_includes/snippets_library/Birmingham_Baskerville_slurm/scheduler/basic-job-status.snip
new file mode 100644
index 00000000..0101da71
--- /dev/null
+++ b/_includes/snippets_library/Birmingham_Baskerville_slurm/scheduler/basic-job-status.snip
@@ -0,0 +1,5 @@
+```
+JOBID    PARTITION  NAME      USER    ST  TIME  NODES  NODELIST(REASON)
+137860   normal     example-  usernm  R   0:02  1      c5-59
+```
+{: .output}
diff --git a/_includes/snippets_library/Birmingham_Baskerville_slurm/scheduler/del_job.snip b/_includes/snippets_library/Birmingham_Baskerville_slurm/scheduler/del_job.snip
new file mode 100644
index 00000000..6e2f0a26
--- /dev/null
+++ b/_includes/snippets_library/Birmingham_Baskerville_slurm/scheduler/del_job.snip
@@ -0,0 +1,4 @@
+```
+{{ site.host_prompt }} scancel 38759
+```
+{: .language-bash}
diff --git a/_includes/snippets_library/Birmingham_Baskerville_slurm/scheduler/job-with-name-status.snip b/_includes/snippets_library/Birmingham_Baskerville_slurm/scheduler/job-with-name-status.snip
new file mode 100644
index 00000000..f6defbda
--- /dev/null
+++ b/_includes/snippets_library/Birmingham_Baskerville_slurm/scheduler/job-with-name-status.snip
@@ -0,0 +1,5 @@
+```
+JOBID  ACCOUNT      NAME      ST  REASON    START_TIME  TIME  TIME_LEFT  NODES  CPUS
+38191  yourAccount  hello-wo  PD  Priority  N/A         0:00  1:00:00    1      1
+```
+{: .output}
diff --git a/_includes/snippets_library/Birmingham_Baskerville_slurm/scheduler/long_job.snip b/_includes/snippets_library/Birmingham_Baskerville_slurm/scheduler/long_job.snip
new file mode 100644
index 00000000..ccce837f
--- /dev/null
+++ b/_includes/snippets_library/Birmingham_Baskerville_slurm/scheduler/long_job.snip
@@ -0,0 +1,9 @@
+```
+{{ site.remote.bash_shebang }}
+#SBATCH -t 0:0:30
+
+echo 'This script is running on:'
+hostname
+sleep 120
+```
+{: .language-bash}
diff --git a/_includes/snippets_library/Birmingham_Baskerville_slurm/scheduler/option-flags-list.snip b/_includes/snippets_library/Birmingham_Baskerville_slurm/scheduler/option-flags-list.snip
new file mode 100644
index 00000000..5e80b164
--- /dev/null
+++ b/_includes/snippets_library/Birmingham_Baskerville_slurm/scheduler/option-flags-list.snip
@@ -0,0 +1,15 @@
+* `--ntasks=<ntasks>` or `-n <ntasks>`: How many CPU cores does your job need,
+  in total?
+
+* `--time <days-hours:minutes:seconds>` or `-t <days-hours:minutes:seconds>`:
+  How much real-world time (walltime) will your job take to run? The `<days>`
+  part can be omitted.
+
+* `--mem=<megabytes>`: How much memory on a node does your job need in
+  megabytes? You can also specify gigabytes by adding a little "g"
+  afterwards (example: `--mem=5g`).
+
+* `--nodes=<nnodes>` or `-N <nnodes>`: How many separate machines does your job
+  need to run on? Note that if you set `ntasks` to a number greater than what
+  one machine can offer, {{ site.sched.name }} will set this value
+  automatically.
diff --git a/_includes/snippets_library/Birmingham_Baskerville_slurm/scheduler/print-sched-variables.snip b/_includes/snippets_library/Birmingham_Baskerville_slurm/scheduler/print-sched-variables.snip
new file mode 100644
index 00000000..5234a4ed
--- /dev/null
+++ b/_includes/snippets_library/Birmingham_Baskerville_slurm/scheduler/print-sched-variables.snip
@@ -0,0 +1,30 @@
+> ## Job environment variables
+>
+> When {{ site.sched.name }} runs a job, it sets a number of environment
+> variables for the job. One of these will let us check what directory our job
+> script was submitted from. The `SLURM_SUBMIT_DIR` variable is set to the
+> directory from which our job was submitted. Using the `SLURM_SUBMIT_DIR`
+> variable, modify your job so that it prints out the location from which the
+> job was submitted.
+>
+> > ## Solution
+> >
+> > ```
+> > {{ site.remote.prompt }} nano example-job.sh
+> > {{ site.remote.prompt }} cat example-job.sh
+> > ```
+> > {: .language-bash}
+> >
+> > ```
+> > {{ site.remote.bash_shebang }}
+> > #SBATCH -t 00:00:30
+> >
+> > echo -n "This script is running on "
+> > hostname
+> >
+> > echo "This job was launched in the following directory:"
+> > echo ${SLURM_SUBMIT_DIR}
+> > ```
+> > {: .output}
+> {: .solution}
+{: .challenge}
diff --git a/_includes/snippets_library/Birmingham_Baskerville_slurm/scheduler/runtime-exceeded-job.snip b/_includes/snippets_library/Birmingham_Baskerville_slurm/scheduler/runtime-exceeded-job.snip
new file mode 100644
index 00000000..1586e5cd
--- /dev/null
+++ b/_includes/snippets_library/Birmingham_Baskerville_slurm/scheduler/runtime-exceeded-job.snip
@@ -0,0 +1,4 @@
+```
+{{ site.host_prompt }} cat slurm-38193.out
+```
+{: .language-bash}
diff --git a/_includes/snippets_library/Birmingham_Baskerville_slurm/scheduler/runtime-exceeded-output.snip b/_includes/snippets_library/Birmingham_Baskerville_slurm/scheduler/runtime-exceeded-output.snip
new file mode 100644
index 00000000..24d4a5bb
--- /dev/null
+++ b/_includes/snippets_library/Birmingham_Baskerville_slurm/scheduler/runtime-exceeded-output.snip
@@ -0,0 +1,6 @@
+```
+This job is running on: c1-14
+slurmstepd: error: *** JOB 38193 ON gra533 CANCELLED AT 2017-07-02T16:35:48
+DUE TO TIME LIMIT ***
+```
+{: .output}
diff --git a/_includes/snippets_library/Birmingham_Baskerville_slurm/scheduler/terminate-job-begin.snip b/_includes/snippets_library/Birmingham_Baskerville_slurm/scheduler/terminate-job-begin.snip
new file mode 100644
index 00000000..49480f91
--- /dev/null
+++ b/_includes/snippets_library/Birmingham_Baskerville_slurm/scheduler/terminate-job-begin.snip
@@ -0,0 +1,7 @@
+```
+Submitted batch job 38759
+
+JOBID  ACCOUNT      NAME            ST  REASON    TIME  TIME_LEFT  NODES  CPUS
+38759  yourAccount  example-job.sh  PD  Priority  0:00  1:00       1      1
+```
+{: .output}
diff --git a/_includes/snippets_library/Birmingham_Baskerville_slurm/scheduler/terminate-job-cancel.snip b/_includes/snippets_library/Birmingham_Baskerville_slurm/scheduler/terminate-job-cancel.snip
new file mode 100644
index 00000000..96caec62
--- /dev/null
+++ b/_includes/snippets_library/Birmingham_Baskerville_slurm/scheduler/terminate-job-cancel.snip
@@ -0,0 +1,4 @@
+```
+JOBID  USER  ACCOUNT  NAME  ST  REASON  START_TIME  TIME  TIME_LEFT  NODES  CPUS
+```
+{: .output}
diff --git a/_includes/snippets_library/Birmingham_Baskerville_slurm/scheduler/terminate-multiple-jobs.snip b/_includes/snippets_library/Birmingham_Baskerville_slurm/scheduler/terminate-multiple-jobs.snip
new file mode 100644
index 00000000..6c39a715
--- /dev/null
+++ b/_includes/snippets_library/Birmingham_Baskerville_slurm/scheduler/terminate-multiple-jobs.snip
@@ -0,0 +1,9 @@
+> ## Cancelling multiple jobs
+>
+> We can also cancel all of our jobs at once using the `-u` option. This will
+> delete all jobs for a specific user (in this case, us). Note that you can
+> only delete your own jobs.
+>
+> Try submitting multiple jobs and then cancelling them all with
+> `scancel -u yourUsername`.
+{: .challenge}
diff --git a/_includes/snippets_library/Birmingham_Baskerville_slurm/scheduler/using-nodes-interactively.snip b/_includes/snippets_library/Birmingham_Baskerville_slurm/scheduler/using-nodes-interactively.snip
new file mode 100644
index 00000000..20d42528
--- /dev/null
+++ b/_includes/snippets_library/Birmingham_Baskerville_slurm/scheduler/using-nodes-interactively.snip
@@ -0,0 +1,65 @@
+`srun` runs a single command on the cluster and then exits. Let's demonstrate
+this by running the `hostname` command with `srun`. (We can cancel an `srun`
+job with `Ctrl-c`.)
+
+```
+{{ site.host_prompt }} srun hostname
+```
+{: .language-bash}
+```
+gra752
+```
+{: .output}
+
+`srun` accepts all of the same options as `sbatch`. However, instead of
+specifying these in a script, the options are given on the command line when
+starting a job. To submit a job that uses 2 CPUs, for instance, we could use
+the following command:
+
+```
+{{ site.host_prompt }} srun -n 2 echo "This job will use 2 CPUs."
+```
+{: .language-bash}
+```
+This job will use 2 CPUs.
+This job will use 2 CPUs.
+```
+{: .output}
+
+Typically, the resulting shell environment will be the same as that for
+`sbatch`.
+
+### Interactive jobs
+
+Sometimes, you will need a lot of resources for interactive use. Perhaps it's
+your first time running an analysis, or you are attempting to debug something
+that went wrong with a previous job. Fortunately, Slurm makes it easy to start
+an interactive job with `srun`:
+
+```
+{{ site.host_prompt }} srun --pty bash
+```
+{: .language-bash}
+
+You should be presented with a bash prompt. Note that the prompt will likely
+change to reflect your new location, in this case the compute node we are
+logged on to. You can also verify this with `hostname`.
+
+> ## Creating remote graphics
+>
+> To see graphical output inside your jobs, you need to use X11 forwarding. To
+> connect with this feature enabled, use the `-Y` option when you log in with
+> `ssh`, i.e. `ssh -Y username@host`.
+>
+> To demonstrate what happens when you create a graphics window on the remote
+> node, use the `xeyes` command. A relatively adorable pair of eyes should pop
+> up (press `Ctrl-c` to stop). If you are using a Mac, you must have installed
+> XQuartz (and restarted your computer) for this to work.
+>
+> If your cluster has the
+> [slurm-spank-x11](https://github.com/hautreux/slurm-spank-x11) plugin
+> installed, you can ensure X11 forwarding within interactive jobs by using the
+> `--x11` option for `srun` with the command `srun --x11 --pty bash`.
+{: .challenge}
+
+When you are done with the interactive job, type `exit` to quit your session.
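+
+In practice you will usually want to request specific resources for an
+interactive session rather than accept the defaults. A minimal sketch,
+assuming the `{{ site.sched.queue.debug }}` partition accepts interactive
+work; the core count, memory and time limit below are only examples, so check
+what your site allows:
+
+```
+# one task with 4 cores, 4 GB of memory and a 30-minute limit
+{{ site.host_prompt }} srun -p {{ site.sched.queue.debug }} -n 1 -c 4 --mem=4G -t 0:30:0 --pty bash
+```
+{: .language-bash}
+
+As with batch jobs, the interactive session is ended automatically by the
+scheduler once the requested time limit is reached.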