-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
0b6512a
commit f5516cc
Showing
6 changed files
with
390 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
# GitLab CI pipeline that builds the Slurm base image used for testing.
# NOTE(review): the `.container-builder-cscs-gh200` job template is presumably
# provided by the remote CSCS recipe included below -- confirm against that file.
include:
  - remote: 'https://gitlab.com/cscs-ci/recipes/-/raw/master/templates/v2/.ci-ext.yml'

stages:
  - baseimage

build aarch64:
  extends: .container-builder-cscs-gh200
  stage: baseimage
  # Generous limit; the Dockerfile compiles Slurm from source.
  timeout: 10h
  variables:
    DOCKERFILE: ci/slurm_docker/Dockerfile
    # Lists every file that goes into the image.
    # NOTE(review): presumably a change to any of them triggers a rebuild --
    # confirm against the included template.
    WATCH_FILECHANGES: ci/slurm_docker/Dockerfile ci/slurm_docker/cgroup.conf ci/slurm_docker/entrypoint.sh ci/slurm_docker/install_slurm.sh ci/slurm_docker/slurm.conf.in
    PERSIST_IMAGE_NAME: $CSCS_REGISTRY_PATH/base/slurm-uenv-mount
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
# Image with a self-contained Slurm installation for testing.
# Slurm is built from source by install_slurm.sh; at container start
# entrypoint.sh launches munged, slurmctld and several slurmd instances
# inside this single container and then runs the requested command.
FROM opensuse/leap:15.4

ARG SLURM_VERSION=23.02.7
ARG SLURM_ROOT=/usr
ARG SLURM_CONFDIR=/etc/slurm

# Exported to the runtime environment as well: entrypoint.sh reads
# SLURM_ROOT and SLURM_CONFDIR when the container starts.
ENV SLURM_VERSION=${SLURM_VERSION}
ENV SLURM_ROOT=${SLURM_ROOT}
ENV SLURM_CONFDIR=${SLURM_CONFDIR}

# Build/runtime dependencies of Slurm plus a few interactive tools.
# The zypper cache is cleaned in the same layer so it never lands in the image.
RUN zypper install -y \
        munge \
        munge-devel \
        libnuma1 \
        libnuma-devel \
        librrd8 \
        readline-devel \
        hwloc \
        hwloc-devel \
        lz4 \
        liblz4-devel \
        libz1 \
        zlib-devel \
        freeipmi \
        freeipmi-devel \
        dbus-1 \
        dbus-1-devel \
        make \
        gcc \
        gcc-c++ \
        curl \
        tar \
        bzip2 \
        python3 \
        vim \
        ca-certificates \
        less \
        sudo \
    && zypper clean --all

RUN zypper install -y \
        lua53 \
        lua53-devel \
        libmount-devel \
    && zypper clean --all

# Account referenced by SlurmUser in slurm.conf.in (no home directory).
RUN useradd -M slurm

# Log and state directories referenced by slurm.conf.in; the spool
# directories are owned by the slurm user.
RUN mkdir -p /var/log/slurm /var/spool/slurmctld /var/spool/slurmd \
    && chown slurm /var/spool/slurmctld /var/spool/slurmd \
    && chmod u+rwx /var/spool/slurmctld /var/spool/slurmd

# Absolute paths so the build and the entrypoint do not depend on the
# working directory (which `docker run -w` or a derived image may change).
COPY install_slurm.sh /install_slurm.sh

# --enable-multiple-slurmd allows several slurmd daemons on one host,
# which entrypoint.sh relies on (one `slurmd -N <name>` per virtual node).
RUN /install_slurm.sh "${SLURM_VERSION}" "${SLURM_ROOT}" "${SLURM_CONFDIR}" --enable-multiple-slurmd

# The trailing slash makes COPY create the directory if it is missing.
COPY cgroup.conf slurm.conf.in ${SLURM_CONFDIR}/

# The container intentionally keeps running as root: entrypoint.sh appends
# to /etc/hosts and starts the Slurm daemons.
COPY entrypoint.sh /entrypoint.sh
ENTRYPOINT ["/entrypoint.sh"]
CMD ["bash"]
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
# Slurm cgroup settings; the Dockerfile copies this file into SLURM_CONFDIR. | ||
# Core and memory confinement are both switched off, and the cgroup v1 plugin | ||
# is selected explicitly with its hierarchy mounted at /sys/fs/cgroup. | ||
CgroupAutomount=yes | ||
ConstrainCores=no | ||
ConstrainRAMSpace=no | ||
CgroupMountpoint=/sys/fs/cgroup | ||
CgroupPlugin=cgroup/v1 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,86 @@ | ||
#!/bin/bash
#
# Container entrypoint: brings up a Slurm "cluster" inside this one container
# and then hands control to the command given to the container.
#
# Steps:
#   1. start dbus and munged
#   2. generate $SLURM_CONF from the template $SLURM_CONF_IN
#   3. add the virtual node names to /etc/hosts
#   4. start slurmctld and one slurmd per virtual node
#   5. exec the requested command
#
# Every variable initialised with ': "${VAR=default}"' below keeps a value
# already present in the environment, so it can be overridden when the
# container is started.

dbus-launch
sudo -u munge munged

: "${SLURM_CONF_IN=$SLURM_CONFDIR/slurm.conf.in}"
: "${SLURM_CONF=$SLURM_CONFDIR/slurm.conf}"

# Default number of slurm nodes
: "${SLURM_NUMNODES=3}"

# Default slurm controller
: "${SLURMCTLD_HOST=$HOSTNAME}"
: "${SLURMCTLD_ADDR=127.0.0.1}"

# Default node info
: "${NODE_HOST=$HOSTNAME}"
: "${NODE_ADDR=127.0.0.1}"
: "${NODE_BASEPORT=6001}"

# Default hardware profile
: "${NODE_HW=CPUs=4}"

# Generate node names and associated ports:
# nd[00001-0000N] and one consecutive port per node starting at NODE_BASEPORT.
NODE_NAMES=$(printf "nd[%05i-%05i]" 1 "$SLURM_NUMNODES")
NODE_PORTS=$(printf "%i-%i" "$NODE_BASEPORT" "$(($NODE_BASEPORT+$SLURM_NUMNODES-1))")

echo "INFO:"
echo "INFO: Creating $SLURM_CONF with"
echo "INFO: "
column -t <<EOF
INFO: SLURMCTLD_HOST=$SLURMCTLD_HOST SLURMCTLD_ADDR=$SLURMCTLD_ADDR
INFO: NODE_HOST=$NODE_HOST NODE_ADDR=$NODE_ADDR NODE_BASEPORT=$NODE_BASEPORT
INFO: NODE_HW=$NODE_HW
INFO: SLURM_NUMNODES=$SLURM_NUMNODES
EOF
echo "INFO: "
echo "INFO: Derived values:"
echo "INFO:"
column -t <<EOF
INFO: NODE_NAMES=$NODE_NAMES
INFO: NODE_PORTS=$NODE_PORTS
EOF
echo "INFO:"
echo "INFO: Override any of the non-derived values by setting the respective environment variable"
echo "INFO: when starting Docker."
echo "INFO:"

export PATH=$SLURM_ROOT/bin:$PATH
# Only add the separator when LD_LIBRARY_PATH already has a value: a trailing
# empty entry would make the dynamic loader search the current directory.
export LD_LIBRARY_PATH=$SLURM_ROOT/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}
export MANPATH=$SLURM_ROOT/man:$MANPATH

# Build slurm.conf: sed reads the template first and then ("-") the node and
# partition lines from stdin, replacing the controller placeholders on the way.
(
    echo "NodeName=${NODE_NAMES} NodeHostname=${NODE_HOST} NodeAddr=${NODE_ADDR} Port=${NODE_PORTS} State=UNKNOWN ${NODE_HW}"
    echo "PartitionName=dkr Nodes=ALL Default=YES MaxTime=INFINITE State=UP"
) \
    | sed -e "s/SLURMCTLDHOST/${SLURMCTLD_HOST}/" \
          -e "s/SLURMCTLDADDR/${SLURMCTLD_ADDR}/" \
          "$SLURM_CONF_IN" - \
    > "$SLURM_CONF"

# Expand the bracketed range into individual node names.
NODE_NAME_LIST=$(scontrol show hostnames "$NODE_NAMES")

# NODE_NAME_LIST is left unquoted on purpose: it is split into one name per word.
for n in $NODE_NAME_LIST
do
    echo "$NODE_ADDR $n" >> /etc/hosts
done

echo
echo "Starting Slurm services..."
echo

"$SLURM_ROOT/sbin/slurmctld"

# One slurmd per virtual node, all on this host.
for n in $NODE_NAME_LIST
do
    "$SLURM_ROOT/sbin/slurmd" -N "$n"
done

echo
sinfo
echo
echo

# Replace this shell with the requested command.
exec "$@"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
#!/bin/bash -x
#
# Usage: install_slurm.sh <slurm-version> <install-prefix> <sysconf-dir> [configure-args]
#
# Downloads the Slurm release tarball into /opt/src (unless it is already
# there), unpacks it, and builds and installs Slurm out-of-tree from
# /opt/build/slurm-<slurm-version>.
#
# If the only extra argument is NO_BUILD, the script stops after unpacking.
# The exit status is non-zero when any step fails.
#

SLURM_VERSION=$1
SLURM_ROOT=$2
SLURM_CONFDIR=$3
shift; shift; shift
ARGS=$*

slurm_tar_file=slurm-${SLURM_VERSION}.tar.bz2
slurm_url=https://download.schedmd.com/slurm/${slurm_tar_file}
slurm_src_dir=/opt/src/slurm-${SLURM_VERSION}
slurm_build_dir=/opt/build/slurm-${SLURM_VERSION}

if [ -z "$SLURM_VERSION" ] || [ -z "$SLURM_ROOT" ] || [ -z "$SLURM_CONFDIR" ]
then
    echo "Usage: install_slurm.sh <slurm-version> <install-prefix> <sysconf-dir> [configure-args]"
    echo "Slurm version, install-prefix or sysconf-dir missing on command line. Aborting."
    exit 1
fi

#
# Download slurm tarball and unpack it
#
mkdir -p /opt/src || exit 1
(
    cd /opt/src || exit 1

    if ! stat "$slurm_tar_file"; then
        echo "=== downloading slurm ${SLURM_VERSION} from ${slurm_url}"
        curl --fail --output "${slurm_tar_file}" "${slurm_url}" || exit 1
    fi

    echo "=== unpacking $slurm_tar_file"
    tar -xjf "${slurm_tar_file}" || exit 1
) || exit 1   # an `exit` inside ( ... ) only leaves the subshell, so propagate it

if [ "$ARGS" = "NO_BUILD" ]
then
    exit 0
fi

#
# Remove any old build directory.
# Run configure, make, make install
#
rm -rf "$slurm_build_dir"
mkdir -p "$slurm_build_dir" || exit 1
(
    cd "$slurm_build_dir" || exit 1

    # Print the available configure options into the build log.
    "$slurm_src_dir/configure" --help

    # $ARGS is deliberately unquoted so that it splits into separate options.
    "$slurm_src_dir/configure" \
        --prefix="${SLURM_ROOT}" \
        --sysconfdir="${SLURM_CONFDIR}" \
        --disable-dependency-tracking \
        $ARGS || exit 1

    make -j4 && make install
)
# The status of the build subshell above is the exit status of the script.
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,155 @@ | ||
# | ||
# Example slurm.conf file. Please run configurator.html | ||
# (in doc/html) to build a configuration file customized | ||
# for your environment. | ||
# | ||
# | ||
# slurm.conf file generated by configurator.html. | ||
# Put this file on all nodes of your cluster. | ||
# See the slurm.conf man page for more information. | ||
# | ||
# NOTE: in this image the file is a template. entrypoint.sh replaces the | ||
# SLURMCTLDHOST and SLURMCTLDADDR placeholders below and appends the | ||
# NodeName/PartitionName lines when it writes the final slurm.conf. | ||
# | ||
ClusterName=cluster | ||
SlurmctldHost=SLURMCTLDHOST(SLURMCTLDADDR) | ||
#SlurmctldHost= | ||
# | ||
#DisableRootJobs=NO | ||
#EnforcePartLimits=NO | ||
#Epilog= | ||
#EpilogSlurmctld= | ||
#FirstJobId=1 | ||
#MaxJobId=67043328 | ||
#GresTypes= | ||
#GroupUpdateForce=0 | ||
#GroupUpdateTime=600 | ||
#JobFileAppend=0 | ||
#JobRequeue=1 | ||
#JobSubmitPlugins=lua | ||
#KillOnBadExit=0 | ||
#LaunchType=launch/slurm | ||
#Licenses=foo*4,bar | ||
#MailProg=/bin/mail | ||
#MaxJobCount=10000 | ||
#MaxStepCount=40000 | ||
#MaxTasksPerNode=512 | ||
MpiDefault=pmi2 | ||
#MpiParams=ports=#-# | ||
#PluginDir= | ||
#PlugStackConfig= | ||
#PrivateData=jobs | ||
# Process tracking uses linuxproc; the cgroup variant is left commented out. | ||
#ProctrackType=proctrack/cgroup | ||
ProctrackType=proctrack/linuxproc | ||
#Prolog= | ||
#PrologFlags= | ||
#PrologSlurmctld= | ||
#PropagatePrioProcess=0 | ||
#PropagateResourceLimits= | ||
#PropagateResourceLimitsExcept= | ||
#RebootProgram= | ||
ReturnToService=1 | ||
SlurmctldPidFile=/var/run/slurmctld.pid | ||
SlurmctldPort=6817 | ||
# entrypoint.sh starts one slurmd per node name (slurmd -N <name>); per the | ||
# slurm.conf man page %n expands to the node name, which keeps the pid file | ||
# and spool directory of each daemon separate. | ||
SlurmdPidFile=/var/run/slurmd.%n.pid | ||
SlurmdPort=6818 | ||
SlurmdSpoolDir=/var/spool/slurmd.%n | ||
# The slurm user is created in the Dockerfile (useradd -M slurm). | ||
SlurmUser=slurm | ||
#SlurmdUser=root | ||
#SrunEpilog= | ||
#SrunProlog= | ||
# Directory created and chown'ed to the slurm user in the Dockerfile. | ||
StateSaveLocation=/var/spool/slurmctld | ||
SwitchType=switch/none | ||
#TaskEpilog= | ||
TaskPlugin=task/affinity | ||
#TaskProlog= | ||
#TopologyPlugin=topology/tree | ||
#TmpFS=/tmp | ||
#TrackWCKey=no | ||
#TreeWidth= | ||
#UnkillableStepProgram= | ||
#UsePAM=0 | ||
# | ||
# | ||
# TIMERS | ||
#BatchStartTimeout=10 | ||
#CompleteWait=0 | ||
#EpilogMsgTime=2000 | ||
#GetEnvTimeout=2 | ||
#HealthCheckInterval=0 | ||
#HealthCheckProgram= | ||
InactiveLimit=0 | ||
KillWait=30 | ||
#MessageTimeout=10 | ||
#ResvOverRun=0 | ||
MinJobAge=300 | ||
#OverTimeLimit=0 | ||
SlurmctldTimeout=120 | ||
SlurmdTimeout=300 | ||
#UnkillableStepTimeout=60 | ||
#VSizeFactor=0 | ||
Waittime=0 | ||
# | ||
# | ||
# SCHEDULING | ||
#DefMemPerCPU=0 | ||
#MaxMemPerCPU=0 | ||
#SchedulerTimeSlice=30 | ||
SchedulerType=sched/backfill | ||
# CR_CPU: CPUs are the resource allocated to jobs (see the slurm.conf man page). | ||
# entrypoint.sh gives every node CPUs=4 unless NODE_HW is overridden. | ||
SelectType=select/cons_tres | ||
SelectTypeParameters=CR_CPU | ||
# | ||
# | ||
# JOB PRIORITY | ||
#PriorityFlags= | ||
#PriorityType=priority/basic | ||
#PriorityDecayHalfLife= | ||
#PriorityCalcPeriod= | ||
#PriorityFavorSmall= | ||
#PriorityMaxAge= | ||
#PriorityUsageResetPeriod= | ||
#PriorityWeightAge= | ||
#PriorityWeightFairshare= | ||
#PriorityWeightJobSize= | ||
#PriorityWeightPartition= | ||
#PriorityWeightQOS= | ||
# | ||
# | ||
# LOGGING AND ACCOUNTING | ||
# Accounting storage, job completion logging and job accounting gathering | ||
# are all set to their "none" plugins below. | ||
#AccountingStorageEnforce=0 | ||
#AccountingStorageHost= | ||
#AccountingStoragePass= | ||
#AccountingStoragePort= | ||
AccountingStorageType=accounting_storage/none | ||
#AccountingStorageUser= | ||
#AccountingStoreFlags= | ||
#JobCompHost= | ||
#JobCompLoc= | ||
#JobCompPass= | ||
#JobCompPort= | ||
JobCompType=jobcomp/none | ||
#JobCompUser= | ||
#JobContainerType=job_container/none | ||
JobAcctGatherFrequency=30 | ||
JobAcctGatherType=jobacct_gather/none | ||
# Both daemons log at debug2 into /var/log/slurm, which the Dockerfile creates. | ||
# The slurmd log name contains %n, giving each slurmd instance its own file. | ||
SlurmctldDebug=debug2 | ||
SlurmctldLogFile=/var/log/slurm/slurmctld.log | ||
SlurmdDebug=debug2 | ||
SlurmdLogFile=/var/log/slurm/slurmd.%n.log | ||
#SlurmSchedLogFile= | ||
#SlurmSchedLogLevel= | ||
#DebugFlags= | ||
# | ||
# | ||
# POWER SAVE SUPPORT FOR IDLE NODES (optional) | ||
#SuspendProgram= | ||
#ResumeProgram= | ||
#SuspendTimeout= | ||
#ResumeTimeout= | ||
#ResumeRate= | ||
#SuspendExcNodes= | ||
#SuspendExcParts= | ||
#SuspendRate= | ||
#SuspendTime= | ||
# | ||
# | ||
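# COMPUTE NODES | ||
# The NodeName and PartitionName lines are appended by entrypoint.sh when the | ||
# container starts. With the default settings they look like this: | ||
#NodeName=nd[00001-00003] NodeHostname=DOCKER_HOSTNAME NodeAddr=127.0.0.1 Port=6001-6003 State=UNKNOWN CPUs=4 | ||
#PartitionName=dkr Nodes=ALL Default=YES MaxTime=INFINITE State=UP |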