From 1cafd0a535eba7d172785fd4bf05a9d284a064b5 Mon Sep 17 00:00:00 2001 From: Will Dunklin Date: Tue, 19 Mar 2024 16:20:29 -0400 Subject: [PATCH 01/24] Hack together dsa_common in singularity example --- devops/singularity-minimal/.gitignore | 8 ++ devops/singularity-minimal/SIF/.gitignore | 2 + devops/singularity-minimal/build.sh | 3 + devops/singularity-minimal/dsa_common.def | 36 +++++++ devops/singularity-minimal/instance_pull.sh | 6 ++ devops/singularity-minimal/instance_run.sh | 10 ++ devops/singularity-minimal/instance_start.sh | 29 +++++ devops/singularity-minimal/instance_stop.sh | 5 + devops/singularity-minimal/provision.yaml | 108 +++++++++++++++++++ devops/singularity-minimal/start_girder.sh | 48 +++++++++ 10 files changed, 255 insertions(+) create mode 100644 devops/singularity-minimal/SIF/.gitignore create mode 100755 devops/singularity-minimal/build.sh create mode 100644 devops/singularity-minimal/dsa_common.def create mode 100755 devops/singularity-minimal/instance_pull.sh create mode 100755 devops/singularity-minimal/instance_run.sh create mode 100755 devops/singularity-minimal/instance_start.sh create mode 100755 devops/singularity-minimal/instance_stop.sh create mode 100644 devops/singularity-minimal/provision.yaml create mode 100755 devops/singularity-minimal/start_girder.sh diff --git a/devops/singularity-minimal/.gitignore b/devops/singularity-minimal/.gitignore index 3ba8463a..7b51e69c 100644 --- a/devops/singularity-minimal/.gitignore +++ b/devops/singularity-minimal/.gitignore @@ -1,2 +1,10 @@ assetstore db +opt +logs +fuse + +girder +mongodb +etc.hosts +resolv.conf diff --git a/devops/singularity-minimal/SIF/.gitignore b/devops/singularity-minimal/SIF/.gitignore new file mode 100644 index 00000000..d6b7ef32 --- /dev/null +++ b/devops/singularity-minimal/SIF/.gitignore @@ -0,0 +1,2 @@ +* +!.gitignore diff --git a/devops/singularity-minimal/build.sh b/devops/singularity-minimal/build.sh new file mode 100755 index 00000000..0e63e788 --- 
/dev/null +++ b/devops/singularity-minimal/build.sh @@ -0,0 +1,3 @@ +#!/usr/bin/env bash + +sudo singularity build SIF/dsa_common.sif dsa_common.def diff --git a/devops/singularity-minimal/dsa_common.def b/devops/singularity-minimal/dsa_common.def new file mode 100644 index 00000000..ace95667 --- /dev/null +++ b/devops/singularity-minimal/dsa_common.def @@ -0,0 +1,36 @@ +Bootstrap: docker +From: dsarchive/dsa_common + +%setup + +%files + +%environment + export NVM_DIR=/opt/nvm + . $NVM_DIR/nvm.sh + +%post + # install nvm / node properly + export NVM_DIR=/opt/nvm + + git clone https://github.com/nvm-sh/nvm.git $NVM_DIR + . $NVM_DIR/nvm.sh + + nvm install 14 && \ + nvm alias default 14 && \ + nvm use default + + mv /opt /root_opt + +%runscript + export NVM_DIR=/opt/nvm + . $NVM_DIR/nvm.sh + + bash + +%startscript + # assuming that the user mounts a local directory to /opt/ + # this is necessary for getting the correct singularity permissions + echo " - Copying docker image's opt (/root_opt) to user mounted /opt" + echo " - Make sure you're mounting a local directory to /opt" + cp -r /root_opt/* /opt diff --git a/devops/singularity-minimal/instance_pull.sh b/devops/singularity-minimal/instance_pull.sh new file mode 100755 index 00000000..9ef2b041 --- /dev/null +++ b/devops/singularity-minimal/instance_pull.sh @@ -0,0 +1,6 @@ +#!/usr/bin/env bash + +singularity pull SIF/rabbitMQ.sif library://sylabs/examples/rabbitmq +singularity pull SIF/mongodb.sif docker://mongo:latest +singularity pull SIF/dsarchive.sif docker://suhaskc/dsacommon:latest +singularity pull SIF/memcached.sif docker://memcached:latest diff --git a/devops/singularity-minimal/instance_run.sh b/devops/singularity-minimal/instance_run.sh new file mode 100755 index 00000000..d3dc3eba --- /dev/null +++ b/devops/singularity-minimal/instance_run.sh @@ -0,0 +1,10 @@ +#!/usr/bin/env bash + +# singularity run instance://dsa-mongodb-1 & +# singularity run instance://dsa-memcached-1 & +# singularity run 
instance://dsa-rabbitMQ-1 & +# singularity run instance://dsa-dsarchive-1 bash -c 'python /opt/digital_slide_archive/devops/dsa/provision.py --sample-data && girder serve' & + +# docker run --rm -it -p 27017:27017 mongo:latest mongod # needs to have port bound externally + +singularity run instance://test-dsarchive diff --git a/devops/singularity-minimal/instance_start.sh b/devops/singularity-minimal/instance_start.sh new file mode 100755 index 00000000..50add24a --- /dev/null +++ b/devops/singularity-minimal/instance_start.sh @@ -0,0 +1,29 @@ +#!/usr/bin/env bash + +# singularity instance start --bind ./db:/data/db SIF/mongodb.sif dsa-mongodb-1 +# singularity instance start SIF/memcached.sif dsa-memcached-1 +# singularity instance start SIF/rabbitMQ.sif dsa-rabbitMQ-1 +singularity instance start \ + --bind ./opt:/opt \ + --bind ./assetstore:/assetstore \ + --bind ./logs:/logs \ + --bind ./fuse:/fuse \ + --bind ./girder.cfg:/etc/girder.cfg \ + --bind ./start_girder.sh:/opt/start_girder.sh \ + --bind ./provision.yaml:/opt/provision.yaml \ + SIF/dsa_common.sif test-dsarchive + +# needed to use singularity in singularity (for `singularity pull`, etc) + # --bind /usr/bin/singularity:/usr/bin/singularity \ + # --bind /usr/bin/apptainer:/usr/bin/apptainer \ + # --bind /etc/apptainer/apptainer.conf:/etc/apptainer/apptainer.conf \ + # --bind /usr/bin/mksquashfs:/usr/bin/mksquashfs \ + # --bind /usr/bin/unsquashfs:/usr/bin/unsquashfs \ + # --bind /usr/lib/x86_64-linux-gnu/liblzo2.so.2:/usr/lib/x86_64-linux-gnu/liblzo2.so.2 \ + +# needed to run `singularity exec` (doesn't work because of permissions) + # --bind /etc/apptainer:/etc/apptainer \ + # --bind /var/lib/apptainer/mnt/session:/var/lib/apptainer/mnt/session \ + # --bind /usr/libexec/apptainer:/usr/libexec/apptainer \ + # --bind /usr/libexec/apptainer/bin/starter:/usr/libexec/apptainer/bin/starter \ + # --bind /etc/apptainer/capability.json:/etc/apptainer/capability.json \ diff --git 
a/devops/singularity-minimal/instance_stop.sh b/devops/singularity-minimal/instance_stop.sh new file mode 100755 index 00000000..3ca3e7c5 --- /dev/null +++ b/devops/singularity-minimal/instance_stop.sh @@ -0,0 +1,5 @@ +#!/usr/bin/env bash + +singularity instance stop test-dsarchive + +find ./opt/* -not -path "*opt/local_*" -not -name "hist.sif" -delete diff --git a/devops/singularity-minimal/provision.yaml b/devops/singularity-minimal/provision.yaml new file mode 100644 index 00000000..c540dae6 --- /dev/null +++ b/devops/singularity-minimal/provision.yaml @@ -0,0 +1,108 @@ +--- +# The provision script can take a yaml file with provision options +# This is a dictionary of command-line arguments for the provisioning script +force: False +samples: False +clean-delete-locks: True +sample-collection: Samples +sample-folder: Images +# Set use-defaults to False to skip default settings +use-defaults: True +# Set mongo_compat to False to not automatically set the mongo feature +# compatibility version to the current server version. +mongo-compat: True +# A list of additional pip modules to install; if any are girder plugins with +# client-side code, also specify rebuild-client. +# pip: +# - girder-oauth +# - girder-ldap +# rebuild-client may be False, True (for production mode), or "development" +rebuild-client: False +# Run additional shell commands before start +# shell: +# - ls +# Default admin user if there are no admin users +admin: + login: admin + password: password + firstName: Admin + lastName: Admin + email: admin@nowhere.nil + public: True +# Default assetstore if there are no assetstores +assetstore: + method: createFilesystemAssetstore + name: Assetstore + root: /assetstore +# Any resources to ensure exist. A model must be specified. This creates the +# resource if there is no match for all specified values. A value of +# "resource:" is converted to the resource document with that resource +# path. 
"resource:admin" uses the default admin, "resourceid:" is the +# string id for the resource path, and "resourceid:admin" is the string if for +# default admin. +# You can add metadata to a resource. The default key is meta. If +# metadata_update is False, metadata will not be set if any metadata +# already exists. +resources: + - model: collection + name: Tasks + creator: resource:admin + public: True + - model: folder + parent: resource:collection/Tasks + parentType: collection + name: "Slicer CLI Web Tasks" + creator: resource:admin + public: True + # metadata: + # sample_key: sample_value + # metadata_key: meta + # metadata_update: True +settings: + worker.broker: "amqp://guest:guest@rabbitmq" + worker.backend: "rpc://guest:guest@rabbitmq" + worker.api_url: "http://girder:8080/api/v1" + worker.direct_path: True + core.brand_name: "Digital Slide Archive" + # core.http_only_cookies: True + histomicsui.webroot_path: "histomics" + histomicsui.alternate_webroot_path: "histomicstk" + histomicsui.delete_annotations_after_ingest: True + homepage.markdown: |- + # Digital Slide Archive + --- + ## Bioinformatics Platform + + Welcome to the **Digital Slide Archive**. + + Developers who want to use the Girder REST API should check out the + [interactive web API docs](api/v1). + + The [HistomicsUI](histomics) application is enabled. 
+ slicer_cli_web.task_folder: "resourceid:collection/Tasks/Slicer CLI Web Tasks" +# List slicer-cli-images to pull, if not present, and load +# slicer-cli-image: +# - dsarchive/histomicstk:latest +# List slicer-cli-images to always pull, and load +slicer-cli-image-pull: + - dsarchive/histomicstk:latest +# The worker can specify parameters for provisioning +# worker-rabbitmq-host: girder:8080 +worker-rabbitmq-user: guest +worker-rabbitmq-pass: guest +worker-config: /opt/girder_worker/girder_worker/worker.local.cfg +# These have precedence over the top level values +worker: + # rabbitmq-host: girder:8080 + # rabbitmq-user: guest + # rabbitmq-pass: guest + # config: /opt/girder_worker/girder_worker/worker.local.cfg + # Install additional pip packages in the worker + # pip: + # - package_one + # Run additional shell commands in the worker before start + # shell: + # - ls +pip: + - -e /opt/local_girder_worker + - -e /opt/local_slicer_cli_web diff --git a/devops/singularity-minimal/start_girder.sh b/devops/singularity-minimal/start_girder.sh new file mode 100755 index 00000000..10d57dfb --- /dev/null +++ b/devops/singularity-minimal/start_girder.sh @@ -0,0 +1,48 @@ +#!/bin/bash +# Ensures that the main process runs as the DSA_USER and is part of both that +# group and the docker group. Fail if DSA_USER is not specified. +# if [[ -z "$DSA_USER" ]]; then +# echo "Set the DSA_USER before starting (e.g, DSA_USER=\$$(id -u):\$$(id -g) " +# exit 1 +# fi +# add a user with the DSA_USER's id; this user is named ubuntu if it doesn't +# exist. +# adduser --uid ${DSA_USER%%:*} --disabled-password --gecos "" ubuntu 2>/dev/null +# add a group with the DSA_USER's group id. +# addgroup --gid ${DSA_USER#*:} $(id -ng ${DSA_USER#*:}) 2>/dev/null +# add the user to the user group. +# adduser $(id -nu ${DSA_USER%%:*}) $(getent group ${DSA_USER#*:} | cut "-d:" -f1) 2>/dev/null +# add a group with the docker group id. 
+# addgroup --gid $(stat -c "%g" /var/run/docker.sock) docker 2>/dev/null +# add the user to the docker group. +# adduser $(id -nu ${DSA_USER%%:*}) $(getent group $(stat -c "%g" /var/run/docker.sock) | cut "-d:" -f1) 2>/dev/null +# Try to increase permissions for the docker socket; this helps this work on +# OSX where the users don't translate +# chmod 777 /var/run/docker.sock 2>/dev/null || true +# Use iptables to make some services appear as if they are on localhost (as +# well as on the docker network). This is done to allow tox tests to run. +# sysctl -w net.ipv4.conf.eth0.route_localnet=1 +# iptables -t nat -A OUTPUT -o lo -p tcp -m tcp --dport 27017 -j DNAT --to-destination `dig +short mongodb`:27017 +# iptables -t nat -A OUTPUT -o lo -p tcp -m tcp --dport 11211 -j DNAT --to-destination `dig +short memcached`:11211 +# iptables -t nat -A POSTROUTING -o eth0 -m addrtype --src-type LOCAL --dst-type UNICAST -j MASQUERADE +# echo 'PATH="/opt/digital_slide_archive/devops/dsa/utils:/opt/venv/bin:/.pyenv/bin:/.pyenv/shims:$PATH"' >> /home/$(id -nu ${DSA_USER%%:*})/.bashrc +echo ==== Pre-Provisioning === +PATH="/opt/venv/bin:/.pyenv/bin:/.pyenv/shims:$PATH" \ +python /opt/digital_slide_archive/devops/dsa/provision.py -v --pre --yaml /opt/provision.yaml +# Run subsequent commands as the DSA_USER. This sets some paths based on what +# is expected in the Docker so that the current python environment and the +# devops/dsa/utils are available. Then: +# - Provision the Girder instance. This sets values in the database, such as +# creating an admin user if there isn't one. See the provision.py script for +# the details. +# - If possible, set up a girder mount. This allows file-like access of girder +# resources. It requires the host to have fuse installed and the docker +# container to be run with enough permissions to use fuse. +# - Start the main girder process. 
+PATH="/opt/digital_slide_archive/devops/dsa/utils:/opt/venv/bin:/.pyenv/bin:/.pyenv/shims:$PATH"; +echo ==== Provisioning === && +python /opt/digital_slide_archive/devops/dsa/provision.py -v --main --yaml /opt/provision.yaml && +echo ==== Creating FUSE mount === && +(girder mount ${DSA_GIRDER_MOUNT_OPTIONS%%:-} /fuse || true) && +echo ==== Starting Girder === && +girder serve --dev From e36a6da8831cea534847f072cf56578c230fa73b Mon Sep 17 00:00:00 2001 From: Will Dunklin Date: Tue, 19 Mar 2024 16:27:37 -0400 Subject: [PATCH 02/24] Add directory structure --- devops/singularity-minimal/.gitignore | 6 +++--- devops/singularity-minimal/fuse/.gitignore | 2 ++ devops/singularity-minimal/instance_stop.sh | 2 +- devops/singularity-minimal/logs/.gitignore | 2 ++ devops/singularity-minimal/opt/.gitignore | 2 ++ 5 files changed, 10 insertions(+), 4 deletions(-) create mode 100644 devops/singularity-minimal/fuse/.gitignore create mode 100644 devops/singularity-minimal/logs/.gitignore create mode 100644 devops/singularity-minimal/opt/.gitignore diff --git a/devops/singularity-minimal/.gitignore b/devops/singularity-minimal/.gitignore index 7b51e69c..203ff09c 100644 --- a/devops/singularity-minimal/.gitignore +++ b/devops/singularity-minimal/.gitignore @@ -1,8 +1,8 @@ assetstore db -opt -logs -fuse +# opt +# logs +# fuse girder mongodb diff --git a/devops/singularity-minimal/fuse/.gitignore b/devops/singularity-minimal/fuse/.gitignore new file mode 100644 index 00000000..d6b7ef32 --- /dev/null +++ b/devops/singularity-minimal/fuse/.gitignore @@ -0,0 +1,2 @@ +* +!.gitignore diff --git a/devops/singularity-minimal/instance_stop.sh b/devops/singularity-minimal/instance_stop.sh index 3ca3e7c5..3e13a89e 100755 --- a/devops/singularity-minimal/instance_stop.sh +++ b/devops/singularity-minimal/instance_stop.sh @@ -2,4 +2,4 @@ singularity instance stop test-dsarchive -find ./opt/* -not -path "*opt/local_*" -not -name "hist.sif" -delete +find ./opt/* -not -path "*opt/local_*" -not -path 
"*opt/.gitignore" -delete diff --git a/devops/singularity-minimal/logs/.gitignore b/devops/singularity-minimal/logs/.gitignore new file mode 100644 index 00000000..d6b7ef32 --- /dev/null +++ b/devops/singularity-minimal/logs/.gitignore @@ -0,0 +1,2 @@ +* +!.gitignore diff --git a/devops/singularity-minimal/opt/.gitignore b/devops/singularity-minimal/opt/.gitignore new file mode 100644 index 00000000..d6b7ef32 --- /dev/null +++ b/devops/singularity-minimal/opt/.gitignore @@ -0,0 +1,2 @@ +* +!.gitignore From aae95f94d92ac9feaf884f5cf708277d663e576b Mon Sep 17 00:00:00 2001 From: Will Dunklin Date: Fri, 18 Oct 2024 10:45:37 -0400 Subject: [PATCH 03/24] UF Progress --- devops/singularity-minimal/blue/.gitignore | 2 + devops/singularity-minimal/build.sh | 2 +- devops/singularity-minimal/dsa_common.def | 16 ++- devops/singularity-minimal/dsa_compose.sh | 100 ++++++++++++++++++ devops/singularity-minimal/provision.yaml | 33 +++++- .../rabbitmqdata/.gitignore | 2 + devops/singularity-minimal/start_girder.sh | 13 ++- devops/singularity-minimal/start_worker.sh | 39 +++++++ devops/singularity-minimal/tmp/.gitignore | 2 + .../singularity-minimal/worker_opt/.gitignore | 2 + 10 files changed, 201 insertions(+), 10 deletions(-) create mode 100644 devops/singularity-minimal/blue/.gitignore create mode 100755 devops/singularity-minimal/dsa_compose.sh create mode 100644 devops/singularity-minimal/rabbitmqdata/.gitignore create mode 100755 devops/singularity-minimal/start_worker.sh create mode 100644 devops/singularity-minimal/tmp/.gitignore create mode 100644 devops/singularity-minimal/worker_opt/.gitignore diff --git a/devops/singularity-minimal/blue/.gitignore b/devops/singularity-minimal/blue/.gitignore new file mode 100644 index 00000000..d6b7ef32 --- /dev/null +++ b/devops/singularity-minimal/blue/.gitignore @@ -0,0 +1,2 @@ +* +!.gitignore diff --git a/devops/singularity-minimal/build.sh b/devops/singularity-minimal/build.sh index 0e63e788..4459634a 100755 --- 
a/devops/singularity-minimal/build.sh +++ b/devops/singularity-minimal/build.sh @@ -1,3 +1,3 @@ #!/usr/bin/env bash -sudo singularity build SIF/dsa_common.sif dsa_common.def +sudo apptainer build SIF/dsa_common.sif dsa_common.def diff --git a/devops/singularity-minimal/dsa_common.def b/devops/singularity-minimal/dsa_common.def index ace95667..dd490a2b 100644 --- a/devops/singularity-minimal/dsa_common.def +++ b/devops/singularity-minimal/dsa_common.def @@ -10,6 +10,7 @@ From: dsarchive/dsa_common . $NVM_DIR/nvm.sh %post + set -x # install nvm / node properly export NVM_DIR=/opt/nvm @@ -20,13 +21,26 @@ From: dsarchive/dsa_common nvm alias default 14 && \ nvm use default + # install pyaml in virtual environment + . /opt/venv/bin/activate + /opt/venv/bin/pip install pyaml + mv /opt /root_opt + # install apptainer + apt install -y software-properties-common + add-apt-repository -y ppa:apptainer/ppa + apt update + apt install -y apptainer + %runscript + set -x + export NVM_DIR=/opt/nvm . $NVM_DIR/nvm.sh + . 
/opt/venv/bin/activate - bash + bash -c "$@" %startscript # assuming that the user mounts a local directory to /opt/ diff --git a/devops/singularity-minimal/dsa_compose.sh b/devops/singularity-minimal/dsa_compose.sh new file mode 100755 index 00000000..bea66fe5 --- /dev/null +++ b/devops/singularity-minimal/dsa_compose.sh @@ -0,0 +1,100 @@ +#!/usr/bin/env bash +set -x + +cd $(dirname $0) + +apptainer instance stop -a || echo "No instances stopped" + +# Load Modules +# module load slurm-drmaa + +# Add / Pull images if not pulled +# Start instances +## Start MongoDB and RabbitMQ +apptainer instance start \ + --bind ./db:/data/db \ + SIF/mongodb.sif dsa-mongodb-1 + # --bind /blue/pinaki.sarder/rc-svc-pinaki.sarder-web/db:/data/db \ + # --no-mount /cmsuf \ + +apptainer instance start \ + --env RABBITMQ_DEFAULT_USER=guest \ + --env RABBITMQ_DEFAULT_PASS=guest \ + --bind ./rabbitmqdata:/var/lib/rabbitmq/ \ + SIF/rabbitMQ.sif dsa-rabbitMQ-1 + # --no-mount /cmsuf \ + +apptainer instance start SIF/memcached.sif dsa-memcached-1 + # --no-mount /cmsuf + +# clean girder opt +find ./opt/* -not -path "*opt/local_*" -not -path "*opt/.gitignore" -delete + +# set up worker opt +rm -rf ./worker_opt/* +cp -r ./opt/* ./worker_opt/ + +## Start Girder and Worker +apptainer instance start \ + --bind ./blue:/blue/pinaki.sarder/rc-svc-pinaki.sarder-web \ + --bind ./assetstore:/assetstore \ + --bind ./logs:/logs \ + --bind ./tmp:/tmp \ + --bind ./fuse:/fuse \ + --bind ./girder.cfg:/etc/girder.cfg \ + --bind ./start_girder.sh:/opt/start_girder.sh \ + --bind ./provision.yaml:/opt/provision.yaml \ + --bind ../dsa/provision.py:/opt/provision.py \ + --bind ./opt:/opt \ + SIF/dsa_common.sif test-dsarchive + # --no-mount /cmsuf \ + # --bind /blue/pinaki.sarder/rc-svc-pinaki.sarder-web/assetstore:/assetstore \ + # --bind /blue/pinaki.sarder/rc-svc-pinaki.sarder-web/logs:/logs \ + # --bind /blue/pinaki.sarder/rc-svc-pinaki.sarder-web/tmp:/tmp \ + # --bind /opt/slurm \ + # --bind /apps \ + # --bind 
/var/run/munge:/run/munge \ + +apptainer instance start \ + --bind ./blue:/blue/pinaki.sarder/rc-svc-pinaki.sarder-web \ + --bind ./logs:/logs \ + --bind ./worker_opt:/opt \ + --bind ./start_worker.sh:/opt/start_worker.sh \ + --bind ./provision.yaml:/opt/provision.yaml \ + --bind ../dsa/provision.py:/opt/provision.py \ + SIF/dsa_common.sif dsa-worker-1 + # --no-mount /cmsuf \ + # --bind /blue/pinaki.sarder/rc-svc-pinaki.sarder-web/logs:/logs \ + # --bind /apps \ + # --bind /var/run/munge:/run/munge \ + # --bind /opt/slurm \ + +## Execute shells +apptainer exec instance://dsa-mongodb-1 mongod > /dev/null & + +sleep 5 # TODO: WHY THE HELL IS THERE A RACE CONDITIONNNNNN + # the files seem to be not mounted properly before this stuff runs + +apptainer run \ + --env SIF_IMAGE_PATH="/home/local/KHQ/will.dunklin/work/digital_slide_archive/devops/singularity-minimal/tmp/sifs/" \ + --env TMPDIR=/home/local/KHQ/will.dunklin/work/digital_slide_archive/devops/singularity-minimal/tmp \ + --env LOGS=/home/local/KHQ/will.dunklin/work/digital_slide_archive/devops/singularity-minimal/logs \ + --env PATH=/opt/slurm/bin:$PATH \ + --env SLURM_QOS=pinaki.sarder-dsa \ + --env SLURM_ACCOUNT=pinaki.sarder-dsa \ + --env DSA_PROVISION_YAML=/opt/provision.yaml \ + --env GIRDER_WORKER_BROKER=amqp://guest:guest@localhost:5672/ \ + --env GIRDER_WORKER_BACKEND=rpc://guest:guest@localhost:5672/ \ + instance://dsa-worker-1 /opt/start_worker.sh & + +sleep 30 + +apptainer run \ + --env SIF_IMAGE_PATH="/home/local/KHQ/will.dunklin/work/digital_slide_archive/devops/singularity-minimal/tmp/sifs/" \ + --env TMPDIR=/home/local/KHQ/will.dunklin/work/digital_slide_archive/devops/singularity-minimal/tmp \ + --env LOGS=/home/local/KHQ/will.dunklin/work/digital_slide_archive/devops/singularity-minimal/logs \ + --env GIRDER_SETTING_WORKER_API_URL=http://0.0.0.0:8101/api/v1 \ + --env PATH=/opt/slurm/bin:$PATH \ + --env SLURM_QOS=pinaki.sarder-dsa \ + --env SLURM_ACCOUNT=pinaki.sarder-dsa \ + 
instance://test-dsarchive bash # /opt/start_girder.sh diff --git a/devops/singularity-minimal/provision.yaml b/devops/singularity-minimal/provision.yaml index c540dae6..87d0ce87 100644 --- a/devops/singularity-minimal/provision.yaml +++ b/devops/singularity-minimal/provision.yaml @@ -63,6 +63,16 @@ settings: worker.backend: "rpc://guest:guest@rabbitmq" worker.api_url: "http://girder:8080/api/v1" worker.direct_path: True + + worker.slurm_account: "pinaki.sarder" + worker.slurm_qos: "normal" + worker.slurm_mem: 16000 + worker.slurm_cpus: 4 + worker.slurm_gpu: 1 + # worker.slurm_ntasks: 1 + # worker.slurm_partition: "hpg2-compute" + worker.slurm_time: "72:00" + core.brand_name: "Digital Slide Archive" # core.http_only_cookies: True histomicsui.webroot_path: "histomics" @@ -85,9 +95,9 @@ settings: # - dsarchive/histomicstk:latest # List slicer-cli-images to always pull, and load slicer-cli-image-pull: - - dsarchive/histomicstk:latest + # - dsarchive/histomicstk:latest # The worker can specify parameters for provisioning -# worker-rabbitmq-host: girder:8080 +# worker-rabbitmq-host: localhost:5672 worker-rabbitmq-user: guest worker-rabbitmq-pass: guest worker-config: /opt/girder_worker/girder_worker/worker.local.cfg @@ -98,11 +108,26 @@ worker: # rabbitmq-pass: guest # config: /opt/girder_worker/girder_worker/worker.local.cfg # Install additional pip packages in the worker - # pip: - # - package_one + # shell: + # - pip -V + # - python -V + pip: + # - -e /opt/local_girder_worker + - --force-reinstall -e /opt/local_girder_worker + - -e /opt/local_girder_worker/girder_worker/singularity + - -e /opt/local_girder_worker/girder_worker/slurm + - -e /opt/local_slicer_cli_web + - -e /opt/local_slicer_cli_web/slicer_cli_web/singularity # Run additional shell commands in the worker before start # shell: + # - pip -V + # - pip freeze | grep worker + # - pip install -e /opt/local_girder_worker + # - pip freeze | grep worker # - ls pip: - -e /opt/local_girder_worker + - -e 
/opt/local_girder_worker/girder_worker/singularity + - -e /opt/local_girder_worker/girder_worker/slurm - -e /opt/local_slicer_cli_web + - -e /opt/local_slicer_cli_web/slicer_cli_web/singularity diff --git a/devops/singularity-minimal/rabbitmqdata/.gitignore b/devops/singularity-minimal/rabbitmqdata/.gitignore new file mode 100644 index 00000000..d6b7ef32 --- /dev/null +++ b/devops/singularity-minimal/rabbitmqdata/.gitignore @@ -0,0 +1,2 @@ +* +!.gitignore diff --git a/devops/singularity-minimal/start_girder.sh b/devops/singularity-minimal/start_girder.sh index 10d57dfb..281bfea9 100755 --- a/devops/singularity-minimal/start_girder.sh +++ b/devops/singularity-minimal/start_girder.sh @@ -1,4 +1,5 @@ #!/bin/bash +set -x # Ensures that the main process runs as the DSA_USER and is part of both that # group and the docker group. Fail if DSA_USER is not specified. # if [[ -z "$DSA_USER" ]]; then @@ -28,7 +29,8 @@ # echo 'PATH="/opt/digital_slide_archive/devops/dsa/utils:/opt/venv/bin:/.pyenv/bin:/.pyenv/shims:$PATH"' >> /home/$(id -nu ${DSA_USER%%:*})/.bashrc echo ==== Pre-Provisioning === PATH="/opt/venv/bin:/.pyenv/bin:/.pyenv/shims:$PATH" \ -python /opt/digital_slide_archive/devops/dsa/provision.py -v --pre --yaml /opt/provision.yaml +# python /opt/digital_slide_archive/devops/dsa/provision.py -v --pre --yaml /opt/provision.yaml +python /opt/provision.py -v --pre --yaml /opt/provision.yaml # Run subsequent commands as the DSA_USER. This sets some paths based on what # is expected in the Docker so that the current python environment and the # devops/dsa/utils are available. Then: @@ -39,10 +41,13 @@ python /opt/digital_slide_archive/devops/dsa/provision.py -v --pre --yaml /opt/p # resources. It requires the host to have fuse installed and the docker # container to be run with enough permissions to use fuse. # - Start the main girder process. 
-PATH="/opt/digital_slide_archive/devops/dsa/utils:/opt/venv/bin:/.pyenv/bin:/.pyenv/shims:$PATH"; echo ==== Provisioning === && -python /opt/digital_slide_archive/devops/dsa/provision.py -v --main --yaml /opt/provision.yaml && +PATH="/opt/venv/bin:/.pyenv/bin:/.pyenv/shims:$PATH" \ +# python /opt/digital_slide_archive/devops/dsa/provision.py -v --main --yaml /opt/provision.yaml +python /opt/provision.py -v --main --yaml /opt/provision.yaml echo ==== Creating FUSE mount === && -(girder mount ${DSA_GIRDER_MOUNT_OPTIONS%%:-} /fuse || true) && +PATH="/opt/venv/bin:/.pyenv/bin:/.pyenv/shims:$PATH" \ +girder mount ${DSA_GIRDER_MOUNT_OPTIONS%%:-} /fuse echo ==== Starting Girder === && +PATH="/opt/venv/bin:/.pyenv/bin:/.pyenv/shims:$PATH" \ girder serve --dev diff --git a/devops/singularity-minimal/start_worker.sh b/devops/singularity-minimal/start_worker.sh new file mode 100755 index 00000000..24e70364 --- /dev/null +++ b/devops/singularity-minimal/start_worker.sh @@ -0,0 +1,39 @@ +#!/bin/bash +set -x +# Ensures that the main process runs as the DSA_USER and is part of both that +# group and the docker group. Fail if DSA_USER is not specified. +# if [[ -z "$DSA_USER" ]]; then +# echo "Set the DSA_USER before starting (e.g, DSA_USER=\$$(id -u):\$$(id -g) " +# exit 1 +# fi +# # add a user with the DSA_USER's id; this user is named ubuntu if it doesn't +# # exist. +# adduser --uid ${DSA_USER%%:*} --disabled-password --gecos "" ubuntu 2>/dev/null +# # add a group with the DSA_USER's group id. +# addgroup --gid ${DSA_USER#*:} $(id -ng ${DSA_USER#*:}) 2>/dev/null +# # add the user to the user group. +# adduser $(id -nu ${DSA_USER%%:*}) $(getent group ${DSA_USER#*:} | cut "-d:" -f1) 2>/dev/null +# # add a group with the docker group id. +# addgroup --gid $(stat -c "%g" /var/run/docker.sock) docker 2>/dev/null +# # add the user to the docker group. 
+# adduser $(id -nu ${DSA_USER%%:*}) $(getent group $(stat -c "%g" /var/run/docker.sock) | cut "-d:" -f1) 2>/dev/null +# # Try to increase permissions for the docker socket; this helps this work on +# # OSX where the users don't translate +# chmod 777 /var/run/docker.sock 2>/dev/null || true +# chmod 777 ${TMPDIR:-/tmp} || true +echo ==== Pre-Provisioning === +# PATH="/opt/venv/bin:/.pyenv/bin:/.pyenv/shims:$PATH" \ +# /opt/venv/bin/python3 /opt/digital_slide_archive/devops/dsa/provision.py --worker-pre -v --yaml /opt/provision.yaml +/opt/venv/bin/python3 /opt/provision.py --worker-pre -v --yaml /opt/provision.yaml +echo ==== Provisioning === && +# PATH="/opt/venv/bin:/.pyenv/bin:/.pyenv/shims:$PATH" \ +# /opt/venv/bin/python3 /opt/digital_slide_archive/devops/dsa/provision.py --worker-main -v --yaml /opt/provision.yaml +/opt/venv/bin/python3 /opt/provision.py --worker-main -v --yaml /opt/provision.yaml +echo ==== Starting Worker === && +# Run subsequent commands as the DSA_USER. This sets some paths based on what +# is expected in the Docker so that the current python environment and the +# devops/dsa/utils are available. 
Then it runs girder_worker +# su $(id -nu ${DSA_USER%%:*}) -c " +# PATH=\"/opt/digital_slide_archive/devops/dsa/utils:/opt/venv/bin:/.pyenv/bin:/.pyenv/shims:$PATH\" \ +DOCKER_CLIENT_TIMEOUT=86400 TMPDIR=${TMPDIR:-/tmp} GW_DIRECT_PATHS=true /opt/venv/bin/python3 -m girder_worker --concurrency=${DSA_WORKER_CONCURRENCY:-2} -Ofair --prefetch-multiplier=1 +# " diff --git a/devops/singularity-minimal/tmp/.gitignore b/devops/singularity-minimal/tmp/.gitignore new file mode 100644 index 00000000..d6b7ef32 --- /dev/null +++ b/devops/singularity-minimal/tmp/.gitignore @@ -0,0 +1,2 @@ +* +!.gitignore diff --git a/devops/singularity-minimal/worker_opt/.gitignore b/devops/singularity-minimal/worker_opt/.gitignore new file mode 100644 index 00000000..d6b7ef32 --- /dev/null +++ b/devops/singularity-minimal/worker_opt/.gitignore @@ -0,0 +1,2 @@ +* +!.gitignore From 44e2695a7261469997e073adafb101767949851f Mon Sep 17 00:00:00 2001 From: willdunklin Date: Tue, 25 Feb 2025 15:53:44 -0500 Subject: [PATCH 04/24] Add slurm configuration --- devops/dsa/altfs/.gitignore | 2 + devops/slurm/.dockerignore | 3 + devops/slurm/.gitignore | 4 + devops/slurm/README.rst | 189 +++++ devops/slurm/docker-compose.yml | 193 +++++ devops/slurm/girder.cfg | 45 + devops/slurm/provision.py | 769 ++++++++++++++++++ devops/slurm/provision.yaml | 126 +++ devops/slurm/rabbitmq.advanced.config | 1 + devops/slurm/start_girder.sh | 54 ++ devops/slurm/start_worker.sh | 34 + devops/slurm/utils/.vimrc | 39 + devops/slurm/utils/cli_test.py | 322 ++++++++ .../slurm/utils/rebuild_and_restart_girder.sh | 11 + devops/slurm/utils/restart_girder.sh | 8 + 15 files changed, 1800 insertions(+) create mode 100644 devops/dsa/altfs/.gitignore create mode 100644 devops/slurm/.dockerignore create mode 100644 devops/slurm/.gitignore create mode 100644 devops/slurm/README.rst create mode 100644 devops/slurm/docker-compose.yml create mode 100644 devops/slurm/girder.cfg create mode 100755 devops/slurm/provision.py create mode 100644 
devops/slurm/provision.yaml create mode 100644 devops/slurm/rabbitmq.advanced.config create mode 100755 devops/slurm/start_girder.sh create mode 100755 devops/slurm/start_worker.sh create mode 100644 devops/slurm/utils/.vimrc create mode 100755 devops/slurm/utils/cli_test.py create mode 100755 devops/slurm/utils/rebuild_and_restart_girder.sh create mode 100755 devops/slurm/utils/restart_girder.sh diff --git a/devops/dsa/altfs/.gitignore b/devops/dsa/altfs/.gitignore new file mode 100644 index 00000000..d6b7ef32 --- /dev/null +++ b/devops/dsa/altfs/.gitignore @@ -0,0 +1,2 @@ +* +!.gitignore diff --git a/devops/slurm/.dockerignore b/devops/slurm/.dockerignore new file mode 100644 index 00000000..847e7d05 --- /dev/null +++ b/devops/slurm/.dockerignore @@ -0,0 +1,3 @@ +assetstore +db +logs diff --git a/devops/slurm/.gitignore b/devops/slurm/.gitignore new file mode 100644 index 00000000..78ff212e --- /dev/null +++ b/devops/slurm/.gitignore @@ -0,0 +1,4 @@ +assetstore/ +db/ +logs/ +*.local.* diff --git a/devops/slurm/README.rst b/devops/slurm/README.rst new file mode 100644 index 00000000..cd5fa61c --- /dev/null +++ b/devops/slurm/README.rst @@ -0,0 +1,189 @@ +======================================== +Digital Slide Archive via Docker Compose +======================================== + +This directory contains a complete docker compose set up for the Digital Slide Archive. + +Edit the docker-compose.yml file (or add a docker compose override file) to add mount points for additional data or for exposing additional ports. + +Prerequisites +------------- + +Before using this, you need both Docker and docker compose. See the `official installation instructions `_. + +The docker compose file assumes certain file paths. This has been tested on Ubuntu 20.04. It will probably work on other Linux variants. 
+ +Get the Digital Slide Archive repository:: + + git clone https://github.com/DigitalSlideArchive/digital_slide_archive + +Hardware Requirements +~~~~~~~~~~~~~~~~~~~~~ + +The main server has only modest hardware requirements. It can run in a 2 core, 8 GByte machine (such as a t3.large EC2 instance), but it will perform better with more memory and cores. + +Storage requirements are largely driven by the images used and the number of annotations. For a small test instance, a few GBytes will suffice. For a deployment with high usage, this will need to be much, much larger. + +The worker's requirements are highly dependent on the algorithms being run. The core HistomicsTK examples do not require a GPU, but benefit from more cores and more memory. For a small test instance, this can be run on the same machine as the server. For a serious deployment, multiple workers with GPUs will greatly facilitate some jobs. + +Start +----- + +Change to the appropriate directory:: + + cd digital_slide_archive/devops/dsa/ + +To get the most recent built docker images, do:: + + docker compose pull + +If you don't pull the images, the main image will be built in preference to pulling. + +To start the Digital Slide Archive:: + + DSA_USER=$(id -u):$(id -g) docker compose up + +This uses your current user id so that database files, logs, assetstore files, and temporary files are owned by the current user. If you omit setting ``DSA_USER``, files may be created owned by root. + +The girder instance can now be accessed at http://localhost:8080. By default, it creates an ``admin`` user with a password of ``password``. Note that this example does not add any default tasks or sample files. You can log in with the admin user and use the Slicer CLI Web plugin settings to add default tasks (e.g., ``dsarchive/histomicstk:latest``). + +Stop +---- + +To stop the Digital Slide Archive:: + + docker compose down -v + +The ``-v`` option removes unneeded temporary docker volumes.
+ +Sample Data +----------- + +Sample data can be added after performing ``docker compose up`` by running:: + + python3 utils/cli_test.py dsarchive/histomicstk:latest --test + +This downloads the HistomicsTK analysis tools, some sample data, and runs nuclei detection on some of the sample data. You need Python 3.6 or later available and may need to ``pip install girder-client`` before you can run this command. + + +Development +----------- + +You can log into the running ``girder`` or ``worker`` containers by typing:: + + docker compose exec girder bash + +There are two convenience scripts ``restart_girder.sh`` and ``rebuild_and_restart_girder.sh`` that can be run in the container. + +You can develop source code by mounting the source directory into the container. See the ``docker-compose.yml`` file for details. + +If you need to log into the container as the Girder user, type:: + + docker compose exec --user $(id -u) girder bash + +Technical Details +----------------- + +The Digital Slide Archive is built on Girder and Girder Worker. Here, these are coordinated using docker compose. There are five containers that are started: + +- `Girder `_. Girder is an asset and user management system. It handles permissions and serves data via http. + +- `MongoDB `_. Girder stores settings and information about users and assets in a MongoDB database. + +- `Girder Worker `_. Girder Worker is a task runner based on `Celery `_ that has specific features to get authenticated data from Girder. + +- `RabbitMQ `_. Girder communicates to Girder Worker through a broker. In this configuration it is RabbitMQ. Girder Worker can be run on multiple computers communicating with a single broker to distribute processing. + +- `Memcached `_. Memcached is used to cache data for faster access. This is used for large tiled images. + +The Digital Slide Archive relies on several Girder plugins: + +- `large_image `_. This provides a standardized way to access a wide range of image formats.
Images can be handled as multi-resolution tiles. large_image has numerous tile sources to handle different formats. + +- `HistomicsUI `_. This provides a user interface to examine and annotate large images. + +- `Slicer CLI Web `_. This can run processing tasks in Docker containers. Tasks report their capabilities via the Slicer CLI standard, listing required and optional inputs and outputs. These tasks can be selected and configured via Girder and HistomicsUI and then run in a distributed fashion via Girder Worker. + +Slicer CLI Web runs tasks in Docker containers and is itself running in a Docker container (in Girder for determining options and Girder Worker to run the task). In order to allow a process in a docker container to create another docker container, the paths to the docker executable and communications sockets are mounted from the host to the docker container. + +Permissions +----------- + +By default, the girder container is run in Docker privileged mode. This can be reduced to a small set of permissions (see the docker-compose.yml file for details), but these may vary depending on the host system. If no extra permissions are granted, or if the docker daemon is started with --no-new-privileges, or if libfuse is not installed on the host system, the internal fuse mount will not be started. This may prevent full functionality with non-filesystem assetstores and with some multiple-file image formats. + +Customizing +----------- + +Since this uses standard docker compose, you can customize the process by creating a ``docker-compose.override.yml`` file in the same directory (or a yaml file of any name and use appropriate ``docker compose -f docker-compose.yml -f `` command). Further, if you mount a provisioning yaml file into the docker image, you can customize settings, plugins, resources, and other options. + +See the ``docker-compose.yml`` and ``provision.yaml`` files for details.
+ +Example +~~~~~~~ + +To add some additional girder plugins and mount additional directories for assetstores, you can do something like this: + +``docker-compose.override.yml``:: + + --- + version: '3' + services: + girder: + environment: + # Specify that we want to use the provisioning file + DSA_PROVISION_YAML: ${DSA_PROVISION_YAML:-/opt/digital_slide_archive/devops/dsa/provision.yaml} + volumes: + # Mount the local provisioning file into the container + - ./provision.local.yaml:/opt/digital_slide_archive/devops/dsa/provision.yaml + # Also expose a local data mount into the container + - /mnt/data:/mnt/data + +``provision.local.yaml``:: + + --- + # Load some sample data + samples: True + # A list of additional pip modules to install + pip: + - girder-oauth + - girder-ldap + # rebuild the girder web client since we install some additional plugins + rebuild-client: True + # List slicer-cli-images to pull and load + slicer-cli-image: + - dsarchive/histomicstk:latest + - girder/slicer_cli_web:small + +Using Private Docker Registries for CLI images +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +One of the principal abilities of the Digital Slide Archive is to run algorithms that are packaged via Docker and expose their interface via the Slicer Execution Model. See `HistomicsTK `_ as an example. + +For docker images that are published on public container registries, these can be imported either as part of the provisioning process or via the Slicer CLI Web plugin UI by using the docker image tag (e.g., ``dsarchive/histomicstk:latest``).
+ +Since private registries require authentication, pulling docker images from private registries will not work in the reference deployment without either logging into the running docker container (for both the main Girder container and for any and all girder_worker containers) and authenticating via the ``docker login `` OR by authenticating on the base operating system and passing through the authentication as part of the provisioning process. + +An example of passing through the authentication using docker compose is commented in the default docker-compose.yml file. In this case, use ``docker login`` on the base machine running the DSA and on any worker machines. Use the appropriate override: + +``docker-compose.override.yml``:: + + --- + version: '3' + services: + girder: + environment: + DOCKER_CONFIG: /.docker + volumes: + - /home//.docker:/.docker:ro + worker: + environment: + DOCKER_CONFIG: /.docker + volumes: + - /home//.docker:/.docker:ro + +Docker images can then be added via the provisioning or via the UI using the appropriate private registry and tag (e.g., ``private_registry:5000/dsarchive/histomicstk:latest`` would pull the image from a registry called ``private_registry`` that serves data on port 5000). + +Database Backup +--------------- + +You may want to periodically back up the database. The standard ``mongodump`` tool can be used for this via a command line ``docker compose exec mongodb /usr/bin/mongodump --db girder --archive --gzip > dsa_girder.dump.gz``. Restoring is similar: ``docker compose exec -T mongodb /usr/bin/mongorestore --db girder --archive --gzip < /tmp/dsa_girder.dump.gz``; you may want to add ``--drop`` as a flag to the restore process. See Mongo's official documentation for details.
diff --git a/devops/slurm/docker-compose.yml b/devops/slurm/docker-compose.yml new file mode 100644 index 00000000..b79c3a0d --- /dev/null +++ b/devops/slurm/docker-compose.yml @@ -0,0 +1,193 @@ +--- +services: + girder: + image: dsarchive/dsa_common + build: + context: ../.. + # We use this to optionally set version information during the build + args: + DSA_VERSIONS: ${DSA_VERSIONS:-} + # Instead of privileged mode, fuse can use: + # devices: + # - /dev/fuse:/dev/fuse + # security_opt: + # - apparmor:unconfined + # cap_add: + # - SYS_ADMIN + # but these may be somewhat host specific, so we default to privileged. If + # the docker daemon is being run with --no-new-privileges, fuse may not + # work. + # See also https://github.com/docker/for-linux/issues/321 for possible + # methods to avoid both privileged mode and cap_add SYS_ADMIN. + privileged: true + # Set DSA_USER to a user id that is part of the docker group (e.g., + # `DSA_USER=$(id -u):$(id -g)`). This makes files in assetstores and logs + # owned by that user and provides permissions to manage docker + environment: + DSA_USER: ${DSA_USER:-} + DSA_PROVISION_YAML: ${DSA_PROVISION_YAML:-/opt/digital_slide_archive/devops/dsa/provision.yaml} + # Mount options can be used to, for instance, add diskcache (e.g., + # "-o diskcache,diskcache_size_limit=2147483648") + DSA_GIRDER_MOUNT_OPTIONS: ${DSA_GIRDER_MOUNT_OPTIONS:-} + # You can also set girder settings here: + # GIRDER_SETTING_CORE_HTTP_ONLY_COOKIES: true + # GIRDER_SETTING_HISTOMICSUI_LOGIN_SESSION_EXPIRY_MINUTES: 15 + # If you want to authorize docker image repositories on the host machine + # and have them accessed without further authorization within Girder, + # you can specify a docker config location, mount it (see volumes, + # below), and do "docker login " on the host machine before + # starting the DSA. + # DOCKER_CONFIG: /.docker + restart: unless-stopped + # Set DSA_PORT to expose the interface on another port (default 8080). 
+ ports: + - "${DSA_PORT:-8080}:8080" + volumes: + # Needed to use slicer_cli_web to run docker containers + - /var/run/docker.sock:/var/run/docker.sock + # Default assetstore + - ./assetstore:/assetstore + # Location of girder.cfg + - ./girder.cfg:/etc/girder.cfg + # Location of provision.py + - ./provision.py:/opt/digital_slide_archive/devops/dsa/provision.py + - ./provision.yaml:/opt/digital_slide_archive/devops/dsa/provision.yaml + - ./start_girder.sh:/opt/digital_slide_archive/devops/dsa/start_girder.sh + # Location to store logs + - ./logs:/logs + + # For local development, uncomment the set of mounts associated with the + # local source files. Adding the editable egg directories first allows + # mounting source files from the host without breaking the internal data. + # - /opt/HistomicsUI/histomicsui.egg-info + # - ../../../HistomicsUI:/opt/HistomicsUI + + # See comments about authorizing docker repositories above + # - /home//.docker:/.docker:ro + + # Add additional mounts here to get access to existing files on your + # system. Also add them to the worker container to reduce copying. + depends_on: + - mongodb + - memcached + - rabbitmq + command: /opt/digital_slide_archive/devops/dsa/start_girder.sh + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:8080/api/v1/system/version"] + interval: 5m + timeout: 10s + retries: 3 + start_period: 30s + mongodb: + image: "mongo:latest" + # Set DSA_USER to your user id (e.g., `DSA_USER=$(id -u):$(id -g)`) + # so that database files are owned by yourself. + user: ${DSA_USER:-PLEASE SET DSA_USER} + restart: unless-stopped + # Limiting maxConns reduces the amount of shared memory demanded by + # mongo. Remove this limit or increase the host vm.max_map_count value. + command: --maxConns 1000 + volumes: + # Location to store database files + - ./db:/data/db + # Uncomment to allow access to the database from outside of the docker + # network. 
+ # ports: + # - "27017" + logging: + options: + max-size: "10M" + max-file: "5" + healthcheck: + test: echo 'db.runCommand("ping").ok' | mongosh localhost:27017/test --quiet + interval: 5m + timeout: 10s + retries: 3 + start_period: 30s + memcached: + image: memcached + command: -m 4096 --max-item-size 8M + restart: unless-stopped + # Uncomment to allow access to memcached from outside of the docker network + # ports: + # - "11211" + logging: + options: + max-size: "10M" + max-file: "5" + healthcheck: + test: ["CMD", "bash", "-c", 'exec 3<>/dev/tcp/localhost/11211; printf "stats\nquit\n" >&3; cat <&3'] + interval: 5m + timeout: 10s + retries: 3 + start_period: 30s + rabbitmq: + image: "rabbitmq:latest" + restart: unless-stopped + # Uncomment to allow access to rabbitmq from outside of the docker network + # ports: + # - "5672" + environment: + RABBITMQ_DEFAULT_USER: ${RABBITMQ_DEFAULT_USER:-} + RABBITMQ_DEFAULT_PASS: ${RABBITMQ_DEFAULT_PASS:-} + volumes: + - ./rabbitmq.advanced.config:/etc/rabbitmq/advanced.config:ro + logging: + options: + max-size: "10M" + max-file: "5" + healthcheck: + test: rabbitmq-diagnostics -q ping + interval: 30s + timeout: 30s + retries: 3 + worker: + image: dsarchive/dsa_common + build: + context: ../.. + # We use this to optionally set version information during the build + args: + DSA_VERSIONS: ${DSA_VERSIONS:-} + # Set DSA_USER to a user id that is part of the docker group (e.g., + # `DSA_USER=$(id -u):$(id -g)`). 
This provides permissions to manage + # docker + environment: + DSA_USER: ${DSA_USER:-} + DSA_WORKER_CONCURRENCY: ${DSA_WORKER_CONCURRENCY:-2} + DSA_PROVISION_YAML: ${DSA_PROVISION_YAML:-/opt/digital_slide_archive/devops/dsa/provision.yaml} + TMPDIR: + + # See comments about authorizing docker repositories above + # DOCKER_CONFIG: /.docker + restart: unless-stopped + volumes: + # Needed to use slicer_cli_web to run docker containers + - /var/run/docker.sock:/var/run/docker.sock + # Modify the worker.local.cfg to specify a different rabbitmq server and + # then enable this mount. On the rabbitmq server, make sure you add a + # non-guest default user and use that both in the worker and in the main + # girder settings. + # - ./worker.local.cfg:/opt/girder_worker/girder_worker/worker.local.cfg + # Allow overriding the start command + - ./start_worker.sh:/opt/digital_slide_archive/devops/dsa/start_worker.sh + # Needed to allow transferring data to slicer_cli_web docker containers + - ${TMPDIR:-/tmp}:${TMPDIR:-/tmp} + + # See comments about authorizing docker repositories above + # - /home//.docker:/.docker:ro + + # Add additional mounts here to get access to existing files on your + # system if they have the same path as on the girder container. 
+ depends_on: + - rabbitmq + command: /opt/digital_slide_archive/devops/dsa/start_worker.sh + healthcheck: + test: ["CMD", "celery", "-b", "amqp://rabbitmq:5672", "inspect", "ping"] + interval: 5m + timeout: 10s + retries: 3 + start_period: 30s + logging: + options: + max-size: "10M" + max-file: "5" diff --git a/devops/slurm/girder.cfg b/devops/slurm/girder.cfg new file mode 100644 index 00000000..8b64144c --- /dev/null +++ b/devops/slurm/girder.cfg @@ -0,0 +1,45 @@ +[global] +server.socket_host = "0.0.0.0" +server.max_request_body_size = 1073741824 + +[database] +uri = "mongodb://mongodb:27017/girder?socketTimeoutMS=3600000" + +[server] +# Set to "production" or "development" +mode = "development" +# Disable the event daemon if you do not wish to run event handlers in a background thread. +# This may be necessary in certain deployment modes. +disable_event_daemon = False + +[logging] +log_root = "/logs" +log_access = ["screen", "info"] +# Log everything to the info log (errors also go to the error log) +log_max_info_level = "CRITICAL" +# Increase maximum size of log file +log_max_size = "10 Mb" + +[large_image] +# cache_backend is either "memcached" (default) or "python" +cache_backend = "memcached" +cache_memcached_url = "memcached" +cache_memcached_username = None +cache_memcached_password = None +# cache_python_memory_portion affects memory use when using python caching. +# Higher numbers use less memory. +# cache_python_memory_portion = 8 +# These can be used to reduce the amount of memory used for caching tile +# sources +# cache_tilesource_memory_portion = 16 +cache_tilesource_maximum = 64 + +[cache] +enabled = True + +[histomicsui] +# If restrict_downloads is True, only logged-in users can access download +# and tiles/images endpoints. If this is a number, file and item download +# endpoints can be used by anonymous users for files up to the specified +# size in bytes. This setting does not affect logged-in users. 
+restrict_downloads = 100000 diff --git a/devops/slurm/provision.py b/devops/slurm/provision.py new file mode 100755 index 00000000..3d415628 --- /dev/null +++ b/devops/slurm/provision.py @@ -0,0 +1,769 @@ +#!/usr/bin/env python3 + +import argparse +import configparser +import logging +import os +import subprocess +import sys +import tempfile +import time + +import yaml + +logger = logging.getLogger(__name__) +# See http://docs.python.org/3.3/howto/logging.html#configuring-logging-for-a-library +logging.getLogger(__name__).addHandler(logging.NullHandler()) + + +def get_collection_folder(adminUser, collName, folderName): + from girder.models.collection import Collection + from girder.models.folder import Folder + + if Collection().findOne({'lowerName': collName.lower()}) is None: + logger.info('Create collection %s', collName) + Collection().createCollection(collName, adminUser) + collection = Collection().findOne({'lowerName': collName.lower()}) + if Folder().findOne({ + 'parentId': collection['_id'], 'lowerName': folderName.lower()}) is None: + logger.info('Create folder %s in %s', folderName, collName) + Folder().createFolder(collection, folderName, parentType='collection', + public=True, creator=adminUser) + folder = Folder().findOne({'parentId': collection['_id'], 'lowerName': folderName.lower()}) + return folder + + +def get_sample_data(adminUser, collName='Sample Images', folderName='Images'): + """ + As needed, download sample data. + + :param adminUser: a user to create and modify collections and folders. + :param collName: the collection name where the data will be added. + :param folderName: the folder name where the data will be added. + :returns: the folder where the sample data is located. + """ + try: + import girder_client + import requests + import urllib3 + except ImportError: + logger.error('girder_client is unavailable. 
Cannot get sample data.') + return + from girder.models.item import Item + from girder.models.upload import Upload + from girder_large_image.models.image_item import ImageItem + + folder = get_collection_folder(adminUser, collName, folderName) + remote = girder_client.GirderClient(apiUrl='https://data.kitware.com/api/v1') + session = requests.Session() + retries = urllib3.util.retry.Retry( + total=10, backoff_factor=0.1, status_forcelist=[104, 500, 502, 503, 504]) + session.mount('http://', requests.adapters.HTTPAdapter(max_retries=retries)) + session.mount('https://', requests.adapters.HTTPAdapter(max_retries=retries)) + remote._session = session + + remoteFolder = remote.resourceLookup('/collection/HistomicsTK/Deployment test images') + sampleItems = [] + for remoteItem in remote.listItem(remoteFolder['_id']): + item = Item().findOne({'folderId': folder['_id'], 'name': remoteItem['name']}) + if item and len(list(Item().childFiles(item, limit=1))): + sampleItems.append(item) + continue + if not item: + item = Item().createItem(remoteItem['name'], creator=adminUser, folder=folder) + for remoteFile in remote.listFile(remoteItem['_id']): + with tempfile.NamedTemporaryFile() as tf: + fileName = tf.name + tf.close() + logger.info('Downloading %s', remoteFile['name']) + remote.downloadFile(remoteFile['_id'], fileName) + Upload().uploadFromFile( + open(fileName, 'rb'), os.path.getsize(fileName), + name=remoteItem['name'], parentType='item', + parent=item, user=adminUser) + sampleItems.append(item) + for item in sampleItems: + if 'largeImage' not in item: + logger.info('Making large_item %s', item['name']) + try: + ImageItem().createImageItem(item, createJob=False) + except Exception: + pass + logger.info('done') + return folder + + +def value_from_resource(value, adminUser): + """ + If a value is a string that startwith 'resource:', it is a path to an + existing resource. Fetch it an return the string of the _id. 
+ + :param value: a value + :returns: the original value it is not a resource, or the string id of the + resource. + """ + import girder.utility.path as path_util + from girder.models.assetstore import Assetstore + + starts = {'resource:': 'doc', 'resourceid:': 'id', 'resourceobjid:': 'obj'} + if isinstance(value, dict): + value = {k: value_from_resource(v, adminUser) for k, v in value.items()} + for start, stype in starts.items(): + if str(value).startswith(start): + resPath = value.split(':', 1)[1] + if resPath == 'admin': + resource = adminUser + elif resPath.startswith('assetstore/'): + resource = Assetstore().findOne({'name': value.split('/', 1)[1]}) + else: + resource = path_util.lookUpPath(resPath, force=True)['document'] + logger.info(f'Finding {start} reference for {resPath} as ' + f'{resource["_id"] if resource else resource}') + if stype == 'doc': + value = resource + elif stype == 'id': + value = str(resource['_id']) + else: + value = resource['_id'] + break + return value + + +def provision_resources(resources, adminUser): + """ + Given a dictionary of resources, add them to the system. The resource is + only added if a resource of that name with the same parent object does not + exist. + + :param resources: a list of resources to add. + :param adminUser: the admin user to use for provisioning. 
+ """ + from girder.utility.model_importer import ModelImporter + + for entry in resources: + entry = {k: value_from_resource(v, adminUser) for k, v in entry.items()} + modelName = entry.pop('model') + metadata = entry.pop('metadata', None) + metadata_update = entry.pop('metadata_update', True) + metadata_key = entry.pop('metadata_key', 'meta') + attrs = entry.pop('attrs', None) + attrs_update = entry.pop('attrs_update', True) + model = ModelImporter.model(modelName) + key = 'name' if model != 'user' else 'login' + query = {} + if key in entry: + query[key] = entry[key] + owners = {'folder': 'parent', 'item': 'folder', 'file': 'item'} + ownerKey = owners.get(modelName) + if ownerKey and ownerKey in entry and isinstance( + entry[ownerKey], dict) and '_id' in entry[ownerKey]: + query[ownerKey + 'Id'] = entry[ownerKey]['_id'] + if query and model.findOne(query): + result = model.findOne(query) + logger.debug('Has %s (%r)', modelName, entry) + else: + createFunc = getattr(model, 'create%s' % modelName.capitalize()) + logger.info('Creating %s (%r)', modelName, entry) + result = createFunc(**entry) + attrs_update = True + if isinstance(metadata, dict) and hasattr(model, 'setMetadata'): + if metadata_key not in metadata or metadata_update: + if metadata_key not in result: + result[metadata_key] = {} + result[metadata_key].update(metadata.items()) + for key in metadata: + if metadata[key] is None: + del result[metadata_key][key] + model.validateKeys(result[metadata_key]) + result = model.save(result) + if attrs and attrs_update: + result.update(attrs) + result = model.save(result) + + +def get_slicer_images(imageList, adminUser, alwaysPull=False): + """ + Load a list of cli docker images into the system. + + :param imageList: a list of docker images. + :param adminUser: an admin user for permissions. + :param alwaysPull: true to ask to always pull the latest image. 
+ """ + import threading + + from girder import logger + from girder.models.setting import Setting + from girder_jobs.constants import JobStatus + from girder_jobs.models.job import Job + from slicer_cli_web.config import PluginSettings + from slicer_cli_web.docker_resource import DockerResource + from slicer_cli_web.image_job import jobPullAndLoad + + imageList = [entry for entry in imageList if entry and len(entry)] + if not len(imageList): + return + logger.info('Pulling and installing slicer_cli images: %r', imageList) + job = Job().createLocalJob( + module='slicer_cli_web.image_job', + function='jobPullAndLoad', + kwargs={ + 'nameList': imageList, + 'folder': Setting().get(PluginSettings.SLICER_CLI_WEB_TASK_FOLDER), + 'pull': 'true' if alwaysPull else 'asneeded', + }, + title='Pulling and caching docker images', + type=DockerResource.jobType, + user=adminUser, + public=True, + asynchronous=True + ) + job = Job().save(job) + t = threading.Thread(target=jobPullAndLoad, args=(job, )) + t.start() + logpos = 0 + logger.info('Result:\n') + while job['status'] not in {JobStatus.SUCCESS, JobStatus.ERROR, JobStatus.CANCELED}: + time.sleep(0.1) + job = Job().load(id=job['_id'], user=adminUser, includeLog=True) + if 'log' in job: + while logpos < len(job['log']): + if 'Pulling' not in job['log'][logpos] or '%' not in job['log'][logpos]: + logger.info(job['log'][logpos].rstrip()) + logpos += 1 + t.join() + if 'log' not in job: + logger.warning('Job record: %r', job) + if job['status'] != JobStatus.SUCCESS: + raise Exception('Failed to pull and load images') + + +def pip_install(packages): + """ + Pip install a list of packages via the shell pip install command. This + first tries installing all of the packages in a single command; if it + fails, they are tried individually to betetr show where the failure occurs. + + :param packages: a list of strings to add to the end of the pip install + command. 
+ """ + if not packages or not len(packages): + return + cmd = 'pip install -q ' + ' '.join(packages) + logger.info('Installing: %s', cmd) + try: + subprocess.check_call(cmd, shell=True) + except Exception: + logger.error(f'Failed to run {cmd}; trying pip install individually.') + for entry in packages: + cmd = 'pip install %s' % entry + logger.info('Installing: %s', cmd) + try: + subprocess.check_call(cmd, shell=True) + except Exception: + logger.error(f'Failed to run {cmd}') + raise + + +def preprovision(opts): + """ + Preprovision the instance. This includes installing python modules with + pip and rebuilding the girder client if desired. + + :param opts: the argparse options. + """ + pip_install(getattr(opts, 'pip', None)) + if getattr(opts, 'shell', None) and len(opts.shell): + for entry in opts.shell: + cmd = entry + logger.info('Running: %s', cmd) + try: + subprocess.check_call(cmd, shell=True) + except Exception: + logger.error(f'Failed to run {cmd}') + raise + if getattr(opts, 'rebuild-client', None): + cmd = 'girder build' + if str(getattr(opts, 'rebuild-client', None)).lower().startswith('dev'): + cmd += ' --dev' + logger.info('Rebuilding girder client: %s', cmd) + cmd = ('NPM_CONFIG_FUND=false NPM_CONFIG_AUDIT=false ' + 'NPM_CONFIG_AUDIT_LEVEL=high NPM_CONFIG_LOGLEVEL=error ' + 'NPM_CONFIG_PROGRESS=false NPM_CONFIG_PREFER_OFFLINE=true ' + cmd) + try: + if not getattr(opts, 'no_wait', False): + subprocess.check_call(cmd, shell=True) + else: + proc = subprocess.Popen(cmd + ' ; touch /tmp/girder_build_done', shell=True) + logger.info('Rebuilding in background via pid %r', proc.pid) + open('/tmp/girder_build.pid', 'w').write(str(proc.pid)) + except Exception: + logger.error(f'Failed to run {cmd}') + raise + + +def clean_delete_locks(): + from girder.constants import AssetstoreType + from girder.models.assetstore import Assetstore + + for assetstore in Assetstore().find(): + if assetstore['type'] != AssetstoreType.FILESYSTEM: + continue + rootpath = 
assetstore['root'] + cmd = ['find', rootpath, '-name', '*.deleteLock', '-delete'] + logger.info(f'Removing old delete locks: {cmd}') + try: + subprocess.check_call(cmd, shell=False) + except Exception: + logger.info(f'Failed trying to remove old delete locks: {cmd}') + + +def provision(opts): # noqa + """ + Provision the instance. + + :param opts: the argparse options. + """ + from girder.models.assetstore import Assetstore + from girder.models.setting import Setting + from girder.models.user import User + + # If there is are no admin users, create an admin user + if User().findOne({'admin': True}) is None: + adminParams = dict({ + 'login': 'admin', + 'password': 'password', + 'firstName': 'Admin', + 'lastName': 'Admin', + 'email': 'admin@nowhere.nil', + 'public': True, + }, **(opts.admin if opts.admin else {})) + User().createUser(admin=True, **adminParams) + adminUser = User().findOne({'admin': True}) + + # Make sure we have an assetstore + assetstoreParams = opts.assetstore or {'name': 'Assetstore', 'root': '/assetstore'} + if not isinstance(assetstoreParams, list): + assetstoreParams = [assetstoreParams] + if Assetstore().findOne() is None: + for params in assetstoreParams: + method = params.pop('method', 'createFilesystemAssetstore') + getattr(Assetstore(), method)(**params) + + # Clean up old deleteLocks + if getattr(opts, 'clean-delete-locks', None): + clean_delete_locks() + + # Make sure we have a demo collection and download some demo files + if getattr(opts, 'samples', None): + get_sample_data( + adminUser, + getattr(opts, 'sample-collection', 'Samples'), + getattr(opts, 'sample-folder', 'Images')) + if opts.resources: + provision_resources(opts.resources, adminUser) + settings = dict({}, **(opts.settings or {})) + force = getattr(opts, 'force', None) or [] + for key, value in settings.items(): + if (value != '__SKIP__' and ( + force is True or key in force or + Setting().get(key) is None or + Setting().get(key) == Setting().getDefault(key))): + value = 
def provision_worker(opts):
    """
    Provision the worker.  There are a few top-level settings, but others
    should be in the worker sub-field.

    The effective settings are the ``opts.worker`` dictionary, with any
    top-level ``worker-<name>`` option filling in ``<name>`` when the
    dictionary does not provide a value for it.  If no ``rabbitmq-host`` is
    configured, nothing is done.  Otherwise, the ``broker`` and ``backend``
    entries of the ``[celery]`` section of the worker config file are
    rewritten to point at the RabbitMQ server.

    :param opts: the argparse options.  ``opts.worker`` may be None or a
        dictionary of worker settings.
    """
    settings = dict({}, **(opts.worker or {}))
    for key in dir(opts):
        if key.startswith('worker-'):
            mainkey = key.split('worker-', 1)[1]
            if settings.get(mainkey) is None:
                settings[mainkey] = getattr(opts, key)
    if not settings.get('rabbitmq-host'):
        return
    # An explicit 'host' entry is still honored for backward compatibility,
    # but the documented option is 'rabbitmq-host'; reading only 'host'
    # raised a KeyError whenever just 'rabbitmq-host' was configured.
    host = settings.get('host') or settings['rabbitmq-host']
    conf = configparser.ConfigParser()
    # read() silently skips a missing file, so the section may not exist yet.
    conf.read([settings['config']])
    if not conf.has_section('celery'):
        conf.add_section('celery')
    target = (settings['rabbitmq-user'], settings['rabbitmq-pass'], host)
    conf.set('celery', 'broker', 'amqp://%s:%s@%s/' % target)
    conf.set('celery', 'backend', 'rpc://%s:%s@%s/' % target)
    with open(settings['config'], 'w') as fptr:
        conf.write(fptr)
def merge_default_opts(opts):
    """
    Add the defaults to the options.

    Default settings are used as a base and any settings already present in
    ``opts.settings`` are applied on top, so explicitly supplied values are
    never replaced by a default.  The default slicer-cli image list,
    assetstore, admin user, and clean-delete-locks flag are only filled in
    when the corresponding option is None.  The resources needed by the
    default ``slicer_cli_web.task_folder`` setting are appended to any
    resources already listed.

    :param opts: the options parsed from the command line.
    :return opts: the modified options.
    """
    settings = {
        'worker.broker': 'amqp://guest:guest@rabbitmq',
        'worker.backend': 'rpc://guest:guest@rabbitmq',
        'worker.api_url': 'http://girder:8080/api/v1',
        'worker.direct_path': True,
        'core.brand_name': 'Digital Slide Archive',
        # 'core.http_only_cookies': True,
        'histomicsui.webroot_path': 'histomics',
        'histomicsui.alternate_webroot_path': 'histomicstk',
        'histomicsui.delete_annotations_after_ingest': True,
        'homepage.markdown': """# Digital Slide Archive
---
## Bioinformatics Platform

Welcome to the **Digital Slide Archive**.

Developers who want to use the Girder REST API should check out the
[interactive web API docs](api/v1).

The [HistomicsUI](histomics) application is enabled.""",
        'slicer_cli_web.task_folder': 'resourceid:collection/Tasks/Slicer CLI Web Tasks',
    }
    # Apply the caller's settings last; updating in the other order silently
    # discarded any explicit value whose key also has a default.
    settings.update(opts.settings or {})
    opts.settings = settings
    if getattr(opts, 'slicer-cli-image-pull', None) is None:
        setattr(opts, 'slicer-cli-image-pull', ['dsarchive/histomicstk:latest'])
    if getattr(opts, 'assetstore', None) is None:
        opts.assetstore = {
            'name': 'Assetstore',
            'root': '/assetstore',
            'method': 'createFilesystemAssetstore',
        }
    if getattr(opts, 'admin', None) is None:
        opts.admin = {
            'login': 'admin',
            'password': 'password',
            'firstName': 'Admin',
            'lastName': 'Admin',
            'email': 'admin@nowhere.nil',
            'public': True,
        }
    if getattr(opts, 'clean-delete-locks', None) is None:
        setattr(opts, 'clean-delete-locks', True)
    # Work on a copy so the list object passed in by the caller is not grown
    # behind its back.
    resources = list(opts.resources or [])
    resources.extend([{
        'model': 'collection',
        'name': 'Tasks',
        'creator': 'resource:admin',
        'public': True,
    }, {
        'model': 'folder',
        'parent': 'resource:collection/Tasks',
        'parentType': 'collection',
        'name': 'Slicer CLI Web Tasks',
        'creator': 'resource:admin',
        'public': True,
    }])
    opts.resources = resources
    return opts
Otherwise, settings are ' + 'only added or modified if they do not exist or are the default ' + 'value.') + parser.add_argument( + '--samples', '--data', '--sample-data', + action='store_true', help='Download sample data') + parser.add_argument( + '--clean-delete-locks', action='store_true', + help='Remove assetstore delete locks on start') + parser.add_argument( + '--no-clean-delete-locks', action='store_false', + dest='clean-delete-locks', + help='Do not remove assetstore delete locks on start') + parser.add_argument( + '--sample-collection', dest='sample-collection', default='Samples', + help='Sample data collection name') + parser.add_argument( + '--sample-folder', dest='sample-folder', default='Images', + help='Sample data folder name') + parser.add_argument( + '--admin', action=YamlAction, + help='A yaml dictionary of parameters used to create a default admin ' + 'user. If any of login, password, firstName, lastName, email, or ' + 'public are not specified, some default values are used. If any ' + 'admin user already exists, no modifications are made.') + parser.add_argument( + '--assetstore', action=YamlAction, + help='A yaml dictionary (or list of dictionaries) of parameters used ' + 'to create a default assetstore. This can include "method" which ' + 'includes the creation method, such as "createFilesystemAssetstore" ' + 'or "createS3Assetstore". Otherwise, this is a list of parameters ' + 'passed to the creation method. For filesystem assetstores, these ' + 'parameters are name, root, and perms. For S3 assetstores, these are ' + 'name, bucket, accessKeyId, secret, prefix, service, readOnly, ' + 'region, inferCredentials, and serverSideEncryption. If unspecified, ' + 'a filesystem assetstore is created.') + parser.add_argument( + '--settings', action=YamlAction, + help='A yaml dictionary of settings to change in the Girder ' + 'database. This is merged with the default settings dictionary. 
' + 'Settings are only changed if they are their default values, the ' + 'force option is used, or they are specified by an environment ' + 'variable. If a setting has a value of "__SKIP__", it will not be ' + 'changed (this can prevent setting a default setting ' + 'option to any value).') + parser.add_argument( + '--resources', action=YamlAction, + help='A yaml list of resources to add by name to the Girder ' + 'database. Each entry is a dictionary including "model" with the ' + 'resource model and a dictionary of values to pass to the ' + 'appropriate create(resource) function. A value of ' + '"resource:" is converted to the resource document with that ' + 'resource path. "resource:admin" uses the default admin, ' + '"resourceid:" is the string id for the resource path.') + parser.add_argument( + '--yaml', + help='Specify parameters for this script in a yaml file. If no value ' + 'is specified, this defaults to the environment variable of ' + 'DSA_PROVISION_YAML. No error is thrown if the file does not exist. ' + 'The yaml file is a dictionary of keys as would be passed to the ' + 'command line.') + parser.add_argument( + '--no-mongo-compat', action='store_false', dest='mongo-compat', + default=True, help='Do not automatically set the mongo feature ' + 'compatibility version to the current server version.') + parser.add_argument( + '--no-defaults', action='store_false', dest='use-defaults', + default=None, help='Do not use default settings; start with a minimal ' + 'number of parameters.') + parser.add_argument( + '--pip', action='append', help='A list of modules to pip install. If ' + 'any are specified that include girder client plugins, also specify ' + '--rebuild-client. Each specified value is passed to pip install ' + 'directly, so additional options are needed, these can be added (such ' + 'as --find-links). 
The actual values need to be escaped ' + 'appropriately for a bash shell.') + parser.add_argument( + '--rebuild-client', dest='rebuild-client', action='store_true', + default=False, help='Rebuild the girder client.') + parser.add_argument( + '--slicer-cli-image', dest='slicer-cli-image', action='append', + help='Install slicer_cli images, only pulling if not present.') + parser.add_argument( + '--slicer-cli-image-pull', dest='slicer-cli-image-pull', action='append', + help='Install slicer_cli images, always pulling the latest.') + + parser.add_argument( + '--rabbitmq-user', default='guest', dest='worker-rabbitmq-user', + help='Worker: RabbitMQ user name.') + parser.add_argument( + '--rabbitmq-pass', default='guest', dest='worker-rabbitmq-pass', + help='Worker: RabbitMQ password.') + parser.add_argument( + '--rabbitmq-host', dest='worker-rabbitmq-host', + help='Worker: RabbitMQ host.') + parser.add_argument( + '--config', dest='worker-config', + default='/opt/girder_worker/girder_worker/worker.local.cfg', + help='Worker: Path to the worker config file.') + parser.add_argument( + '--worker', action=YamlAction, + help='A yaml dictionary of worker settings.') + parser.add_argument( + '--worker-main', dest='portion', action='store_const', + const='worker-main', + help='Provision a worker, not the main process.') + parser.add_argument( + '--worker-pre', dest='portion', action='store_const', + const='worker-pre', + help='Pre-provision a worker, not the main process.') + parser.add_argument( + '--pre', dest='portion', action='store_const', const='pre', + help='Only do preprovisioning (install optional python modules and ' + 'optionally build the girder client).') + parser.add_argument( + '--main', dest='portion', action='store_const', const='main', + help='Only do main provisioning.') + parser.add_argument( + '--no-wait', action='store_true', + help='If a girder build is performed during preprovisioning, do not ' + 'wait for it to complete.') + parser.add_argument( + 
'--verbose', '-v', action='count', default=0, help='Increase verbosity') + parser.add_argument( + '--dry-run', '-n', dest='dry-run', action='store_true', + help='Report merged options but do not actually apply them') + opts = parser.parse_args(args=sys.argv[1:]) + logger.addHandler(logging.StreamHandler(sys.stderr)) + logger.setLevel(max(1, logging.WARNING - 10 * opts.verbose)) + try: + logger.info('Provision file date: %s; size: %d', + time.ctime(os.path.getmtime(__file__)), + os.path.getsize(__file__)) + except Exception: + pass + logger.debug('Parsed arguments: %r', opts) + if getattr(opts, 'use-defaults', None) is not False: + opts = merge_default_opts(opts) + opts = merge_yaml_opts(opts, parser) + opts = merge_environ_opts(opts) + logger.debug('Merged arguments: %r', opts) + if getattr(opts, 'dry-run'): + print(yaml.dump({k: v for k, v in vars(opts).items() if v is not None})) + sys.exit(0) + # Worker provisioning + if getattr(opts, 'portion', None) == 'worker-pre': + preprovision_worker(opts) + sys.exit(0) + if getattr(opts, 'portion', None) == 'worker-main': + provision_worker(opts) + sys.exit(0) + if getattr(opts, 'portion', None) in {'pre', None}: + # Run provisioning that has to happen before configuring the server. + preprovision(opts) + if getattr(opts, 'portion', None) == 'pre': + sys.exit(0) + if getattr(opts, 'portion', None) in {'main', None}: + # This loads plugins, allowing setting validation. We want the import + # to be after the preprovision step. 
+ from girder import _attachFileLogHandlers + from girder.utility.server import configureServer + + _attachFileLogHandlers() + configureServer() + if getattr(opts, 'mongo-compat', None) is not False: + from girder.models import getDbConnection + + try: + db = getDbConnection() + except Exception: + logger.warning('Could not connect to mongo.') + try: + # In mongo shell, this is functionally + # db.adminCommand({setFeatureCompatibilityVersion: + # db.version().split('.').slice(0, 2).join('.')}) + db.admin.command({'setFeatureCompatibilityVersion': '.'.join( + db.server_info()['version'].split('.')[:2]), 'confirm': True}) + except Exception: + try: + db.admin.command({'setFeatureCompatibilityVersion': '.'.join( + db.server_info()['version'].split('.')[:2])}) + except Exception: + logger.warning('Could not set mongo feature compatibility version.') + try: + # Also attempt to upgrade old version 2 image sources + db.girder.item.update_many( + {'largeImage.sourceName': 'svs'}, + {'$set': {'largeImage.sourceName': 'openslide'}}) + except Exception: + logger.warning('Could not update old source names.') + provision(opts) diff --git a/devops/slurm/provision.yaml b/devops/slurm/provision.yaml new file mode 100644 index 00000000..0a30b255 --- /dev/null +++ b/devops/slurm/provision.yaml @@ -0,0 +1,126 @@ +--- +# The provision script can take a yaml file with provision options + +# This is a dictionary of command-line arguments for the provisioning script +# If force is True, set all settings even if they are already set. This may +# also be a list of settings keys to force. It does not change the admin user +# or the default assetstore if those already exist. If False, settings are +# only added or modified if they do not exist or are the default value. 
+force: False +samples: False +clean-delete-locks: True +sample-collection: Samples +sample-folder: Images +# Set use-defaults to False to skip default settings +use-defaults: True +# Set mongo-compat to False to not automatically set the mongo feature +# compatibility version to the current server version. +mongo-compat: True +# A list of additional pip modules to install; if any are girder plugins with +# client-side code, also specify rebuild-client. +# pip: +# - girder-oauth +# - girder-ldap +# rebuild-client may be False, True (for production mode), or "development" +rebuild-client: False +# Run additional shell commands before start +# shell: +# - ls +# Default admin user if there are no admin users +admin: + login: admin + password: password + firstName: Admin + lastName: Admin + email: admin@nowhere.nil + public: True +# Default assetstore if there are no assetstores +assetstore: + method: createFilesystemAssetstore + name: Assetstore + root: /assetstore +# Any resources to ensure exist. A model must be specified. This creates the +# resource if there is no match for all specified values other than metadata +# and attrs. A value of "resource:<path>" is converted to the resource +# document with that resource path. "resourceid:<path>" is the string id for +# the resource path, "resourceobjid:<path>" is the Mongo object id for the +# resource path. A <path> of "admin" uses the default admin user. A <path> of +# the form "assetstore/<name>" references the named assetstore. +# You can add metadata to a resource. The default key is meta. If +# metadata_update is False, metadata will not be set if any metadata +# already exists. +# You can alter the core model of a resource using the attrs key. If the +# resource already exists, this will only be applied if attrs_update is True. 
+resources: + - model: collection + name: Tasks + creator: resource:admin + public: True + - model: folder + parent: resource:collection/Tasks + parentType: collection + name: "Slicer CLI Web Tasks" + creator: resource:admin + public: True + # metadata: + # sample_key: sample_value + # metadata_key: meta + # metadata_update: True + # attrs: + # quota: + # preferredAssetstore: resource:assetstore/Assetstore + # attrs_update: True +settings: + worker.broker: "amqp://guest:guest@rabbitmq" + worker.backend: "rpc://guest:guest@rabbitmq" + worker.api_url: "http://girder:8080/api/v1" + worker.direct_path: True + core.brand_name: "Digital Slide Archive" + # core.http_only_cookies: True + histomicsui.webroot_path: "histomics" + histomicsui.alternate_webroot_path: "histomicstk" + histomicsui.delete_annotations_after_ingest: True + homepage.markdown: |- + # Digital Slide Archive + --- + ## Bioinformatics Platform + + Welcome to the **Digital Slide Archive**. + + Developers who want to use the Girder REST API should check out the + [interactive web API docs](api/v1). + + The [HistomicsUI](histomics) application is enabled. 
+ slicer_cli_web.task_folder: "resourceid:collection/Tasks/Slicer CLI Web Tasks" +# List slicer-cli-images to pull, if not present, and load +# slicer-cli-image: +# - dsarchive/histomicstk:latest +# List slicer-cli-images to always pull, and load +slicer-cli-image-pull: + - dsarchive/histomicstk:latest +# The worker can specify parameters for provisioning +# worker-rabbitmq-host: girder:8080 +worker-rabbitmq-user: guest +worker-rabbitmq-pass: guest +worker-config: /opt/girder_worker/girder_worker/worker.local.cfg +# These have precedence over the top level values +worker: + # rabbitmq-host: girder:8080 + # rabbitmq-user: guest + # rabbitmq-pass: guest + # config: /opt/girder_worker/girder_worker/worker.local.cfg + # Install additional pip packages in the worker + # pip: + # - package_one + # Run additional shell commands in the worker before start + # shell: + # - ls +shell: + - rm -rf /opt/girder_worker /opt/slicer_cli_web + - git clone --branch slurm https://github.com/girder/girder_worker.git /opt/girder_worker + - git clone --branch slicer-cli-web-singularity https://github.com/girder/slicer_cli_web.git /opt/slicer_cli_web + - pip install -e /opt/girder_worker + - pip install -e /opt/girder_worker/girder_worker/singularity + - pip install -e /opt/girder_worker/girder_worker/slurm + - pip install -e /opt/slicer_cli_web + - pip install -e /opt/slicer_cli_web/slicer_cli_web/singularity diff --git a/devops/slurm/rabbitmq.advanced.config b/devops/slurm/rabbitmq.advanced.config new file mode 100644 index 00000000..893aae56 --- /dev/null +++ b/devops/slurm/rabbitmq.advanced.config @@ -0,0 +1 @@ +[ {rabbit, [ {consumer_timeout, undefined} ]} ]. diff --git a/devops/slurm/start_girder.sh b/devops/slurm/start_girder.sh new file mode 100755 index 00000000..b37a66da --- /dev/null +++ b/devops/slurm/start_girder.sh @@ -0,0 +1,54 @@ +#!/bin/bash +# Ensures that the main process runs as the DSA_USER and is part of both that +# group and the docker group. 
Fail if DSA_USER is not specified. +if [[ -z "$DSA_USER" ]]; then + echo "Set the DSA_USER before starting (e.g, DSA_USER=\$$(id -u):\$$(id -g) " + exit 1 +fi +# add a user with the DSA_USER's id; this user is named ubuntu if it doesn't +# exist. +adduser --uid ${DSA_USER%%:*} --disabled-password --gecos "" ubuntu 2>/dev/null +# add a group with the DSA_USER's group id. +addgroup --gid ${DSA_USER#*:} $(id -ng ${DSA_USER#*:}) 2>/dev/null +# add the user to the user group. +adduser $(id -nu ${DSA_USER%%:*}) $(getent group ${DSA_USER#*:} | cut "-d:" -f1) 2>/dev/null +# add a group with the docker group id. +addgroup --gid $(stat -c "%g" /var/run/docker.sock) docker 2>/dev/null +# add the user to the docker group. +adduser $(id -nu ${DSA_USER%%:*}) $(getent group $(stat -c "%g" /var/run/docker.sock) | cut "-d:" -f1) 2>/dev/null +# Try to increase permissions for the docker socket; this helps this work on +# OSX where the users don't translate +chmod 777 /var/run/docker.sock 2>/dev/null || true +# Use iptables to make some services appear as if they are on localhost (as +# well as on the docker network). This is done to allow tox tests to run. +sysctl -w net.ipv4.conf.eth0.route_localnet=1 +iptables -t nat -A OUTPUT -o lo -p tcp -m tcp --dport 27017 -j DNAT --to-destination `dig +short mongodb`:27017 +iptables -t nat -A OUTPUT -o lo -p tcp -m tcp --dport 11211 -j DNAT --to-destination `dig +short memcached`:11211 +iptables -t nat -A POSTROUTING -o eth0 -m addrtype --src-type LOCAL --dst-type UNICAST -j MASQUERADE +echo 'PATH="/opt/digital_slide_archive/devops/dsa/utils:/opt/venv/bin:/.pyenv/bin:/.pyenv/shims:$PATH"' >> /home/$(id -nu ${DSA_USER%%:*})/.bashrc +echo ==== Pre-Provisioning === +PATH="/opt/venv/bin:/.pyenv/bin:/.pyenv/shims:$PATH" \ +python /opt/digital_slide_archive/devops/dsa/provision.py -v --pre --no-wait +# Run subsequent commands as the DSA_USER. 
#!/bin/bash
# Start a girder_worker process as the DSA_USER.
#
# DSA_USER must have the form "<uid>:<gid>".  A matching user and group are
# created when needed, and the user is also added to the group that owns the
# docker socket when that socket exists.  Fail if DSA_USER is not specified.
#
# Account setup is deliberately best effort: adduser/addgroup report an error
# when the entry already exists, so their stderr is discarded and the script
# does not use "set -e".
if [[ -z "${DSA_USER:-}" ]]; then
  # Single quotes keep the command substitutions literal so that the hint
  # can be pasted into a shell as-is.
  echo 'Set the DSA_USER before starting (e.g., DSA_USER=$(id -u):$(id -g))' >&2
  exit 1
fi
dsa_uid="${DSA_USER%%:*}"
dsa_gid="${DSA_USER#*:}"

# add a user with the DSA_USER's id; this user is named ubuntu if it doesn't
# exist.
adduser --uid "$dsa_uid" --disabled-password --gecos "" ubuntu 2>/dev/null
# add a group with the DSA_USER's group id.
addgroup --gid "$dsa_gid" "$(id -ng "$dsa_gid")" 2>/dev/null

# Resolve the account name once, after the account has had a chance to be
# created.  An empty name would make "su -c" run the worker as root, so stop
# instead.
run_user="$(id -nu "$dsa_uid")"
if [[ -z "$run_user" ]]; then
  echo "No user with uid ${dsa_uid} exists and one could not be created" >&2
  exit 1
fi

# add the user to the user group.
adduser "$run_user" "$(getent group "$dsa_gid" | cut -d: -f1)" 2>/dev/null

# The docker socket is optional; only touch the docker group when the socket
# is present (an empty gid would make getent list every group).
docker_gid="$(stat -c '%g' /var/run/docker.sock 2>/dev/null)"
if [[ -n "$docker_gid" ]]; then
  # add a group with the docker group id.
  addgroup --gid "$docker_gid" docker 2>/dev/null
  # add the user to the docker group.
  adduser "$run_user" "$(getent group "$docker_gid" | cut -d: -f1)" 2>/dev/null
fi
# Try to increase permissions for the docker socket; this helps this work on
# OSX where the users don't translate
chmod 777 /var/run/docker.sock 2>/dev/null || true
chmod 777 "${TMPDIR:-/tmp}" || true
echo ==== Pre-Provisioning ===
python3 /opt/digital_slide_archive/devops/dsa/provision.py --worker-pre
echo ==== Provisioning === &&
python3 /opt/digital_slide_archive/devops/dsa/provision.py --worker-main
echo ==== Starting Worker === &&
# Run subsequent commands as the DSA_USER.  This sets some paths based on what
# is expected in the Docker so that the current python environment and the
# devops/dsa/utils are available.  Then it runs girder_worker.  The command
# string is double quoted on purpose: PATH, TMPDIR and DSA_WORKER_CONCURRENCY
# are expanded here, before su starts the new shell.
su "$run_user" -c "
  PATH=\"/opt/digital_slide_archive/devops/dsa/utils:/opt/venv/bin:/.pyenv/bin:/.pyenv/shims:$PATH\";
  DOCKER_CLIENT_TIMEOUT=86400 TMPDIR=${TMPDIR:-/tmp} GW_DIRECT_PATHS=true python -m girder_worker --concurrency=${DSA_WORKER_CONCURRENCY:-2} -Ofair --prefetch-multiplier=1
"
def get_girder_client(opts):
    """
    Create a Girder client for the configured api url and authenticate it.

    When a token is supplied it is used directly and nothing is prompted
    for.  Otherwise, a missing username and/or password is requested
    interactively before authenticating.

    :param opts: a dictionary of options that has 'apiurl' and may have
        'token', 'username', and 'password'.
    :returns: the girder client.
    """
    token = opts.get('token')
    username, password = opts.get('username'), opts.get('password')
    if not token:
        if not username:
            username = input('Admin login: ')
        if not password:
            who = username if username else 'default admin user'
            password = getpass.getpass('Password for %s: ' % who)
    client = girder_client.GirderClient(apiUrl=opts['apiurl'])
    if not token:
        client.authenticate(username, password)
    else:
        client.setToken(token)
    return client
+ """ + collName = 'HistomicsTK Tests' + try: + collection = client.resourceLookup('/collection/' + collName) + except Exception: + collection = None + if not collection: + collection = client.createCollection(collName, public=True) + folderName = 'Images' + try: + folder = client.resourceLookup('/collection/%s/%s' % (collName, folderName)) + except Exception: + folder = None + if not folder: + folder = client.createFolder(collection['_id'], folderName, parentType='collection') + remote = girder_client.GirderClient(apiUrl='https://data.kitware.com/api/v1') + remoteFolder = remote.resourceLookup('/collection/HistomicsTK/Deployment test images') + for item in remote.listItem(remoteFolder['_id']): + localPath = '/collection/%s/%s/%s' % (collName, folderName, item['name']) + try: + localItem = client.resourceLookup(localPath) + except Exception: + localItem = None + if localItem: + if opts.get('test') == 'local': + continue + client.delete('item/%s' % localItem['_id']) + localItem = client.createItem(folder['_id'], item['name']) + for remoteFile in remote.listFile(item['_id']): + with tempfile.NamedTemporaryFile() as tf: + fileName = tf.name + tf.close() + sys.stdout.write('Downloading %s' % remoteFile['name']) + sys.stdout.flush() + remote.downloadFile(remoteFile['_id'], fileName) + sys.stdout.write(' .') + sys.stdout.flush() + client.uploadFileToItem( + localItem['_id'], fileName, filename=remoteFile['name'], + mimeType=remoteFile['mimeType']) + sys.stdout.write('.\n') + sys.stdout.flush() + for item in list(client.listItem(folder['_id'])): + if '.anot' in item['name']: + sys.stdout.write('Deleting %s\n' % item['name']) + sys.stdout.flush() + client.delete('item/%s' % item['_id']) + continue + if 'largeImage' not in item: + sys.stdout.write('Making large_item %s ' % item['name']) + sys.stdout.flush() + job = client.post('item/%s/tiles' % item['_id']) + if job is not None: + job, peak_memory = wait_for_job(client, job) + else: + print('done') + return folder + + +def 
def test_cli(client, folder, opts):  # noqa
    """
    Run the CLI on an image and make sure we get an annotation out of it.

    The NucleiDetection task of the ``opts['cli']`` image is run against
    either the item given by ``opts['testid']`` or the first item in the
    folder whose name starts with ``TCGA-02``.  After the job finishes, up to
    60 seconds are allowed for a new annotation to appear on the item.  All
    but the three newest ``cli_test-nuclei-bbox`` annotations on the item are
    then deleted.

    :param client: girder client.
    :param folder: the parent folder of the test images.
    :param opts: command line options (a dictionary).
    :raises Exception: if there is no item to test, no new annotation is
        produced, or the annotation has fewer than 100 elements.
    """
    testItem = None
    if not opts.get('testid'):
        for item in client.listItem(folder['_id']):
            if item['name'].startswith('TCGA-02'):
                testItem = item
                break
    else:
        testItem = {'_id': opts.get('testid')}
    if testItem is None:
        # Without this check the next line failed with an opaque TypeError.
        raise Exception('No test item found: no item in the test folder has a '
                        'name starting with TCGA-02')
    localFile = next(client.listFile(testItem['_id']))
    path = 'slicer_cli_web/%s/NucleiDetection/run' % (
        opts['cli'].replace('/', '_').replace(':', '_'), )
    sys.stdout.write('Running %s ' % opts['cli'])
    sys.stdout.flush()
    # Remember the newest existing annotation so that the one produced by
    # this run can be told apart from older ones.
    anList = client.get('annotation', parameters={
        'itemId': testItem['_id'], 'sort': '_id', 'sortdir': -1, 'limit': 1})
    lastOldAnnotId = None
    if len(anList):
        lastOldAnnotId = anList[0]['_id']
    memory_use = get_memory_use(client)
    starttime = time.time()
    region = '[15000,15000,1000,1000]'
    if opts.get('randomregion'):
        metadata = client.get('item/%s/tiles' % testItem['_id'])
        w = metadata['sizeX']
        h = metadata['sizeY']
        # Clamp the region to the image so that randint is never asked for a
        # value in an empty range on small images.
        rw = min(random.randint(500, 5000), w)
        rh = min(random.randint(500, 5000), h)
        region = '[%d,%d,%d,%d]' % (random.randint(0, w - rw), random.randint(0, h - rh), rw, rh)
    if opts.get('noregion'):
        region = '[-1,-1,-1,-1]'
    data = {
        'inputImageFile': localFile['_id'],
        'outputNucleiAnnotationFile_folder': folder['_id'],
        'outputNucleiAnnotationFile': 'cli_test.anot',
        'analysis_roi': region,
        'foreground_threshold': '60',
        'min_fgnd_frac': '0.05',

        'analysis_tile_size': '4096',
        'nuclei_annotation_format': 'bbox',
        'max_radius': '30',
        'min_radius': '20',
    }
    if opts.get('testarg') and len(opts.get('testarg')):
        # Each entry is key=value; these override the defaults above.
        testarg = {val.split('=', 1)[0]: val.split('=', 1)[1] for val in opts['testarg']}
        data.update(testarg)
    if (opts.get('verbose') or 0) >= 1:
        sys.stdout.write('%r\n' % data)
    job = client.post(path, data=data)
    job, peak_memory = wait_for_job(client, job)
    runtime = time.time() - starttime
    # Wait for the annotation to be processed after the job finishes.
    maxWait = time.time() + 60
    annot = None
    while not annot and time.time() < maxWait:
        anList = client.get('annotation', parameters={
            'itemId': testItem['_id'], 'sort': '_id', 'sortdir': -1, 'limit': 1})
        if len(anList) and anList[0]['_id'] != lastOldAnnotId:
            annot = client.get('annotation/%s' % anList[0]['_id'])
            break
        time.sleep(1)
    if not annot:
        # Report the real problem rather than failing on annot[...] below.
        raise Exception('No new annotation was created on item %s within 60 seconds' % (
            testItem['_id'], ))
    sys.stdout.write('Total time: %5.3f, Max memory delta: %d bytes, Elements: %d\n' % (
        runtime, peak_memory - memory_use, len(annot['annotation']['elements'])))
    sys.stdout.flush()
    if len(annot['annotation']['elements']) < 100:
        raise Exception('Got less than 100 annotation elements (%d) from annotation %s' % (
            len(annot['annotation']['elements']), anList[0]['_id']))
    # Keep only the three newest annotations generated by this test.
    anList = client.get('annotation', parameters=dict(
        sort='_id', sortdir=-1, itemId=testItem['_id']))
    keep = 3
    for annot in anList:
        if annot['annotation']['name'] == 'cli_test-nuclei-bbox':
            if keep:
                keep -= 1
            else:
                client.delete(f'annotation/{annot["_id"]}')
+ :param folder: the parent folder of the test images. + :param opts: command line options. + """ + for item in client.listItem(folder['_id']): + if 'largeImage' not in item: + raise Exception('No large image in item') + result = client.get('item/%s/tiles/region' % item['_id'], parameters={ + 'left': 100, 'top': 150, 'right': 400, 'bottom': 450, + 'encoding': 'PNG', + }, jsonResp=False) + region = result.content + if region[1:4] != b'PNG' or len(region) < 6000: + raise Exception('Region did not give expected results') + + +def wait_for_job(client, job): + """ + Wait for a job to complete. + + :param client: the girder client. + :param job: a girder job. + :return: the updated girder job. + """ + peak_memory_use = get_memory_use(client) + lastdot = 0 + jobId = job['_id'] + while job['status'] not in (3, 4, 5): + if time.time() - lastdot >= 3: + sys.stdout.write('.') + sys.stdout.flush() + lastdot = time.time() + time.sleep(0.25) + peak_memory_use = max(peak_memory_use, get_memory_use(client)) + job = client.get('job/%s' % jobId) + if job['status'] == 3: + print(' ready') + else: + print(' failed') + return job, peak_memory_use + + +if __name__ == '__main__': + parser = argparse.ArgumentParser( + description='Download test data for HistomicsTK, and test that basic functions work.') + parser.add_argument( + 'cli', + help='A cli docker image name. This is pulled and used in tests.') + parser.add_argument( + '--apiurl', '--api', '--url', '-a', + default='http://127.0.0.1:8080/api/v1', help='The Girder api url.') + parser.add_argument( + '--password', '--pass', '--passwd', '--pw', + help='The Girder admin password. If not specified, a prompt is given.') + parser.add_argument( + '--username', '--user', + help='The Girder admin username. If not specified, a prompt is given.') + parser.add_argument( + '--token', + help='A Girder admin authentication token. 
If specified, username ' + 'and password are ignored') + parser.add_argument( + '--no-cli', '--nocli', action='store_true', dest='nocli', + help='Do not pull and upload the cli; assume it is already present.') + parser.add_argument( + '--no-region', '--noregion', '--whole', action='store_true', + dest='noregion', + help='Run the cli against the whole image (this is slow).') + parser.add_argument( + '--random-region', '--randomregion', '--random', action='store_true', + dest='randomregion', + help='Run the cli against a random region on the image (this may be slow).') + parser.add_argument( + '--test', action='store_true', default=False, + help='Download test data and check that basic functions work.') + parser.add_argument( + '--test-local', '--local-test', '--local', action='store_const', + dest='test', const='local', + help='Use local test data and check that basic functions work. If ' + 'local data is not present, it is downloaded.') + parser.add_argument( + '--no-test', action='store_false', dest='test', + help='Do not download test data and do not run checks.') + parser.add_argument( + '--test-id', dest='testid', help='The ID of the item to test.') + parser.add_argument( + '--test-arg', '--arg', '--testarg', dest='testarg', action='append', + help='Test arguments. 
These should be of the form =.') + parser.add_argument( + '--only-data', '--data', action='store_const', dest='test', + const='data', + help='Download test data, but do not run CLI.') + parser.add_argument('--verbose', '-v', action='count', default=0) + + args = parser.parse_args() + if args.verbose >= 2: + print('Parsed arguments: %r' % args) + client = get_girder_client(vars(args)) + if not args.nocli: + install_cli(client, args.cli) + if args.test: + folder = get_test_data(client, vars(args)) + test_tiles(client, folder, vars(args)) + if args.test != 'data': + test_cli(client, folder, vars(args)) diff --git a/devops/slurm/utils/rebuild_and_restart_girder.sh b/devops/slurm/utils/rebuild_and_restart_girder.sh new file mode 100755 index 00000000..0aa29bb4 --- /dev/null +++ b/devops/slurm/utils/rebuild_and_restart_girder.sh @@ -0,0 +1,11 @@ +#!/usr/bin/env bash + +set -e + +OLDSTART=$(curl --silent 'http://127.0.0.1:8080/api/v1/system/version') +girder build --dev +touch /etc/girder.cfg +echo "Girder has been rebuilt and will now restart" +while true; do NEWSTART=$(curl --silent 'http://127.0.0.1:8080/api/v1/system/version' || true); if [ "${OLDSTART}" != "${NEWSTART}" ]; then echo ${NEWSTART} | grep -q 'release' && break || true; fi; sleep 1; echo -n "."; done +echo "" +echo "Girder has restarted" diff --git a/devops/slurm/utils/restart_girder.sh b/devops/slurm/utils/restart_girder.sh new file mode 100755 index 00000000..82643460 --- /dev/null +++ b/devops/slurm/utils/restart_girder.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash + +OLDSTART=$(curl --silent 'http://127.0.0.1:8080/api/v1/system/version') +touch /etc/girder.cfg +echo "Girder will now restart" +while true; do NEWSTART=$(curl --silent 'http://127.0.0.1:8080/api/v1/system/version' || true); if [ "${OLDSTART}" != "${NEWSTART}" ]; then echo ${NEWSTART} | grep -q 'release' && break || true; fi; sleep 1; echo -n "."; done +echo "" +echo "Girder has restarted" From b4ecc190bb039adce1050fbc66532be1d22c963b Mon Sep 17 
00:00:00 2001 From: willdunklin Date: Tue, 25 Feb 2025 17:14:03 -0500 Subject: [PATCH 05/24] Install worker packages --- devops/slurm/docker-compose.yml | 2 ++ devops/slurm/provision.yaml | 12 ++++++++++-- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/devops/slurm/docker-compose.yml b/devops/slurm/docker-compose.yml index b79c3a0d..f41c4828 100644 --- a/devops/slurm/docker-compose.yml +++ b/devops/slurm/docker-compose.yml @@ -169,6 +169,8 @@ services: # girder settings. # - ./worker.local.cfg:/opt/girder_worker/girder_worker/worker.local.cfg # Allow overriding the start command + - ./provision.py:/opt/digital_slide_archive/devops/dsa/provision.py + - ./provision.yaml:/opt/digital_slide_archive/devops/dsa/provision.yaml - ./start_worker.sh:/opt/digital_slide_archive/devops/dsa/start_worker.sh # Needed to allow transferring data to slicer_cli_web docker containers - ${TMPDIR:-/tmp}:${TMPDIR:-/tmp} diff --git a/devops/slurm/provision.yaml b/devops/slurm/provision.yaml index 0a30b255..0dde0425 100644 --- a/devops/slurm/provision.yaml +++ b/devops/slurm/provision.yaml @@ -113,8 +113,16 @@ worker: # pip: # - package_one # Run additional shell commands in the worker before start - # shell: - # - ls + shell: + - rm -rf /opt/girder_worker /opt/slicer_cli_web + - git clone --branch slurm https://github.com/girder/girder_worker.git /opt/girder_worker + - git clone --branch slicer-cli-web-singularity https://github.com/girder/slicer_cli_web.git /opt/slicer_cli_web + - pip install -e /opt/girder_worker + - pip install -e /opt/girder_worker/girder_worker/singularity + - pip install -e /opt/girder_worker/girder_worker/slurm + - pip install -e /opt/slicer_cli_web + - pip install -e /opt/slicer_cli_web/slicer_cli_web/singularity + shell: - rm -rf /opt/girder_worker /opt/slicer_cli_web - git clone --branch slurm https://github.com/girder/girder_worker.git /opt/girder_worker From 9095c6b739dba13ba3193dbfdeb3697b86b5dc1e Mon Sep 17 00:00:00 2001 From: willdunklin 
Date: Tue, 25 Feb 2025 17:14:18 -0500 Subject: [PATCH 06/24] Add SIF directory --- devops/slurm/SIF/.gitignore | 2 ++ devops/slurm/docker-compose.yml | 2 ++ 2 files changed, 4 insertions(+) create mode 100644 devops/slurm/SIF/.gitignore diff --git a/devops/slurm/SIF/.gitignore b/devops/slurm/SIF/.gitignore new file mode 100644 index 00000000..d6b7ef32 --- /dev/null +++ b/devops/slurm/SIF/.gitignore @@ -0,0 +1,2 @@ +* +!.gitignore diff --git a/devops/slurm/docker-compose.yml b/devops/slurm/docker-compose.yml index f41c4828..585f4d0c 100644 --- a/devops/slurm/docker-compose.yml +++ b/devops/slurm/docker-compose.yml @@ -156,6 +156,7 @@ services: DSA_WORKER_CONCURRENCY: ${DSA_WORKER_CONCURRENCY:-2} DSA_PROVISION_YAML: ${DSA_PROVISION_YAML:-/opt/digital_slide_archive/devops/dsa/provision.yaml} TMPDIR: + SIF_IMAGE_PATH: /SIF # See comments about authorizing docker repositories above # DOCKER_CONFIG: /.docker @@ -177,6 +178,7 @@ services: # See comments about authorizing docker repositories above # - /home//.docker:/.docker:ro + - ./SIF:/SIF # Add additional mounts here to get access to existing files on your # system if they have the same path as on the girder container. From 5d58989fc4f47f0afed43e81774820323c51f06c Mon Sep 17 00:00:00 2001 From: willdunklin Date: Wed, 26 Feb 2025 13:52:26 -0500 Subject: [PATCH 07/24] Add apptainer configuration for docker --- devops/slurm/docker-compose.yml | 12 +++++++++++- slurm.Dockerfile | 6 ++++++ 2 files changed, 17 insertions(+), 1 deletion(-) create mode 100644 slurm.Dockerfile diff --git a/devops/slurm/docker-compose.yml b/devops/slurm/docker-compose.yml index 585f4d0c..b65275db 100644 --- a/devops/slurm/docker-compose.yml +++ b/devops/slurm/docker-compose.yml @@ -4,6 +4,7 @@ services: image: dsarchive/dsa_common build: context: ../.. 
+ dockerfile: slurm.Dockerfile # We use this to optionally set version information during the build args: DSA_VERSIONS: ${DSA_VERSIONS:-} @@ -38,6 +39,8 @@ services: # below), and do "docker login " on the host machine before # starting the DSA. # DOCKER_CONFIG: /.docker + SIF_IMAGE_PATH: /SIF + LOGS: /logs restart: unless-stopped # Set DSA_PORT to expose the interface on another port (default 8080). ports: @@ -61,6 +64,7 @@ services: # mounting source files from the host without breaking the internal data. # - /opt/HistomicsUI/histomicsui.egg-info # - ../../../HistomicsUI:/opt/HistomicsUI + - ./SIF:/SIF # See comments about authorizing docker repositories above # - /home//.docker:/.docker:ro @@ -146,6 +150,7 @@ services: build: context: ../.. # We use this to optionally set version information during the build + dockerfile: slurm.Dockerfile args: DSA_VERSIONS: ${DSA_VERSIONS:-} # Set DSA_USER to a user id that is part of the docker group (e.g., @@ -157,10 +162,14 @@ services: DSA_PROVISION_YAML: ${DSA_PROVISION_YAML:-/opt/digital_slide_archive/devops/dsa/provision.yaml} TMPDIR: SIF_IMAGE_PATH: /SIF - + LOGS: /logs # this is needed in slicer_cli_web -> direct_singularity_run.py:53 + GIRDER_WORKER_SLURM_SUBMIT_SCRIPT: /opt/girder_worker/girder_worker/slurm/girder_worker_slurm/singluarity.slurm # See comments about authorizing docker repositories above # DOCKER_CONFIG: /.docker restart: unless-stopped + + privileged: true # needed for apptainer in docker. TODO: re-evaulate method + volumes: # Needed to use slicer_cli_web to run docker containers - /var/run/docker.sock:/var/run/docker.sock @@ -179,6 +188,7 @@ services: # See comments about authorizing docker repositories above # - /home//.docker:/.docker:ro - ./SIF:/SIF + - ./logs:/logs # Add additional mounts here to get access to existing files on your # system if they have the same path as on the girder container. 
diff --git a/slurm.Dockerfile b/slurm.Dockerfile new file mode 100644 index 00000000..1617f8eb --- /dev/null +++ b/slurm.Dockerfile @@ -0,0 +1,6 @@ +FROM dsarchive/dsa_common:latest +LABEL maintainer="Kitware, Inc. " + +RUN add-apt-repository -y ppa:apptainer/ppa \ + && apt update \ + && apt install -y apptainer-suid From db255a68d0e1f9f916d4011185cc07a3a6fd552f Mon Sep 17 00:00:00 2001 From: willdunklin Date: Thu, 27 Feb 2025 09:12:21 -0500 Subject: [PATCH 08/24] Move worker from docker to virtual environment --- devops/slurm/docker-compose.yml | 120 ++++++++++++++++---------------- devops/slurm/worker/.gitignore | 4 ++ devops/slurm/worker/create.sh | 8 +++ devops/slurm/worker/run.sh | 17 +++++ 4 files changed, 89 insertions(+), 60 deletions(-) create mode 100644 devops/slurm/worker/.gitignore create mode 100755 devops/slurm/worker/create.sh create mode 100755 devops/slurm/worker/run.sh diff --git a/devops/slurm/docker-compose.yml b/devops/slurm/docker-compose.yml index b65275db..891348be 100644 --- a/devops/slurm/docker-compose.yml +++ b/devops/slurm/docker-compose.yml @@ -96,8 +96,8 @@ services: - ./db:/data/db # Uncomment to allow access to the database from outside of the docker # network. - # ports: - # - "27017" + ports: + - "27017:27017" logging: options: max-size: "10M" @@ -129,8 +129,8 @@ services: image: "rabbitmq:latest" restart: unless-stopped # Uncomment to allow access to rabbitmq from outside of the docker network - # ports: - # - "5672" + ports: + - "5672:5672" environment: RABBITMQ_DEFAULT_USER: ${RABBITMQ_DEFAULT_USER:-} RABBITMQ_DEFAULT_PASS: ${RABBITMQ_DEFAULT_PASS:-} @@ -145,63 +145,63 @@ services: interval: 30s timeout: 30s retries: 3 - worker: - image: dsarchive/dsa_common - build: - context: ../.. 
- # We use this to optionally set version information during the build - dockerfile: slurm.Dockerfile - args: - DSA_VERSIONS: ${DSA_VERSIONS:-} - # Set DSA_USER to a user id that is part of the docker group (e.g., - # `DSA_USER=$(id -u):$(id -g)`). This provides permissions to manage - # docker - environment: - DSA_USER: ${DSA_USER:-} - DSA_WORKER_CONCURRENCY: ${DSA_WORKER_CONCURRENCY:-2} - DSA_PROVISION_YAML: ${DSA_PROVISION_YAML:-/opt/digital_slide_archive/devops/dsa/provision.yaml} - TMPDIR: - SIF_IMAGE_PATH: /SIF - LOGS: /logs # this is needed in slicer_cli_web -> direct_singularity_run.py:53 - GIRDER_WORKER_SLURM_SUBMIT_SCRIPT: /opt/girder_worker/girder_worker/slurm/girder_worker_slurm/singluarity.slurm - # See comments about authorizing docker repositories above - # DOCKER_CONFIG: /.docker - restart: unless-stopped + # worker: + # image: dsarchive/dsa_common + # build: + # context: ../.. + # # We use this to optionally set version information during the build + # dockerfile: slurm.Dockerfile + # args: + # DSA_VERSIONS: ${DSA_VERSIONS:-} + # # Set DSA_USER to a user id that is part of the docker group (e.g., + # # `DSA_USER=$(id -u):$(id -g)`). This provides permissions to manage + # # docker + # environment: + # DSA_USER: ${DSA_USER:-} + # DSA_WORKER_CONCURRENCY: ${DSA_WORKER_CONCURRENCY:-2} + # DSA_PROVISION_YAML: ${DSA_PROVISION_YAML:-/opt/digital_slide_archive/devops/dsa/provision.yaml} + # TMPDIR: + # SIF_IMAGE_PATH: /SIF + # LOGS: /logs # this is needed in slicer_cli_web -> direct_singularity_run.py:53 + # GIRDER_WORKER_SLURM_SUBMIT_SCRIPT: /opt/girder_worker/girder_worker/slurm/girder_worker_slurm/singluarity.slurm + # # See comments about authorizing docker repositories above + # # DOCKER_CONFIG: /.docker + # restart: unless-stopped - privileged: true # needed for apptainer in docker. TODO: re-evaulate method + # privileged: true # needed for apptainer in docker. 
TODO: re-evaulate method - volumes: - # Needed to use slicer_cli_web to run docker containers - - /var/run/docker.sock:/var/run/docker.sock - # Modify the worker.local.cfg to specify a different rabbitmq server and - # then enable this mount. On the rabbitmq server, make sure you add a - # non-guest default user and use that both in the worker and in the main - # girder settings. - # - ./worker.local.cfg:/opt/girder_worker/girder_worker/worker.local.cfg - # Allow overriding the start command - - ./provision.py:/opt/digital_slide_archive/devops/dsa/provision.py - - ./provision.yaml:/opt/digital_slide_archive/devops/dsa/provision.yaml - - ./start_worker.sh:/opt/digital_slide_archive/devops/dsa/start_worker.sh - # Needed to allow transferring data to slicer_cli_web docker containers - - ${TMPDIR:-/tmp}:${TMPDIR:-/tmp} + # volumes: + # # Needed to use slicer_cli_web to run docker containers + # - /var/run/docker.sock:/var/run/docker.sock + # # Modify the worker.local.cfg to specify a different rabbitmq server and + # # then enable this mount. On the rabbitmq server, make sure you add a + # # non-guest default user and use that both in the worker and in the main + # # girder settings. 
+ # # - ./worker.local.cfg:/opt/girder_worker/girder_worker/worker.local.cfg + # # Allow overriding the start command + # - ./provision.py:/opt/digital_slide_archive/devops/dsa/provision.py + # - ./provision.yaml:/opt/digital_slide_archive/devops/dsa/provision.yaml + # - ./start_worker.sh:/opt/digital_slide_archive/devops/dsa/start_worker.sh + # # Needed to allow transferring data to slicer_cli_web docker containers + # - ${TMPDIR:-/tmp}:${TMPDIR:-/tmp} - # See comments about authorizing docker repositories above - # - /home//.docker:/.docker:ro - - ./SIF:/SIF - - ./logs:/logs + # # See comments about authorizing docker repositories above + # # - /home//.docker:/.docker:ro + # - ./SIF:/SIF + # - ./logs:/logs - # Add additional mounts here to get access to existing files on your - # system if they have the same path as on the girder container. - depends_on: - - rabbitmq - command: /opt/digital_slide_archive/devops/dsa/start_worker.sh - healthcheck: - test: ["CMD", "celery", "-b", "amqp://rabbitmq:5672", "inspect", "ping"] - interval: 5m - timeout: 10s - retries: 3 - start_period: 30s - logging: - options: - max-size: "10M" - max-file: "5" + # # Add additional mounts here to get access to existing files on your + # # system if they have the same path as on the girder container. 
+ # depends_on: + # - rabbitmq + # command: /opt/digital_slide_archive/devops/dsa/start_worker.sh + # healthcheck: + # test: ["CMD", "celery", "-b", "amqp://rabbitmq:5672", "inspect", "ping"] + # interval: 5m + # timeout: 10s + # retries: 3 + # start_period: 30s + # logging: + # options: + # max-size: "10M" + # max-file: "5" diff --git a/devops/slurm/worker/.gitignore b/devops/slurm/worker/.gitignore new file mode 100644 index 00000000..feae96b1 --- /dev/null +++ b/devops/slurm/worker/.gitignore @@ -0,0 +1,4 @@ +venv/ + +girder_work/ +slicer_cli_web/ diff --git a/devops/slurm/worker/create.sh b/devops/slurm/worker/create.sh new file mode 100755 index 00000000..92da6aca --- /dev/null +++ b/devops/slurm/worker/create.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash + +rm -rf ./lib/girder_worker ./lib/slicer_cli_web ./venv + +python -m venv ./venv + +git clone --branch slurm https://github.com/girder/girder_worker.git ./lib/girder_worker +git clone --branch slicer-cli-web-singularity https://github.com/girder/slicer_cli_web.git ./lib/slicer_cli_web diff --git a/devops/slurm/worker/run.sh b/devops/slurm/worker/run.sh new file mode 100755 index 00000000..437287c7 --- /dev/null +++ b/devops/slurm/worker/run.sh @@ -0,0 +1,17 @@ +#!/usr/bin/env bash + +. 
./venv/bin/activate + +pip install girder girder_jobs +pip install -e ./lib/girder_worker +pip install -e ./lib/girder_worker/girder_worker/singularity +pip install -e ./lib/girder_worker/girder_worker/slurm +pip install -e ./lib/slicer_cli_web +pip install -e ./lib/slicer_cli_web/slicer_cli_web/singularity + +# PATH="$HOME/misc/girder_worker/env/bin:$PATH" GW_DIRECT_PATHS=true python -m girder_worker -l info -Ofair --prefetch-multiplier=1 --without-heartbeat --concurrency=2 +SIF_IMAGE_PATH=~/work/digital_slide_archive/devops/slurm/SIF \ +LOGS=~/work/digital_slide_archive/devops/slurm/logs \ +GIRDER_WORKER_SLURM_SUBMIT_SCRIPT=~/work/digital_slide_archive/devops/slurm/worker/lib/girder_worker/girder_worker/slurm/girder_worker_slurm/singluarity.slurm \ +GW_DIRECT_PATHS=true \ +python -m girder_worker -l info -Ofair --prefetch-multiplier=1 --without-heartbeat --concurrency=2 From fb5f2ec7b807445363653d324fed711071e645ca Mon Sep 17 00:00:00 2001 From: willdunklin Date: Mon, 3 Mar 2025 17:01:27 -0500 Subject: [PATCH 09/24] Fix provisioning --- devops/slurm/docker-compose.yml | 2 ++ devops/slurm/provision.yaml | 13 +++---------- devops/slurm/worker.dist.cfg | 12 ++++++++++++ devops/slurm/worker/build.sh | 10 ++++++++++ devops/slurm/worker/run.sh | 17 ++++++----------- 5 files changed, 33 insertions(+), 21 deletions(-) create mode 100644 devops/slurm/worker.dist.cfg create mode 100755 devops/slurm/worker/build.sh diff --git a/devops/slurm/docker-compose.yml b/devops/slurm/docker-compose.yml index 891348be..b0bda66c 100644 --- a/devops/slurm/docker-compose.yml +++ b/devops/slurm/docker-compose.yml @@ -59,6 +59,8 @@ services: # Location to store logs - ./logs:/logs + - ./worker.dist.cfg:/opt/worker.dist.cfg + # For local development, uncomment the set of mounts associated with the # local source files. Adding the editable egg directories first allows # mounting source files from the host without breaking the internal data. 
diff --git a/devops/slurm/provision.yaml b/devops/slurm/provision.yaml index 0dde0425..c05aad17 100644 --- a/devops/slurm/provision.yaml +++ b/devops/slurm/provision.yaml @@ -97,7 +97,7 @@ settings: # - dsarchive/histomicstk:latest # List slicer-cli-images to always pull, and load slicer-cli-image-pull: - - dsarchive/histomicstk:latest + # - dsarchive/histomicstk:latest # The worker can specify parameters for provisioning # worker-rabbitmq-host: girder:8080 worker-rabbitmq-user: guest @@ -113,20 +113,13 @@ worker: # pip: # - package_one # Run additional shell commands in the worker before start - shell: - - rm -rf /opt/girder_worker /opt/slicer_cli_web - - git clone --branch slurm https://github.com/girder/girder_worker.git /opt/girder_worker - - git clone --branch slicer-cli-web-singularity https://github.com/girder/slicer_cli_web.git /opt/slicer_cli_web - - pip install -e /opt/girder_worker - - pip install -e /opt/girder_worker/girder_worker/singularity - - pip install -e /opt/girder_worker/girder_worker/slurm - - pip install -e /opt/slicer_cli_web - - pip install -e /opt/slicer_cli_web/slicer_cli_web/singularity + # shell: shell: - rm -rf /opt/girder_worker /opt/slicer_cli_web - git clone --branch slurm https://github.com/girder/girder_worker.git /opt/girder_worker - git clone --branch slicer-cli-web-singularity https://github.com/girder/slicer_cli_web.git /opt/slicer_cli_web + - cp /opt/worker.dist.cfg /opt/girder_worker/girder_worker/worker.dist.cfg - pip install -e /opt/girder_worker - pip install -e /opt/girder_worker/girder_worker/singularity - pip install -e /opt/girder_worker/girder_worker/slurm diff --git a/devops/slurm/worker.dist.cfg b/devops/slurm/worker.dist.cfg new file mode 100644 index 00000000..0c93b25f --- /dev/null +++ b/devops/slurm/worker.dist.cfg @@ -0,0 +1,12 @@ +[celery] +app_main=girder_worker +broker=amqp://guest:guest@rabbitmq/ +backend=rpc://guest:guest@rabbitmq/ + +[girder_worker] +# root dir where temp files for jobs will be written 
+tmp_root=tmp + +[logging] +level=info +format=[%%(asctime)s] %%(levelname)s: %%(message)s diff --git a/devops/slurm/worker/build.sh b/devops/slurm/worker/build.sh new file mode 100755 index 00000000..9407cde9 --- /dev/null +++ b/devops/slurm/worker/build.sh @@ -0,0 +1,10 @@ +#!/usr/bin/env bash + +. ./venv/bin/activate + +pip install girder girder_jobs +pip install -e ./lib/girder_worker +pip install -e ./lib/girder_worker/girder_worker/singularity +pip install -e ./lib/girder_worker/girder_worker/slurm +pip install -e ./lib/slicer_cli_web +pip install -e ./lib/slicer_cli_web/slicer_cli_web/singularity diff --git a/devops/slurm/worker/run.sh b/devops/slurm/worker/run.sh index 437287c7..324d6558 100755 --- a/devops/slurm/worker/run.sh +++ b/devops/slurm/worker/run.sh @@ -2,16 +2,11 @@ . ./venv/bin/activate -pip install girder girder_jobs -pip install -e ./lib/girder_worker -pip install -e ./lib/girder_worker/girder_worker/singularity -pip install -e ./lib/girder_worker/girder_worker/slurm -pip install -e ./lib/slicer_cli_web -pip install -e ./lib/slicer_cli_web/slicer_cli_web/singularity +# TODO: communicate TMP assignment (it needs to be accessible by worker's node and compute node) -# PATH="$HOME/misc/girder_worker/env/bin:$PATH" GW_DIRECT_PATHS=true python -m girder_worker -l info -Ofair --prefetch-multiplier=1 --without-heartbeat --concurrency=2 SIF_IMAGE_PATH=~/work/digital_slide_archive/devops/slurm/SIF \ -LOGS=~/work/digital_slide_archive/devops/slurm/logs \ -GIRDER_WORKER_SLURM_SUBMIT_SCRIPT=~/work/digital_slide_archive/devops/slurm/worker/lib/girder_worker/girder_worker/slurm/girder_worker_slurm/singluarity.slurm \ -GW_DIRECT_PATHS=true \ -python -m girder_worker -l info -Ofair --prefetch-multiplier=1 --without-heartbeat --concurrency=2 + TMP=/slurmshare/test-dsa-slurm/tmp \ + LOGS=~/work/digital_slide_archive/devops/slurm/logs \ + 
GIRDER_WORKER_SLURM_SUBMIT_SCRIPT=~/work/digital_slide_archive/devops/slurm/worker/lib/girder_worker/girder_worker/slurm/girder_worker_slurm/singluarity.slurm \ + GW_DIRECT_PATHS=true \ + python -m girder_worker -l info -Ofair --prefetch-multiplier=1 --without-heartbeat --concurrency=2 From 2fc82e4588d20b3189432c2a3cc409a87f1584d3 Mon Sep 17 00:00:00 2001 From: willdunklin Date: Tue, 4 Mar 2025 13:25:04 -0500 Subject: [PATCH 10/24] Update documentation for Slurm --- devops/slurm/README.md | 86 +++++++++ devops/slurm/README.rst | 189 ------------------- devops/slurm/docker-compose.yml | 68 +------ devops/slurm/provision.yaml | 2 +- devops/slurm/worker/.gitignore | 3 - devops/slurm/worker/{build.sh => install.sh} | 0 devops/slurm/worker/lib/.gitignore | 2 + devops/slurm/worker/run.sh | 13 +- 8 files changed, 104 insertions(+), 259 deletions(-) create mode 100644 devops/slurm/README.md delete mode 100644 devops/slurm/README.rst rename devops/slurm/worker/{build.sh => install.sh} (100%) create mode 100644 devops/slurm/worker/lib/.gitignore diff --git a/devops/slurm/README.md b/devops/slurm/README.md new file mode 100644 index 00000000..aa7cb4bd --- /dev/null +++ b/devops/slurm/README.md @@ -0,0 +1,86 @@ +# DSA Slurm + +This is a setup for running the DSA with Slurm workers. + +## Requirements + +We expect to run on: + +- Slurm control node (TODO: what's the proper name?) + - Have access to `sbatch`, `scontrol show job`, `scancel`. +- Have apptainer installed on compute nodes (TODO: what's the proper name?) + + +## Key differences from typical DSA setup: + +### RabbitMQ + +Port 5672 is exposed. + + +### MongoDB + +Port 27017 is exposed. + + +### Worker + +We run the girder_worker outside of the docker containers. We do this because we need the worker to access the Slurm CLI. +The worker setup is located in `worker/`. + +Key files in `worker/`: +- `create.sh` - creates a virtual environment and clones `girder_worker`/`slicer_cli_web` repositories. 
+- `install.sh` - installs the slurm worker from cloned repositories. +- `run.sh` - runs the girder worker. Contains environment variables THAT MUST BE CONFIGURED (see below). +- `lib/` - directory for cloned repositories (TODO: rename?) + + +*BEFORE RUNNING*: edit `run.sh` to ensure the correct environment variables are set. See [Environment variables](#environment-variables) section for more information. + + +To get started with the worker, run the following commands: +```bash +cd worker +# only run create.sh when you first set up the worker +# (or when you want to recreate the virtual environment) +./create.sh + +# run install.sh to install girder_worker in editable mode +# you should only need to run this once +./install.sh + +# *set the environment variables in run.sh* + +# run the worker +./run.sh +``` + +Navigate to http://{dsa.url}/#plugins/worker/task/status to check if the worker is connected to Girder. + + +You may need to modify the `Worker` plugin settings in Girder so the worker can find Girder. Ensure the `Alternative Girder API URL` setting is set to the Girder API URL from the perspective of the worker. In most cases, this will be `http://localhost:8080/api/v1`. + +If the worker fails to connect to `rabbitmq`, this may be because the celery `broker`/`backend` configuration is invalid. To check these config values, see `worker/lib/girder_worker/girder_worker/worker.dist.cfg`. In most cases these values should look like `amqp://guest:guest@localhost/` for the broker and `rpc://guest:guest@localhost/` for the backend, where localhost is the RabbitMQ address (with the implied default port of 5672). + + +### Misc + +*One RabbitMQ/Celery config note:* + +While the worker expects RabbitMQ broker/backend to run on `localhost`, the girder docker container will expect the address to be `rabbitmq`. +To ensure girder gets the correct configuration, we mount the properly configured `./worker.dist.cfg` and copy it to the correct location during girder provisioning. 
+ + +## Environment variables + +The following variables specify paths which need to be accessible by both the worker's node and compute nodes. +- `TMP`: temporary directory for the worker to store files. +- `LOGS`: directory for the worker to store logs. +- `SIF_IMAGE_PATH`: directory for the worker to store Singularity/Apptainer images. See note below. +- `GIRDER_WORKER_SLURM_SUBMIT_SCRIPT`: path to the script that submits jobs to Slurm. See `girder_worker/girder_worker/slurm/girder_worker_slurm/singularity.slurm`. + + +Additionally, `SIF_IMAGE_PATH` should be set to the same directory we mount in `docker-compose.yml` for the girder container (whatever is mounted to `/SIF`). +This is because girder will pull the Singularity/Apptainer images and store them in this directory. The worker will then use this directory to access the images. + +These environment variables are set in `worker/run.sh`. You must edit this file to set the correct values before running the worker. diff --git a/devops/slurm/README.rst b/devops/slurm/README.rst deleted file mode 100644 index cd5fa61c..00000000 --- a/devops/slurm/README.rst +++ /dev/null @@ -1,189 +0,0 @@ -======================================== -Digital Slide Archive via Docker Compose -======================================== - -This directory contains a complete docker compose set up for the Digital Slide Archive. - -Edit the docker-compose.yml file (or add a docker compose override file) to add mount points for additional data or for exposing additional ports. - -Prerequisites -------------- - -Before using this, you need both Docker and docker compose. See the `official installation instructions `_. - -The docker compose file assumes certain file paths. This has been tested on Ubuntu 20.04. It will probably work on other Linux variants. 
- -Get the Digital Slide Archive repository:: - - git clone https://github.com/DigitalSlideArchive/digital_slide_archive - -Hardware Requirements -~~~~~~~~~~~~~~~~~~~~~ - -The main server has only modest hardware requirements. It can run in a 2 core, 8 GByte machine (such as an t3.large EC2 instance), but it will perform better with more memory and cores. - -Storage requirements are largely driven by the images used and the number of annotations. For a small test instance, a few GBytes will suffice. For a deployment with high usage, this will need to be much, much larger. - -The worker's requirements are highly dependent on the algorithms being run. The core HistomicsTK examples do not require a GPU, but benefit for more cores and more memory. For a small test instance, this can be run on the same machine as the server. For a serious deployment, multiple workers with GPUs will greatly facilitate some jobs. - -Start ------ - -Change to the appropriate directory:: - - cd digital_slide_archive/devops/dsa/ - -To get the most recent built docker images, do:: - - docker compose pull - -If you don't pull the images, the main image will be built in preference to pulling. - -To start the Digital Slide Archive:: - - DSA_USER=$(id -u):$(id -g) docker compose up - -This uses your current user id so that database files, logs, assetstore files, and temporary files are owned by the current user. If you omit setting ``DSA_USER``, files may be created owned by root. - -The girder instance can now be accessed at http://localhost:8080. By default, it creates an ``admin`` user with a password of ``password``. Note that this example does not add any default tasks or sample files. You can log in with the admin user and use the Slicer CLI Web plugin settings to add default tasks (e.g., ``dsarchive/histomicstk:latest``). - -Stop ----- - -To stop the Digital Slide Archive:: - - docker compose down -v - -The ``-v`` option removes unneeded temporary docker volumes. 
- -Sample Data ------------ - -Sample data can be added after performing ``docker compose up`` by running:: - - python3 utils/cli_test.py dsarchive/histomicstk:latest --test - -This downloads the HistomicsTK analysis tools, some sample data, and runs nuclei detection on some of the sample data. You need Python 3.6 or later available and may need to ``pip install girder-client`` before you can run this command. - - -Development ------------ - -You can log into the running ``girder`` or ``worker`` containers by typing:: - - docker compose exec girder bash - -There are two convenience scripts ``restart_girder.sh`` and ``rebuild_and_restart_girder.sh`` that can be run in the container. - -You can develop source code by mounting the source directory into the container. See the ``docker-compose.yml`` file for details. - -If you need to log into the container as the Girder user, type:: - - docker compose exec --user $(id -u) girder bash - -Technical Details ------------------ - -The Digital Slider Archive is built in Girder and Girder Worker. Here, these are coordinated using docker compose. There are five containers that are started: - -- `Girder `_. Girder is an asset and user management system. It handles permissions and serves data via http. - -- `MongoDB `_. Girder stores settings and information about users and assets in a MongoDB database. - -- `Girder Worker `_. Girder Worker is a task runner based on `Celery `_ that has specific features to get authenticated data from Girder. - -- `RabbitMQ `_. Girder communicates to Girder Worker through a broker. In this configuration it is RabbitMQ. Girder Worker can be run on multiple computers communicating with a single broker to distribute processing. - -- `Memcached `_. Memcached is used to cache data for faster access. This is used for large tiled images. - -The Digital Slide Archive relies on several Girder plugins: - -- `large_image `_. This provides a standardized way to access a wide range of image formats. 
Images can be handled as multi-resolution tiles. large_image has numerous tile sources to handle different formats. - -- `HistomicUI `_. This provides a user interface to examine and annotate large images. - -- `Slicer CLI Web `_. This can run processing tasks in Docker containers. Tasks report their capabilities via the Slicer CLI standard, listing required and optional inputs and outputs. These tasks can be selected and configured via Girder and HistomicsUI and then run in a distributed fashion via Girder Worker. - -Slicer CLI Web runs tasks in Docker containers and is itself running in a Docker container (in Girder for determining options and Girder Worker to run the task). In order to allow a process in a docker container to create another docker container, the paths the docker executable and communications sockets are mounted from the host to the docker container. - -Permissions ------------ - -By default, the girder container is run in Docker privileged mode. This can be reduced to a small set of permissions (see the docker-compose.yml file for details), but these may vary depending on the host system. If no extra permissions are granted, or if the docker daemon is started with --no-new-privileges, or if libfuse is not installed on the host system, the internal fuse mount will not be started. This may prevent full functionality with non-filesystem assestores and with some multiple-file image formats. - -Customizing ------------ - -Since this uses standard docker compose, you can customize the process by creating a ``docker-compose.override.yml`` file in the same directory (or a yaml file of any name and use appropriate ``docker compose -f docker-compose.yml -f `` command). Further, if you mount a provisioning yaml file into the docker image, you can customize settings, plugins, resources, and other options. - -See the ``docker-compose.yml`` and ``provision.yaml`` files for details. 
- -Example -~~~~~~~ - -To add some additional girder plugins and mount additional directories for assetstores, you can do something like this: - -``docker-compose.override.yml``:: - - --- - version: '3' - services: - girder: - environment: - # Specify that we want to use the provisioning file - DSA_PROVISION_YAML: ${DSA_PROVISION_YAML:-/opt/digital_slide_archive/devops/dsa/provision.yaml} - volumes: - # Mount the local provisioning file into the container - - ./provision.local.yaml:/opt/digital_slide_archive/devops/dsa/provision.yaml - # Also expose a local data mount into the container - - /mnt/data:/mnt/data - -``provision.local.yaml``:: - - --- - # Load some sample data - samples: True - # A list of additional pip modules to install - pip: - - girder-oauth - - girder-ldap - # rebuild the girder web client since we install some additional plugins - rebuild-client: True - # List slicer-cli-images to pull and load - slicer-cli-image: - - dsarchive/histomicstk:latest - - girder/slicer_cli_web:small - -Using Private Docker Registries for CLI images -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -One of the principal abilities of the Digital Slide Archive is to run algorithms that are packages via Docker and expose their interface via the Slicer Execution Model. See `HistomicTK `_ as an example. - -For docker images that are published on public container registries, these can be imported either as part of the provisioning process or via the Slicer CLI Web plugin UI by using the docker image tag (e.g., ``dsarchive/histomcstk:latest``). 
- -Since private registries require authentication, pulling docker images from private registries will not work in the reference deployment without either logging into the running docker container (for both the main Girder container and for any and all girder_worker containers) and authenticating via the ``docker login `` OR by authenticating on the base operating system and passing through the authentication as part of the provisioning process. - -An example of passing through the authentication using docker compose is commented in the default docker-compose.yaml file. In this case, use ``docker login`` on the base machine running the DSA and on any worker machines. Use the appropriate override: - -``docker-compose.override.yml``:: - - --- - version: '3' - services: - girder: - environment: - DOCKER_CONFIG: /.docker - volumes: - - /home//.docker:/.docker:ro - worker: - environment: - DOCKER_CONFIG: /.docker - volumes: - - /home//.docker:/.docker:ro - -Docker images can then be added via the provisioning or via the UI using the appropriate private registry and tag (e.g., ``private_registry:5000/dsarchive/histomicstk:latest`` would pull the image from a registry called ``private_registry`` that serves data on port 5000). - -Database Backup ---------------- - -You may want to periodically back up the database. The standard ``mongodump`` tool can be used for this via a command line ``docker compose exec mongodb /usr/bin/mongodump --db girder --archive --gzip > dsa_girder.dump.gz``. Restoring is similar: ``docker compose exec -T mongodb /usr/bin/mongorestore --db girder --archive --gzip < /tmp/dsa_girder.dump.gz``; you may want to add ``--drop`` as flag to the restore process. See Mongo's official documentation for details. 
diff --git a/devops/slurm/docker-compose.yml b/devops/slurm/docker-compose.yml index b0bda66c..b88df273 100644 --- a/devops/slurm/docker-compose.yml +++ b/devops/slurm/docker-compose.yml @@ -39,8 +39,11 @@ services: # below), and do "docker login " on the host machine before # starting the DSA. # DOCKER_CONFIG: /.docker + + # Environment variables needed for girder_worker_slurm SIF_IMAGE_PATH: /SIF LOGS: /logs + restart: unless-stopped # Set DSA_PORT to expose the interface on another port (default 8080). ports: @@ -59,6 +62,10 @@ services: # Location to store logs - ./logs:/logs + # Mount the SIF directory to allow shared access to singularity images + - ./SIF:/SIF + + # Mount local celery config (see README.md for more info) - ./worker.dist.cfg:/opt/worker.dist.cfg # For local development, uncomment the set of mounts associated with the @@ -66,7 +73,6 @@ services: # mounting source files from the host without breaking the internal data. # - /opt/HistomicsUI/histomicsui.egg-info # - ../../../HistomicsUI:/opt/HistomicsUI - - ./SIF:/SIF # See comments about authorizing docker repositories above # - /home//.docker:/.docker:ro @@ -147,63 +153,3 @@ services: interval: 30s timeout: 30s retries: 3 - # worker: - # image: dsarchive/dsa_common - # build: - # context: ../.. - # # We use this to optionally set version information during the build - # dockerfile: slurm.Dockerfile - # args: - # DSA_VERSIONS: ${DSA_VERSIONS:-} - # # Set DSA_USER to a user id that is part of the docker group (e.g., - # # `DSA_USER=$(id -u):$(id -g)`). 
This provides permissions to manage - # # docker - # environment: - # DSA_USER: ${DSA_USER:-} - # DSA_WORKER_CONCURRENCY: ${DSA_WORKER_CONCURRENCY:-2} - # DSA_PROVISION_YAML: ${DSA_PROVISION_YAML:-/opt/digital_slide_archive/devops/dsa/provision.yaml} - # TMPDIR: - # SIF_IMAGE_PATH: /SIF - # LOGS: /logs # this is needed in slicer_cli_web -> direct_singularity_run.py:53 - # GIRDER_WORKER_SLURM_SUBMIT_SCRIPT: /opt/girder_worker/girder_worker/slurm/girder_worker_slurm/singluarity.slurm - # # See comments about authorizing docker repositories above - # # DOCKER_CONFIG: /.docker - # restart: unless-stopped - - # privileged: true # needed for apptainer in docker. TODO: re-evaulate method - - # volumes: - # # Needed to use slicer_cli_web to run docker containers - # - /var/run/docker.sock:/var/run/docker.sock - # # Modify the worker.local.cfg to specify a different rabbitmq server and - # # then enable this mount. On the rabbitmq server, make sure you add a - # # non-guest default user and use that both in the worker and in the main - # # girder settings. - # # - ./worker.local.cfg:/opt/girder_worker/girder_worker/worker.local.cfg - # # Allow overriding the start command - # - ./provision.py:/opt/digital_slide_archive/devops/dsa/provision.py - # - ./provision.yaml:/opt/digital_slide_archive/devops/dsa/provision.yaml - # - ./start_worker.sh:/opt/digital_slide_archive/devops/dsa/start_worker.sh - # # Needed to allow transferring data to slicer_cli_web docker containers - # - ${TMPDIR:-/tmp}:${TMPDIR:-/tmp} - - # # See comments about authorizing docker repositories above - # # - /home//.docker:/.docker:ro - # - ./SIF:/SIF - # - ./logs:/logs - - # # Add additional mounts here to get access to existing files on your - # # system if they have the same path as on the girder container. 
- # depends_on: - # - rabbitmq - # command: /opt/digital_slide_archive/devops/dsa/start_worker.sh - # healthcheck: - # test: ["CMD", "celery", "-b", "amqp://rabbitmq:5672", "inspect", "ping"] - # interval: 5m - # timeout: 10s - # retries: 3 - # start_period: 30s - # logging: - # options: - # max-size: "10M" - # max-file: "5" diff --git a/devops/slurm/provision.yaml b/devops/slurm/provision.yaml index c05aad17..aa8c798e 100644 --- a/devops/slurm/provision.yaml +++ b/devops/slurm/provision.yaml @@ -73,7 +73,7 @@ resources: settings: worker.broker: "amqp://guest:guest@rabbitmq" worker.backend: "rpc://guest:guest@rabbitmq" - worker.api_url: "http://girder:8080/api/v1" + worker.api_url: "http://localhost:8080/api/v1" worker.direct_path: True core.brand_name: "Digital Slide Archive" # core.http_only_cookies: True diff --git a/devops/slurm/worker/.gitignore b/devops/slurm/worker/.gitignore index feae96b1..f7275bbb 100644 --- a/devops/slurm/worker/.gitignore +++ b/devops/slurm/worker/.gitignore @@ -1,4 +1 @@ venv/ - -girder_work/ -slicer_cli_web/ diff --git a/devops/slurm/worker/build.sh b/devops/slurm/worker/install.sh similarity index 100% rename from devops/slurm/worker/build.sh rename to devops/slurm/worker/install.sh diff --git a/devops/slurm/worker/lib/.gitignore b/devops/slurm/worker/lib/.gitignore new file mode 100644 index 00000000..d6b7ef32 --- /dev/null +++ b/devops/slurm/worker/lib/.gitignore @@ -0,0 +1,2 @@ +* +!.gitignore diff --git a/devops/slurm/worker/run.sh b/devops/slurm/worker/run.sh index 324d6558..424d6558 100755 --- a/devops/slurm/worker/run.sh +++ b/devops/slurm/worker/run.sh @@ -1,12 +1,15 @@ #!/usr/bin/env bash +CURRENT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" + . 
./venv/bin/activate -# TODO: communicate TMP assignment (it needs to be accessible by worker's node and compute node) +echo -e "Before running, please edit environment variables per README.md instructions.\n Once set, remove this line from script.\n" && exit 1 -SIF_IMAGE_PATH=~/work/digital_slide_archive/devops/slurm/SIF \ - TMP=/slurmshare/test-dsa-slurm/tmp \ - LOGS=~/work/digital_slide_archive/devops/slurm/logs \ - GIRDER_WORKER_SLURM_SUBMIT_SCRIPT=~/work/digital_slide_archive/devops/slurm/worker/lib/girder_worker/girder_worker/slurm/girder_worker_slurm/singluarity.slurm \ +# CHANGE THESE VALUES (see README.md) +TMP=/slurmshare/test-dsa-slurm/tmp \ + SIF_IMAGE_PATH="$CURRENT_DIR/../SIF" \ + LOGS="$CURRENT_DIR/../logs" \ + GIRDER_WORKER_SLURM_SUBMIT_SCRIPT="$CURRENT_DIR/lib/girder_worker/girder_worker/slurm/girder_worker_slurm/singluarity.slurm" \ GW_DIRECT_PATHS=true \ python -m girder_worker -l info -Ofair --prefetch-multiplier=1 --without-heartbeat --concurrency=2 From 9659982d53e96f591177e23c294becee93467810 Mon Sep 17 00:00:00 2001 From: willdunklin Date: Tue, 4 Mar 2025 13:31:01 -0500 Subject: [PATCH 11/24] Add DSA readme instructions --- slurm.Dockerfile => apptainer.Dockerfile | 0 devops/slurm/README.md | 11 +++++++++++ devops/slurm/docker-compose.yml | 2 +- 3 files changed, 12 insertions(+), 1 deletion(-) rename slurm.Dockerfile => apptainer.Dockerfile (100%) diff --git a/slurm.Dockerfile b/apptainer.Dockerfile similarity index 100% rename from slurm.Dockerfile rename to apptainer.Dockerfile diff --git a/devops/slurm/README.md b/devops/slurm/README.md index aa7cb4bd..1ef138b7 100644 --- a/devops/slurm/README.md +++ b/devops/slurm/README.md @@ -84,3 +84,14 @@ Additionally, `SIF_IMAGE_PATH` should be set to the same directory we mount in ` This is because girder will pull the Singularity/Apptainer images and store them in this directory. The worker will then use this directory to access the images. These environment variables are set in `worker/run.sh`. 
You must edit this file to set the correct values before running the worker. + + +## To run the DSA + +```bash +# build the docker containers (needed for apptainer in docker) +docker compose build + +# stop/start the girder and worker containers +docker compose down; DSA_USER=$(id -u):$(id -g) docker compose up +``` diff --git a/devops/slurm/docker-compose.yml b/devops/slurm/docker-compose.yml index b88df273..beb18657 100644 --- a/devops/slurm/docker-compose.yml +++ b/devops/slurm/docker-compose.yml @@ -4,7 +4,7 @@ services: image: dsarchive/dsa_common build: context: ../.. - dockerfile: slurm.Dockerfile + dockerfile: apptainer.Dockerfile # We use this to optionally set version information during the build args: DSA_VERSIONS: ${DSA_VERSIONS:-} From 3dc7a7d93c7669b7ba5c449d1d70f9057cf830ae Mon Sep 17 00:00:00 2001 From: willdunklin Date: Tue, 4 Mar 2025 13:59:46 -0500 Subject: [PATCH 12/24] Add entry_path disclaimer --- devops/slurm/README.md | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/devops/slurm/README.md b/devops/slurm/README.md index 1ef138b7..776c21b9 100644 --- a/devops/slurm/README.md +++ b/devops/slurm/README.md @@ -95,3 +95,16 @@ docker compose build # stop/start the girder and worker containers docker compose down; DSA_USER=$(id -u):$(id -g) docker compose up ``` + + +## Note on Apptainer `slicer_cli_web` images + +Typically the DSA executes CLIs using Docker images. `slicer_cli_web` manages the pulling of these images while `girder_worker` manages the execution of the CLIs. + +However with most Slurm/HPC systems, we need to run the CLIs in Apptainer (aka Singularity) containers since Docker execution is limited. Apptainer provides a default mechanism for converting Docker images to Apptainer images. + +One critical difference between Docker and Apptainer images is that Apptainer doesn't have a notion of a `WORKDIR`. Unfortunately this means that `WORKDIR` information is lost during the image conversion process. 
This causes issues with CLIs that rely on the `WORKDIR` to find files. + +Our current workaround is to manually add an `entry_path` `LABEL` to Docker images we want to convert (where `entry_path` is set to the `WORKDIR` value). + +To make your Docker image CLI compatible with `girder_worker_slurm`, add the `LABEL entry_path=/path/to/workdir` to your image. From bf5947b28bdd5a7e41c21930ee2c7b602cfa19c7 Mon Sep 17 00:00:00 2001 From: willdunklin Date: Tue, 4 Mar 2025 14:05:23 -0500 Subject: [PATCH 13/24] Update readme --- devops/slurm/README.md | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/devops/slurm/README.md b/devops/slurm/README.md index 776c21b9..99b69008 100644 --- a/devops/slurm/README.md +++ b/devops/slurm/README.md @@ -6,10 +6,9 @@ This is a setup for running the DSA with Slurm workers. We expect to run on: -- Slurm control node (TODO: what's the proper name?) +- Slurm control node - Have access to `sbatch`, `scontrol show job`, `scancel`. -- Have apptainer installed on compute nodes (TODO: what's the proper name?) - +- Have apptainer installed on compute nodes. ## Key differences from typical DSA setup: From e8311325c8714eb97b0429f16a0f13bbda342a86 Mon Sep 17 00:00:00 2001 From: willdunklin Date: Tue, 4 Mar 2025 14:10:20 -0500 Subject: [PATCH 14/24] Add references to external repos in README --- devops/slurm/README.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/devops/slurm/README.md b/devops/slurm/README.md index 99b69008..59a9e1b5 100644 --- a/devops/slurm/README.md +++ b/devops/slurm/README.md @@ -107,3 +107,11 @@ One critical difference between Docker and Apptainer images is that Apptainer do Our current workaround is to manually add an `entry_path` `LABEL` to Docker images we want to convert (where `entry_path` is set to the `WORKDIR` value). To make your Docker image CLI compatible with `girder_worker_slurm`, add the `LABEL entry_path=/path/to/workdir` to your image. 
+ + +## See also + +- [Girder Worker Slurm](https://github.com/girder/girder_worker/tree/slurm/girder_worker/slurm) plugin +- [Girder Worker Singularity](https://github.com/girder/girder_worker/tree/slurm/girder_worker/singularity) plugin +- [Slicer CLI Web Singularity](https://github.com/girder/slicer_cli_web/tree/slicer-cli-web-singularity) plugin +- [Apptainer](https://apptainer.org/docs/admin/main/installation.html) installation docs From f71211d8fd68b03201d5a4c8d716e79f4e58d638 Mon Sep 17 00:00:00 2001 From: willdunklin Date: Tue, 4 Mar 2025 14:19:02 -0500 Subject: [PATCH 15/24] Cleanup unused files --- devops/slurm/start_worker.sh | 34 -- devops/slurm/utils/.vimrc | 39 --- devops/slurm/utils/cli_test.py | 322 ------------------ .../slurm/utils/rebuild_and_restart_girder.sh | 11 - devops/slurm/utils/restart_girder.sh | 8 - 5 files changed, 414 deletions(-) delete mode 100755 devops/slurm/start_worker.sh delete mode 100644 devops/slurm/utils/.vimrc delete mode 100755 devops/slurm/utils/cli_test.py delete mode 100755 devops/slurm/utils/rebuild_and_restart_girder.sh delete mode 100755 devops/slurm/utils/restart_girder.sh diff --git a/devops/slurm/start_worker.sh b/devops/slurm/start_worker.sh deleted file mode 100755 index 0437668e..00000000 --- a/devops/slurm/start_worker.sh +++ /dev/null @@ -1,34 +0,0 @@ -#!/bin/bash -# Ensures that the main process runs as the DSA_USER and is part of both that -# group and the docker group. Fail if DSA_USER is not specified. -if [[ -z "$DSA_USER" ]]; then - echo "Set the DSA_USER before starting (e.g, DSA_USER=\$$(id -u):\$$(id -g) " - exit 1 -fi -# add a user with the DSA_USER's id; this user is named ubuntu if it doesn't -# exist. -adduser --uid ${DSA_USER%%:*} --disabled-password --gecos "" ubuntu 2>/dev/null -# add a group with the DSA_USER's group id. -addgroup --gid ${DSA_USER#*:} $(id -ng ${DSA_USER#*:}) 2>/dev/null -# add the user to the user group. 
-adduser $(id -nu ${DSA_USER%%:*}) $(getent group ${DSA_USER#*:} | cut "-d:" -f1) 2>/dev/null -# add a group with the docker group id. -addgroup --gid $(stat -c "%g" /var/run/docker.sock) docker 2>/dev/null -# add the user to the docker group. -adduser $(id -nu ${DSA_USER%%:*}) $(getent group $(stat -c "%g" /var/run/docker.sock) | cut "-d:" -f1) 2>/dev/null -# Try to increase permissions for the docker socket; this helps this work on -# OSX where the users don't translate -chmod 777 /var/run/docker.sock 2>/dev/null || true -chmod 777 ${TMPDIR:-/tmp} || true -echo ==== Pre-Provisioning === -python3 /opt/digital_slide_archive/devops/dsa/provision.py --worker-pre -echo ==== Provisioning === && -python3 /opt/digital_slide_archive/devops/dsa/provision.py --worker-main -echo ==== Starting Worker === && -# Run subsequent commands as the DSA_USER. This sets some paths based on what -# is expected in the Docker so that the current python environment and the -# devops/dsa/utils are available. Then it runs girder_worker -su $(id -nu ${DSA_USER%%:*}) -c " - PATH=\"/opt/digital_slide_archive/devops/dsa/utils:/opt/venv/bin:/.pyenv/bin:/.pyenv/shims:$PATH\"; - DOCKER_CLIENT_TIMEOUT=86400 TMPDIR=${TMPDIR:-/tmp} GW_DIRECT_PATHS=true python -m girder_worker --concurrency=${DSA_WORKER_CONCURRENCY:-2} -Ofair --prefetch-multiplier=1 -" diff --git a/devops/slurm/utils/.vimrc b/devops/slurm/utils/.vimrc deleted file mode 100644 index 8b12d7ce..00000000 --- a/devops/slurm/utils/.vimrc +++ /dev/null @@ -1,39 +0,0 @@ -" see :options -" expandtabs -set et -set tabstop=4 -" shiftwidth -set sw=4 -set nocindent -" autoindent -set ai -" tell indenting programs that we already indented the buffer -let b:did_indent = 1 -" don't do an incremental search (don't search before we finish typing) -set nois -" don't ignore case by default -set noic -" don't break at 80 characters -set wrap -" don't add linebreaks at 80 characters -set nolbr -" highlight all search matches -set hls -" default to utf-8 
-set enc=utf-8 -" show the cursor position -set ruler -" allow backspace to go to the previous line -set bs=2 -" keep this much history -set history=50 -" don't try to maintain vi compatibility -set nocompatible - -" syntax highlighting is on -syntax on -" save information for 100 files, with up to 50 lines for each register -set viminfo='100,\"50 -if v:lang =~ "utf8$" || v:lang =~ "UTF-8$" - set fileencodings=utf-8,latin1 -endif diff --git a/devops/slurm/utils/cli_test.py b/devops/slurm/utils/cli_test.py deleted file mode 100755 index dc331a6e..00000000 --- a/devops/slurm/utils/cli_test.py +++ /dev/null @@ -1,322 +0,0 @@ -#!/usr/bin/env python - -import argparse -import getpass -import random -import sys -import tempfile -import time - -import girder_client - - -def get_girder_client(opts): - """ - Log in to Girder and return a reference to the client. - - :param opts: options that include the username, password, and girder api - url. - :returns: the girder client. - """ - token = opts.get('token') - username = opts.get('username') - password = opts.get('password') - if not username and not token: - username = input('Admin login: ') - if not password and not token: - password = getpass.getpass('Password for %s: ' % ( - username if username else 'default admin user')) - client = girder_client.GirderClient(apiUrl=opts['apiurl']) - if token: - client.setToken(token) - else: - client.authenticate(username, password) - return client - - -def get_test_data(client, opts): # noqa - """ - Make sure we have a test collection with a folder with test data. - - :param client: girder client. - :param opts: command line options. 
- """ - collName = 'HistomicsTK Tests' - try: - collection = client.resourceLookup('/collection/' + collName) - except Exception: - collection = None - if not collection: - collection = client.createCollection(collName, public=True) - folderName = 'Images' - try: - folder = client.resourceLookup('/collection/%s/%s' % (collName, folderName)) - except Exception: - folder = None - if not folder: - folder = client.createFolder(collection['_id'], folderName, parentType='collection') - remote = girder_client.GirderClient(apiUrl='https://data.kitware.com/api/v1') - remoteFolder = remote.resourceLookup('/collection/HistomicsTK/Deployment test images') - for item in remote.listItem(remoteFolder['_id']): - localPath = '/collection/%s/%s/%s' % (collName, folderName, item['name']) - try: - localItem = client.resourceLookup(localPath) - except Exception: - localItem = None - if localItem: - if opts.get('test') == 'local': - continue - client.delete('item/%s' % localItem['_id']) - localItem = client.createItem(folder['_id'], item['name']) - for remoteFile in remote.listFile(item['_id']): - with tempfile.NamedTemporaryFile() as tf: - fileName = tf.name - tf.close() - sys.stdout.write('Downloading %s' % remoteFile['name']) - sys.stdout.flush() - remote.downloadFile(remoteFile['_id'], fileName) - sys.stdout.write(' .') - sys.stdout.flush() - client.uploadFileToItem( - localItem['_id'], fileName, filename=remoteFile['name'], - mimeType=remoteFile['mimeType']) - sys.stdout.write('.\n') - sys.stdout.flush() - for item in list(client.listItem(folder['_id'])): - if '.anot' in item['name']: - sys.stdout.write('Deleting %s\n' % item['name']) - sys.stdout.flush() - client.delete('item/%s' % item['_id']) - continue - if 'largeImage' not in item: - sys.stdout.write('Making large_item %s ' % item['name']) - sys.stdout.flush() - job = client.post('item/%s/tiles' % item['_id']) - if job is not None: - job, peak_memory = wait_for_job(client, job) - else: - print('done') - return folder - - -def 
install_cli(client, imageName): - """ - Make sure the specified CLI is installed. - - :param client: girder client. - :param imageName: name of the CLI docker image - """ - client.put('slicer_cli_web/docker_image', data={'name': '["%s"]' % imageName}) - job = client.get('job/all', parameters={ - 'sort': 'created', 'sortdir': -1, - 'types': '["slicer_cli_web_job"]', - 'limit': 1})[0] - sys.stdout.write('Adding %s ' % imageName) - wait_for_job(client, job) - - -def get_memory_use(client): - """ - Get the memory use as reported by the system. - - :return: the system/check virtualMemory['used'] information. - """ - info = client.get('system/check?mode=quick') - return info['virtualMemory']['used'] - - -def test_cli(client, folder, opts): # noqa - """ - Run the CLI on an image and make sure we get an annotation out of it. - - :param client: girder client. - :param folder: the parent folder of the test images. - :param opts: command line options. - """ - testItem = None - if not opts.get('testid'): - for item in client.listItem(folder['_id']): - if item['name'].startswith('TCGA-02'): - testItem = item - break - else: - testItem = {'_id': opts.get('testid')} - localFile = next(client.listFile(testItem['_id'])) - path = 'slicer_cli_web/%s/NucleiDetection/run' % ( - opts['cli'].replace('/', '_').replace(':', '_'), ) - sys.stdout.write('Running %s ' % opts['cli']) - sys.stdout.flush() - anList = client.get('annotation', parameters={ - 'itemId': testItem['_id'], 'sort': '_id', 'sortdir': -1, 'limit': 1}) - lastOldAnnotId = None - if len(anList): - lastOldAnnotId = anList[0]['_id'] - memory_use = get_memory_use(client) - starttime = time.time() - region = '[15000,15000,1000,1000]' - if opts.get('randomregion'): - metadata = client.get('item/%s/tiles' % testItem['_id']) - w = metadata['sizeX'] - h = metadata['sizeY'] - rw = random.randint(500, 5000) - rh = random.randint(500, 5000) - region = '[%d,%d,%d,%d]' % (random.randint(0, w - rw), random.randint(0, h - rh), rw, rh) - if 
opts.get('noregion'): - region = '[-1,-1,-1,-1]' - data = { - 'inputImageFile': localFile['_id'], - 'outputNucleiAnnotationFile_folder': folder['_id'], - 'outputNucleiAnnotationFile': 'cli_test.anot', - 'analysis_roi': region, - 'foreground_threshold': '60', - 'min_fgnd_frac': '0.05', - - 'analysis_tile_size': '4096', - 'nuclei_annotation_format': 'bbox', - 'max_radius': '30', - 'min_radius': '20', - } - if opts.get('testarg') and len(opts.get('testarg')): - testarg = {val.split('=', 1)[0]: val.split('=', 1)[1] for val in opts['testarg']} - data.update(testarg) - if opts.get('verbose', 0) >= 1: - sys.stdout.write('%r\n' % data) - job = client.post(path, data=data) - job, peak_memory = wait_for_job(client, job) - runtime = time.time() - starttime - # Wait for the annotation to be processed after the job finishes. - maxWait = time.time() + 60 - annot = None - while not annot and time.time() < maxWait: - anList = client.get('annotation', parameters={ - 'itemId': testItem['_id'], 'sort': '_id', 'sortdir': -1, 'limit': 1}) - if len(anList) and anList[0]['_id'] != lastOldAnnotId: - annot = client.get('annotation/%s' % anList[0]['_id']) - break - time.sleep(1) - sys.stdout.write('Total time: %5.3f, Max memory delta: %d bytes, Elements: %d\n' % ( - runtime, peak_memory - memory_use, len(annot['annotation']['elements']))) - sys.stdout.flush() - if len(annot['annotation']['elements']) < 100: - raise Exception('Got less than 100 annotation elements (%d) from annotation %s' % ( - len(annot['annotation']['elements']), anList[0]['_id'])) - anList = client.get('annotation', parameters=dict( - sort='_id', sortdir=-1, itemId=testItem['_id'])) - keep = 3 - for annot in anList: - if annot['annotation']['name'] == 'cli_test-nuclei-bbox': - if keep: - keep -= 1 - else: - client.delete(f'annotation/{annot["_id"]}') - - -def test_tiles(client, folder, opts): - """ - Make sure we have a test collection with a folder with test data. - - :param client: girder client. 
- :param folder: the parent folder of the test images. - :param opts: command line options. - """ - for item in client.listItem(folder['_id']): - if 'largeImage' not in item: - raise Exception('No large image in item') - result = client.get('item/%s/tiles/region' % item['_id'], parameters={ - 'left': 100, 'top': 150, 'right': 400, 'bottom': 450, - 'encoding': 'PNG', - }, jsonResp=False) - region = result.content - if region[1:4] != b'PNG' or len(region) < 6000: - raise Exception('Region did not give expected results') - - -def wait_for_job(client, job): - """ - Wait for a job to complete. - - :param client: the girder client. - :param job: a girder job. - :return: the updated girder job. - """ - peak_memory_use = get_memory_use(client) - lastdot = 0 - jobId = job['_id'] - while job['status'] not in (3, 4, 5): - if time.time() - lastdot >= 3: - sys.stdout.write('.') - sys.stdout.flush() - lastdot = time.time() - time.sleep(0.25) - peak_memory_use = max(peak_memory_use, get_memory_use(client)) - job = client.get('job/%s' % jobId) - if job['status'] == 3: - print(' ready') - else: - print(' failed') - return job, peak_memory_use - - -if __name__ == '__main__': - parser = argparse.ArgumentParser( - description='Download test data for HistomicsTK, and test that basic functions work.') - parser.add_argument( - 'cli', - help='A cli docker image name. This is pulled and used in tests.') - parser.add_argument( - '--apiurl', '--api', '--url', '-a', - default='http://127.0.0.1:8080/api/v1', help='The Girder api url.') - parser.add_argument( - '--password', '--pass', '--passwd', '--pw', - help='The Girder admin password. If not specified, a prompt is given.') - parser.add_argument( - '--username', '--user', - help='The Girder admin username. If not specified, a prompt is given.') - parser.add_argument( - '--token', - help='A Girder admin authentication token. 
If specified, username ' - 'and password are ignored') - parser.add_argument( - '--no-cli', '--nocli', action='store_true', dest='nocli', - help='Do not pull and upload the cli; assume it is already present.') - parser.add_argument( - '--no-region', '--noregion', '--whole', action='store_true', - dest='noregion', - help='Run the cli against the whole image (this is slow).') - parser.add_argument( - '--random-region', '--randomregion', '--random', action='store_true', - dest='randomregion', - help='Run the cli against a random region on the image (this may be slow).') - parser.add_argument( - '--test', action='store_true', default=False, - help='Download test data and check that basic functions work.') - parser.add_argument( - '--test-local', '--local-test', '--local', action='store_const', - dest='test', const='local', - help='Use local test data and check that basic functions work. If ' - 'local data is not present, it is downloaded.') - parser.add_argument( - '--no-test', action='store_false', dest='test', - help='Do not download test data and do not run checks.') - parser.add_argument( - '--test-id', dest='testid', help='The ID of the item to test.') - parser.add_argument( - '--test-arg', '--arg', '--testarg', dest='testarg', action='append', - help='Test arguments. 
These should be of the form =.') - parser.add_argument( - '--only-data', '--data', action='store_const', dest='test', - const='data', - help='Download test data, but do not run CLI.') - parser.add_argument('--verbose', '-v', action='count', default=0) - - args = parser.parse_args() - if args.verbose >= 2: - print('Parsed arguments: %r' % args) - client = get_girder_client(vars(args)) - if not args.nocli: - install_cli(client, args.cli) - if args.test: - folder = get_test_data(client, vars(args)) - test_tiles(client, folder, vars(args)) - if args.test != 'data': - test_cli(client, folder, vars(args)) diff --git a/devops/slurm/utils/rebuild_and_restart_girder.sh b/devops/slurm/utils/rebuild_and_restart_girder.sh deleted file mode 100755 index 0aa29bb4..00000000 --- a/devops/slurm/utils/rebuild_and_restart_girder.sh +++ /dev/null @@ -1,11 +0,0 @@ -#!/usr/bin/env bash - -set -e - -OLDSTART=$(curl --silent 'http://127.0.0.1:8080/api/v1/system/version') -girder build --dev -touch /etc/girder.cfg -echo "Girder has been rebuilt and will now restart" -while true; do NEWSTART=$(curl --silent 'http://127.0.0.1:8080/api/v1/system/version' || true); if [ "${OLDSTART}" != "${NEWSTART}" ]; then echo ${NEWSTART} | grep -q 'release' && break || true; fi; sleep 1; echo -n "."; done -echo "" -echo "Girder has restarted" diff --git a/devops/slurm/utils/restart_girder.sh b/devops/slurm/utils/restart_girder.sh deleted file mode 100755 index 82643460..00000000 --- a/devops/slurm/utils/restart_girder.sh +++ /dev/null @@ -1,8 +0,0 @@ -#!/usr/bin/env bash - -OLDSTART=$(curl --silent 'http://127.0.0.1:8080/api/v1/system/version') -touch /etc/girder.cfg -echo "Girder will now restart" -while true; do NEWSTART=$(curl --silent 'http://127.0.0.1:8080/api/v1/system/version' || true); if [ "${OLDSTART}" != "${NEWSTART}" ]; then echo ${NEWSTART} | grep -q 'release' && break || true; fi; sleep 1; echo -n "."; done -echo "" -echo "Girder has restarted" From d3291a139f221a86e76dd7bdb94875acb666b273 
Mon Sep 17 00:00:00 2001 From: willdunklin Date: Tue, 4 Mar 2025 15:36:52 -0500 Subject: [PATCH 16/24] Update Slurm README --- devops/slurm/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/devops/slurm/README.md b/devops/slurm/README.md index 59a9e1b5..835db7e6 100644 --- a/devops/slurm/README.md +++ b/devops/slurm/README.md @@ -113,5 +113,5 @@ To make your Docker image CLI compatible with `girder_worker_slurm`, add the `LA - [Girder Worker Slurm](https://github.com/girder/girder_worker/tree/slurm/girder_worker/slurm) plugin - [Girder Worker Singularity](https://github.com/girder/girder_worker/tree/slurm/girder_worker/singularity) plugin -- [Slicer CLI Web Singularity](https://github.com/girder/slicer_cli_web/tree/slicer-cli-web-singularity) plugin +- [Slicer CLI Web Singularity](https://github.com/girder/slicer_cli_web/tree/slicer-cli-web-singularity/slicer_cli_web/singularity) plugin - [Apptainer](https://apptainer.org/docs/admin/main/installation.html) installation docs From 1bb54e5a90be0f8e872a32e631e2dfc61402d7f7 Mon Sep 17 00:00:00 2001 From: willdunklin Date: Tue, 4 Mar 2025 15:38:44 -0500 Subject: [PATCH 17/24] Add slurm dockerignore --- devops/slurm/.dockerignore | 2 ++ devops/slurm/.env | 1 + histomicstkSlurm.Dockerfile | 3 +++ 3 files changed, 6 insertions(+) create mode 100644 devops/slurm/.env create mode 100644 histomicstkSlurm.Dockerfile diff --git a/devops/slurm/.dockerignore b/devops/slurm/.dockerignore index 847e7d05..02e1858b 100644 --- a/devops/slurm/.dockerignore +++ b/devops/slurm/.dockerignore @@ -1,3 +1,5 @@ assetstore db logs +SIF +worker diff --git a/devops/slurm/.env b/devops/slurm/.env new file mode 100644 index 00000000..8c9d7984 --- /dev/null +++ b/devops/slurm/.env @@ -0,0 +1 @@ +DSA_PORT=8090 diff --git a/histomicstkSlurm.Dockerfile b/histomicstkSlurm.Dockerfile new file mode 100644 index 00000000..6aa04535 --- /dev/null +++ b/histomicstkSlurm.Dockerfile @@ -0,0 +1,3 @@ +FROM dsarchive/histomicstk:latest + 
+LABEL entry_path=/HistomicsTK/histomicstk/cli From 05773858525d4fa7773c47665ae3a1062d42f1ad Mon Sep 17 00:00:00 2001 From: willdunklin Date: Thu, 6 Mar 2025 10:01:59 -0500 Subject: [PATCH 18/24] Remove env file --- devops/slurm/.env | 1 - 1 file changed, 1 deletion(-) delete mode 100644 devops/slurm/.env diff --git a/devops/slurm/.env b/devops/slurm/.env deleted file mode 100644 index 8c9d7984..00000000 --- a/devops/slurm/.env +++ /dev/null @@ -1 +0,0 @@ -DSA_PORT=8090 From 6f230814153b9bc88ff325ee1901693e67f631a5 Mon Sep 17 00:00:00 2001 From: willdunklin Date: Wed, 12 Mar 2025 12:25:45 -0400 Subject: [PATCH 19/24] Update slicer_cli_web to use fork --- devops/slurm/provision.yaml | 2 +- devops/slurm/worker/create.sh | 2 +- histomicstkSlurm.Dockerfile | 3 --- 3 files changed, 2 insertions(+), 5 deletions(-) delete mode 100644 histomicstkSlurm.Dockerfile diff --git a/devops/slurm/provision.yaml b/devops/slurm/provision.yaml index aa8c798e..754daf46 100644 --- a/devops/slurm/provision.yaml +++ b/devops/slurm/provision.yaml @@ -118,7 +118,7 @@ worker: shell: - rm -rf /opt/girder_worker /opt/slicer_cli_web - git clone --branch slurm https://github.com/girder/girder_worker.git /opt/girder_worker - - git clone --branch slicer-cli-web-singularity https://github.com/girder/slicer_cli_web.git /opt/slicer_cli_web + - git clone --branch slicer-cli-web-singularity https://github.com/willdunklin/slicer_cli_web.git /opt/slicer_cli_web - cp /opt/worker.dist.cfg /opt/girder_worker/girder_worker/worker.dist.cfg - pip install -e /opt/girder_worker - pip install -e /opt/girder_worker/girder_worker/singularity diff --git a/devops/slurm/worker/create.sh b/devops/slurm/worker/create.sh index 92da6aca..b16b6959 100755 --- a/devops/slurm/worker/create.sh +++ b/devops/slurm/worker/create.sh @@ -5,4 +5,4 @@ rm -rf ./lib/girder_worker ./lib/slicer_cli_web ./venv python -m venv ./venv git clone --branch slurm https://github.com/girder/girder_worker.git ./lib/girder_worker -git clone --branch 
slicer-cli-web-singularity https://github.com/girder/slicer_cli_web.git ./lib/slicer_cli_web +git clone --branch slicer-cli-web-singularity https://github.com/willdunklin/slicer_cli_web.git ./lib/slicer_cli_web diff --git a/histomicstkSlurm.Dockerfile b/histomicstkSlurm.Dockerfile deleted file mode 100644 index 6aa04535..00000000 --- a/histomicstkSlurm.Dockerfile +++ /dev/null @@ -1,3 +0,0 @@ -FROM dsarchive/histomicstk:latest - -LABEL entry_path=/HistomicsTK/histomicstk/cli From 2c04ad49a253df8cede6b6ab48bd19920c08ff98 Mon Sep 17 00:00:00 2001 From: willdunklin Date: Tue, 25 Mar 2025 13:51:14 -0400 Subject: [PATCH 20/24] Update singularity configuration --- devops/singularity-minimal/README.md | 16 +++++++ devops/singularity-minimal/README.rst | 9 ---- devops/singularity-minimal/dsa_common.def | 4 +- devops/singularity-minimal/dsa_compose.sh | 44 ++++++++++-------- devops/singularity-minimal/instance_run.sh | 10 ----- devops/singularity-minimal/instance_start.sh | 29 ------------ devops/singularity-minimal/instance_stop.sh | 5 --- devops/singularity-minimal/opt/.gitignore | 2 - devops/singularity-minimal/provision.yaml | 45 ++++++++++++++----- .../{instance_pull.sh => pull_images.sh} | 1 - devops/singularity-minimal/start_girder.sh | 2 + devops/singularity-minimal/start_worker.sh | 2 + .../singularity-minimal/worker_opt/.gitignore | 2 - 13 files changed, 81 insertions(+), 90 deletions(-) create mode 100644 devops/singularity-minimal/README.md delete mode 100644 devops/singularity-minimal/README.rst delete mode 100755 devops/singularity-minimal/instance_run.sh delete mode 100755 devops/singularity-minimal/instance_start.sh delete mode 100755 devops/singularity-minimal/instance_stop.sh delete mode 100644 devops/singularity-minimal/opt/.gitignore rename devops/singularity-minimal/{instance_pull.sh => pull_images.sh} (74%) delete mode 100644 devops/singularity-minimal/worker_opt/.gitignore diff --git a/devops/singularity-minimal/README.md 
b/devops/singularity-minimal/README.md new file mode 100644 index 00000000..fe00eee2 --- /dev/null +++ b/devops/singularity-minimal/README.md @@ -0,0 +1,16 @@ +# Apptainer DSA + +Launches DSA containers using `apptainer`. + +## Getting Started + +```bash +# Build dsa_common with apptainer +./build.sh + +# Pull DSA service images +./pull_images.sh + +# Start DSA services +./dsa_compose.sh +``` diff --git a/devops/singularity-minimal/README.rst b/devops/singularity-minimal/README.rst deleted file mode 100644 index eb3b13d0..00000000 --- a/devops/singularity-minimal/README.rst +++ /dev/null @@ -1,9 +0,0 @@ -Run the following singularity commands:: - - singularity run --bind ./db:/data/db docker://mongo:latest & - - singularity run --bind ./assetstore:/assetstore --bind ./girder.cfg:/etc/girder.cfg docker://dsarchive/dsa_common bash -c 'python /opt/digital_slide_archive/devops/minimal/provision.py --sample-data && girder serve' & - -Note these have both been set to run in the background, which might not be desired. - -This has only been minimally tested, and should be used with caution. diff --git a/devops/singularity-minimal/dsa_common.def b/devops/singularity-minimal/dsa_common.def index dd490a2b..c70c66c2 100644 --- a/devops/singularity-minimal/dsa_common.def +++ b/devops/singularity-minimal/dsa_common.def @@ -16,7 +16,7 @@ From: dsarchive/dsa_common git clone https://github.com/nvm-sh/nvm.git $NVM_DIR . 
$NVM_DIR/nvm.sh - + nvm install 14 && \ nvm alias default 14 && \ nvm use default @@ -28,6 +28,7 @@ From: dsarchive/dsa_common mv /opt /root_opt # install apptainer + apt update apt install -y software-properties-common add-apt-repository -y ppa:apptainer/ppa apt update @@ -48,3 +49,4 @@ From: dsarchive/dsa_common echo " - Copying docker image's opt (/root_opt) to user mounted /opt" echo " - Make sure you're mounting a local directory to /opt" cp -r /root_opt/* /opt + /opt/venv/bin/pip install pyaml diff --git a/devops/singularity-minimal/dsa_compose.sh b/devops/singularity-minimal/dsa_compose.sh index bea66fe5..a3065f3a 100755 --- a/devops/singularity-minimal/dsa_compose.sh +++ b/devops/singularity-minimal/dsa_compose.sh @@ -14,29 +14,27 @@ apptainer instance stop -a || echo "No instances stopped" apptainer instance start \ --bind ./db:/data/db \ SIF/mongodb.sif dsa-mongodb-1 - # --bind /blue/pinaki.sarder/rc-svc-pinaki.sarder-web/db:/data/db \ - # --no-mount /cmsuf \ apptainer instance start \ --env RABBITMQ_DEFAULT_USER=guest \ --env RABBITMQ_DEFAULT_PASS=guest \ --bind ./rabbitmqdata:/var/lib/rabbitmq/ \ SIF/rabbitMQ.sif dsa-rabbitMQ-1 - # --no-mount /cmsuf \ apptainer instance start SIF/memcached.sif dsa-memcached-1 - # --no-mount /cmsuf -# clean girder opt -find ./opt/* -not -path "*opt/local_*" -not -path "*opt/.gitignore" -delete +# # clean girder opt +# find ./opt/* -not -path "*opt/local_*" -not -path "*opt/.gitignore" -delete -# set up worker opt -rm -rf ./worker_opt/* -cp -r ./opt/* ./worker_opt/ +# # set up worker opt +# # rm -rf ./worker_opt/* +# # cp -r ./opt/* ./worker_opt/ +rm -rf ./tmp/* +TMP_OPT_GIRDER=$(mktemp -d --tmpdir=./tmp) +TMP_OPT_WORKER=$(mktemp -d --tmpdir=./tmp) ## Start Girder and Worker apptainer instance start \ - --bind ./blue:/blue/pinaki.sarder/rc-svc-pinaki.sarder-web \ --bind ./assetstore:/assetstore \ --bind ./logs:/logs \ --bind ./tmp:/tmp \ @@ -45,8 +43,11 @@ apptainer instance start \ --bind 
./start_girder.sh:/opt/start_girder.sh \ --bind ./provision.yaml:/opt/provision.yaml \ --bind ../dsa/provision.py:/opt/provision.py \ - --bind ./opt:/opt \ + --bind $TMP_OPT_GIRDER:/opt \ SIF/dsa_common.sif test-dsarchive + # --bind ./opt:/opt \ + + # --bind ./blue:/blue/pinaki.sarder/rc-svc-pinaki.sarder-web \ # --no-mount /cmsuf \ # --bind /blue/pinaki.sarder/rc-svc-pinaki.sarder-web/assetstore:/assetstore \ # --bind /blue/pinaki.sarder/rc-svc-pinaki.sarder-web/logs:/logs \ @@ -56,13 +57,15 @@ apptainer instance start \ # --bind /var/run/munge:/run/munge \ apptainer instance start \ - --bind ./blue:/blue/pinaki.sarder/rc-svc-pinaki.sarder-web \ --bind ./logs:/logs \ - --bind ./worker_opt:/opt \ --bind ./start_worker.sh:/opt/start_worker.sh \ --bind ./provision.yaml:/opt/provision.yaml \ --bind ../dsa/provision.py:/opt/provision.py \ + --bind $TMP_OPT_WORKER:/opt \ SIF/dsa_common.sif dsa-worker-1 + # --bind ./worker_opt:/opt \ + + # --bind ./blue:/blue/pinaki.sarder/rc-svc-pinaki.sarder-web \ # --no-mount /cmsuf \ # --bind /blue/pinaki.sarder/rc-svc-pinaki.sarder-web/logs:/logs \ # --bind /apps \ @@ -79,14 +82,16 @@ apptainer run \ --env SIF_IMAGE_PATH="/home/local/KHQ/will.dunklin/work/digital_slide_archive/devops/singularity-minimal/tmp/sifs/" \ --env TMPDIR=/home/local/KHQ/will.dunklin/work/digital_slide_archive/devops/singularity-minimal/tmp \ --env LOGS=/home/local/KHQ/will.dunklin/work/digital_slide_archive/devops/singularity-minimal/logs \ - --env PATH=/opt/slurm/bin:$PATH \ - --env SLURM_QOS=pinaki.sarder-dsa \ - --env SLURM_ACCOUNT=pinaki.sarder-dsa \ --env DSA_PROVISION_YAML=/opt/provision.yaml \ --env GIRDER_WORKER_BROKER=amqp://guest:guest@localhost:5672/ \ --env GIRDER_WORKER_BACKEND=rpc://guest:guest@localhost:5672/ \ instance://dsa-worker-1 /opt/start_worker.sh & + # --env PATH=/opt/slurm/bin:$PATH \ + # --env SLURM_QOS=pinaki.sarder-dsa \ + # --env SLURM_ACCOUNT=pinaki.sarder-dsa \ + + sleep 30 apptainer run \ @@ -94,7 +99,8 @@ apptainer run \ 
--env TMPDIR=/home/local/KHQ/will.dunklin/work/digital_slide_archive/devops/singularity-minimal/tmp \ --env LOGS=/home/local/KHQ/will.dunklin/work/digital_slide_archive/devops/singularity-minimal/logs \ --env GIRDER_SETTING_WORKER_API_URL=http://0.0.0.0:8101/api/v1 \ - --env PATH=/opt/slurm/bin:$PATH \ - --env SLURM_QOS=pinaki.sarder-dsa \ - --env SLURM_ACCOUNT=pinaki.sarder-dsa \ instance://test-dsarchive bash # /opt/start_girder.sh + + # --env PATH=/opt/slurm/bin:$PATH \ + # --env SLURM_QOS=pinaki.sarder-dsa \ + # --env SLURM_ACCOUNT=pinaki.sarder-dsa \ diff --git a/devops/singularity-minimal/instance_run.sh b/devops/singularity-minimal/instance_run.sh deleted file mode 100755 index d3dc3eba..00000000 --- a/devops/singularity-minimal/instance_run.sh +++ /dev/null @@ -1,10 +0,0 @@ -#!/usr/bin/env bash - -# singularity run instance://dsa-mongodb-1 & -# singularity run instance://dsa-memcached-1 & -# singularity run instance://dsa-rabbitMQ-1 & -# singularity run instance://dsa-dsarchive-1 bash -c 'python /opt/digital_slide_archive/devops/dsa/provision.py --sample-data && girder serve' & - -# docker run --rm -it -p 27017:27017 mongo:latest mongod # needs to have port bound externally - -singularity run instance://test-dsarchive diff --git a/devops/singularity-minimal/instance_start.sh b/devops/singularity-minimal/instance_start.sh deleted file mode 100755 index 50add24a..00000000 --- a/devops/singularity-minimal/instance_start.sh +++ /dev/null @@ -1,29 +0,0 @@ -#!/usr/bin/env bash - -# singularity instance start --bind ./db:/data/db SIF/mongodb.sif dsa-mongodb-1 -# singularity instance start SIF/memcached.sif dsa-memcached-1 -# singularity instance start SIF/rabbitMQ.sif dsa-rabbitMQ-1 -singularity instance start \ - --bind ./opt:/opt \ - --bind ./assetstore:/assetstore \ - --bind ./logs:/logs \ - --bind ./fuse:/fuse \ - --bind ./girder.cfg:/etc/girder.cfg \ - --bind ./start_girder.sh:/opt/start_girder.sh \ - --bind ./provision.yaml:/opt/provision.yaml \ - 
SIF/dsa_common.sif test-dsarchive - -# needed to use singularity in singularity (for `singularity pull`, etc) - # --bind /usr/bin/singularity:/usr/bin/singularity \ - # --bind /usr/bin/apptainer:/usr/bin/apptainer \ - # --bind /etc/apptainer/apptainer.conf:/etc/apptainer/apptainer.conf \ - # --bind /usr/bin/mksquashfs:/usr/bin/mksquashfs \ - # --bind /usr/bin/unsquashfs:/usr/bin/unsquashfs \ - # --bind /usr/lib/x86_64-linux-gnu/liblzo2.so.2:/usr/lib/x86_64-linux-gnu/liblzo2.so.2 \ - -# needed to run `singularity exec` (doesn't work because of permissions) - # --bind /etc/apptainer:/etc/apptainer \ - # --bind /var/lib/apptainer/mnt/session:/var/lib/apptainer/mnt/session \ - # --bind /usr/libexec/apptainer:/usr/libexec/apptainer \ - # --bind /usr/libexec/apptainer/bin/starter:/usr/libexec/apptainer/bin/starter \ - # --bind /etc/apptainer/capability.json:/etc/apptainer/capability.json \ diff --git a/devops/singularity-minimal/instance_stop.sh b/devops/singularity-minimal/instance_stop.sh deleted file mode 100755 index 3e13a89e..00000000 --- a/devops/singularity-minimal/instance_stop.sh +++ /dev/null @@ -1,5 +0,0 @@ -#!/usr/bin/env bash - -singularity instance stop test-dsarchive - -find ./opt/* -not -path "*opt/local_*" -not -path "*opt/.gitignore" -delete diff --git a/devops/singularity-minimal/opt/.gitignore b/devops/singularity-minimal/opt/.gitignore deleted file mode 100644 index d6b7ef32..00000000 --- a/devops/singularity-minimal/opt/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -* -!.gitignore diff --git a/devops/singularity-minimal/provision.yaml b/devops/singularity-minimal/provision.yaml index 87d0ce87..c0f31f79 100644 --- a/devops/singularity-minimal/provision.yaml +++ b/devops/singularity-minimal/provision.yaml @@ -111,13 +111,24 @@ worker: # shell: # - pip -V # - python -V - pip: - # - -e /opt/local_girder_worker - - --force-reinstall -e /opt/local_girder_worker - - -e /opt/local_girder_worker/girder_worker/singularity - - -e 
/opt/local_girder_worker/girder_worker/slurm - - -e /opt/local_slicer_cli_web - - -e /opt/local_slicer_cli_web/slicer_cli_web/singularity + # pip: + # - -e /opt/local_girder_worker + # - --force-reinstall -e /opt/local_girder_worker + # - -e /opt/local_girder_worker/girder_worker/singularity + # - -e /opt/local_girder_worker/girder_worker/slurm + # - -e /opt/local_slicer_cli_web + # - -e /opt/local_slicer_cli_web/slicer_cli_web/singularity + shell: + - rm -rf /opt/girder_worker /opt/slicer_cli_web + - git clone --branch slurm https://github.com/girder/girder_worker.git /opt/girder_worker + - git clone --branch slicer-cli-web-singularity https://github.com/willdunklin/slicer_cli_web.git /opt/slicer_cli_web + # - cp /opt/worker.dist.cfg /opt/girder_worker/girder_worker/worker.dist.cfg + - pip install -e /opt/girder_worker + - pip install -e /opt/girder_worker/girder_worker/singularity + - pip install -e /opt/girder_worker/girder_worker/slurm + - pip install -e /opt/slicer_cli_web + - pip install -e /opt/slicer_cli_web/slicer_cli_web/singularity + # Run additional shell commands in the worker before start # shell: # - pip -V @@ -126,8 +137,18 @@ worker: # - pip freeze | grep worker # - ls pip: - - -e /opt/local_girder_worker - - -e /opt/local_girder_worker/girder_worker/singularity - - -e /opt/local_girder_worker/girder_worker/slurm - - -e /opt/local_slicer_cli_web - - -e /opt/local_slicer_cli_web/slicer_cli_web/singularity +# - -e /opt/local_girder_worker +# - -e /opt/local_girder_worker/girder_worker/singularity +# - -e /opt/local_girder_worker/girder_worker/slurm +# - -e /opt/local_slicer_cli_web +# - -e /opt/local_slicer_cli_web/slicer_cli_web/singularity +shell: + - rm -rf /opt/girder_worker /opt/slicer_cli_web + - git clone --branch slurm https://github.com/girder/girder_worker.git /opt/girder_worker + - git clone --branch slicer-cli-web-singularity https://github.com/willdunklin/slicer_cli_web.git /opt/slicer_cli_web +# - cp /opt/worker.dist.cfg 
/opt/girder_worker/girder_worker/worker.dist.cfg + - pip install -e /opt/girder_worker + - pip install -e /opt/girder_worker/girder_worker/singularity + - pip install -e /opt/girder_worker/girder_worker/slurm + - pip install -e /opt/slicer_cli_web + - pip install -e /opt/slicer_cli_web/slicer_cli_web/singularity diff --git a/devops/singularity-minimal/instance_pull.sh b/devops/singularity-minimal/pull_images.sh similarity index 74% rename from devops/singularity-minimal/instance_pull.sh rename to devops/singularity-minimal/pull_images.sh index 9ef2b041..d63192f4 100755 --- a/devops/singularity-minimal/instance_pull.sh +++ b/devops/singularity-minimal/pull_images.sh @@ -2,5 +2,4 @@ singularity pull SIF/rabbitMQ.sif library://sylabs/examples/rabbitmq singularity pull SIF/mongodb.sif docker://mongo:latest -singularity pull SIF/dsarchive.sif docker://suhaskc/dsacommon:latest singularity pull SIF/memcached.sif docker://memcached:latest diff --git a/devops/singularity-minimal/start_girder.sh b/devops/singularity-minimal/start_girder.sh index 281bfea9..590dd199 100755 --- a/devops/singularity-minimal/start_girder.sh +++ b/devops/singularity-minimal/start_girder.sh @@ -1,5 +1,7 @@ #!/bin/bash set -x +. /opt/venv/bin/activate +pip install pyaml # Ensures that the main process runs as the DSA_USER and is part of both that # group and the docker group. Fail if DSA_USER is not specified. # if [[ -z "$DSA_USER" ]]; then diff --git a/devops/singularity-minimal/start_worker.sh b/devops/singularity-minimal/start_worker.sh index 24e70364..42f3a241 100755 --- a/devops/singularity-minimal/start_worker.sh +++ b/devops/singularity-minimal/start_worker.sh @@ -1,5 +1,7 @@ #!/bin/bash set -x +. /opt/venv/bin/activate +pip install pyaml # Ensures that the main process runs as the DSA_USER and is part of both that # group and the docker group. Fail if DSA_USER is not specified. 
# if [[ -z "$DSA_USER" ]]; then diff --git a/devops/singularity-minimal/worker_opt/.gitignore b/devops/singularity-minimal/worker_opt/.gitignore deleted file mode 100644 index d6b7ef32..00000000 --- a/devops/singularity-minimal/worker_opt/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -* -!.gitignore From b73b5340c8ebf2b11d1af37c9ae59e046d4e5d36 Mon Sep 17 00:00:00 2001 From: willdunklin Date: Thu, 24 Jul 2025 10:27:08 -0400 Subject: [PATCH 21/24] Update README --- devops/slurm/README.md | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/devops/slurm/README.md b/devops/slurm/README.md index 835db7e6..cb9177e7 100644 --- a/devops/slurm/README.md +++ b/devops/slurm/README.md @@ -96,19 +96,6 @@ docker compose down; DSA_USER=$(id -u):$(id -g) docker compose up ``` -## Note on Apptainer `slicer_cli_web` images - -Typically the DSA executes CLIs using Docker images. `slicer_cli_web` manages the pulling of these images while `girder_worker` manages the execution of the CLIs. - -However with most Slurm/HPC systems, we need to run the CLIs in Apptainer (aka Singularity) containers since Docker execution is limited. Apptainer provides a default mechanism for converting Docker images to Apptainer images. - -One critical difference between Docker and Apptainer images is that Apptainer doesn't have a notion of a `WORKDIR`. Unfortunately this means that `WORKDIR` information is lost during the image conversion process. This causes issues with CLIs that rely on the `WORKDIR` to find files. - -Our current workaround is to manually add an `entry_path` `LABEL` to Docker images we want to convert (where `entry_path` is set to the `WORKDIR` value). - -To make your Docker image CLI compatible with `girder_worker_slurm`, add the `LABEL entry_path=/path/to/workdir` to your image. 
- - ## See also - [Girder Worker Slurm](https://github.com/girder/girder_worker/tree/slurm/girder_worker/slurm) plugin From 7355b76c6d2c523c6cb2750cb2c70175946bfda9 Mon Sep 17 00:00:00 2001 From: willdunklin Date: Thu, 24 Jul 2025 10:27:17 -0400 Subject: [PATCH 22/24] Add HPC docs --- devops/slurm/HPC.md | 47 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) create mode 100644 devops/slurm/HPC.md diff --git a/devops/slurm/HPC.md b/devops/slurm/HPC.md new file mode 100644 index 00000000..40b43663 --- /dev/null +++ b/devops/slurm/HPC.md @@ -0,0 +1,47 @@ +# HPC and DSA + +Most HPC environments disallow Docker execution. This document goes over the uses of Docker in the DSA and how to replace them with HPC-friendly alternatives. + + +## DSA using Docker + +Let's go over the typical use cases of Docker in the DSA. + +### Deployment + +Docker is used in several contexts in the DSA. Firstly, the DSA is typically deployed using the `docker compose` configuration found [here](https://github.com/DigitalSlideArchive/digital_slide_archive/tree/master/devops/dsa), which launches the Girder app, worker, etc. The other main use of Docker is with the execution of `slicer_cli_web` jobs (i.e. jobs submitted through HistomicsUI, etc.), where each job executes a Docker container run by the `girder_worker`. In this context, Docker is easily replaceable since it's only used to orchestrate processes. + +To address the first case, the recommended way to replace Docker in the DSA deployment is to use Podman. Several research partners (namely Tulane and Pitt) have deployments executed using Podman as a drop-in replacement for Docker. Podman configuration is required (TODO: add known details). + +There's an alternative DSA deployment option using Apptainer (used by University of Florida) found [here](https://github.com/DigitalSlideArchive/digital_slide_archive/tree/slurm/devops/singularity-minimal); however, Podman is recommended for its stability and ease of configuration.
+ +### Job Execution + +Now for the second use case of Docker, executing `slicer_cli_web` jobs, we use Apptainer (formerly known as Singularity) and optionally Slurm. Most of our research partners choose to use DSA Slurm execution. We have working DSA Slurm deployments with Pitt, Tulane, and UF. To enable Apptainer/Slurm jobs, you have to install several Python plugins in the Girder/worker environments (see Requirements below and the DSA Slurm reference docs for more information). + + +## Requirements + +For Podman deployment: +- [Podman](https://podman.io/docs) installed on host system +- (TODO: add `devops/podman` example) + + +For Apptainer job execution: +- [Apptainer](https://apptainer.org/docs/admin/main/installation.html) +- [Girder Worker Singularity](https://github.com/girder/girder_worker/tree/slurm/girder_worker/singularity) plugin +- [Slicer CLI Web Singularity](https://github.com/girder/slicer_cli_web/tree/slicer-cli-web-singularity/slicer_cli_web/singularity) plugin + + +For Apptainer job execution using Slurm: +- All above Apptainer requirements +- [Girder Worker Slurm](https://github.com/girder/girder_worker/tree/slurm/girder_worker/slurm) plugin +- Apptainer installed on all Slurm nodes (login & compute) +- Shared filesystem visible to worker and compute nodes +- Slurm login node access (`squeue`, `sinfo`, `scontrol`, etc.)
for `girder_worker` +- See [DSA Slurm (Reference Docs)](https://github.com/DigitalSlideArchive/digital_slide_archive/tree/slurm/devops/slurm) for more details + + +## Credentials + +TODO: unsure of what to put here, ask David From c343fe697ff98f16414263e94b0404d774fe2ae5 Mon Sep 17 00:00:00 2001 From: willdunklin Date: Tue, 2 Jun 2026 17:19:32 -0400 Subject: [PATCH 23/24] Add singularity-5 example --- devops/singularity-5/.gitignore | 10 ++ devops/singularity-5/README.md | 16 +++ devops/singularity-5/SIF/.gitignore | 2 + devops/singularity-5/build.sh | 3 + devops/singularity-5/dsa_common.def | 80 +++++++++++ devops/singularity-5/dsa_compose.sh | 111 +++++++++++++++ devops/singularity-5/fuse/.gitignore | 2 + devops/singularity-5/girder.cfg | 6 + devops/singularity-5/logs/.gitignore | 2 + devops/singularity-5/provision.yaml | 139 +++++++++++++++++++ devops/singularity-5/pull_images.sh | 5 + devops/singularity-5/rabbitmqdata/.gitignore | 2 + devops/singularity-5/singularity-compose.yml | 21 +++ devops/singularity-5/start_girder.sh | 30 ++++ devops/singularity-5/start_worker.sh | 11 ++ devops/singularity-5/tmp/.gitignore | 2 + 16 files changed, 442 insertions(+) create mode 100644 devops/singularity-5/.gitignore create mode 100644 devops/singularity-5/README.md create mode 100644 devops/singularity-5/SIF/.gitignore create mode 100755 devops/singularity-5/build.sh create mode 100644 devops/singularity-5/dsa_common.def create mode 100755 devops/singularity-5/dsa_compose.sh create mode 100644 devops/singularity-5/fuse/.gitignore create mode 100644 devops/singularity-5/girder.cfg create mode 100644 devops/singularity-5/logs/.gitignore create mode 100644 devops/singularity-5/provision.yaml create mode 100755 devops/singularity-5/pull_images.sh create mode 100644 devops/singularity-5/rabbitmqdata/.gitignore create mode 100644 devops/singularity-5/singularity-compose.yml create mode 100755 devops/singularity-5/start_girder.sh create mode 100755 
devops/singularity-5/start_worker.sh create mode 100644 devops/singularity-5/tmp/.gitignore diff --git a/devops/singularity-5/.gitignore b/devops/singularity-5/.gitignore new file mode 100644 index 00000000..203ff09c --- /dev/null +++ b/devops/singularity-5/.gitignore @@ -0,0 +1,10 @@ +assetstore +db +# opt +# logs +# fuse + +girder +mongodb +etc.hosts +resolv.conf diff --git a/devops/singularity-5/README.md b/devops/singularity-5/README.md new file mode 100644 index 00000000..1484fdce --- /dev/null +++ b/devops/singularity-5/README.md @@ -0,0 +1,16 @@ +# Apptainer DSA (Girder 5) + +Launches DSA containers using `apptainer`. + +## Getting Started + +```bash +# Build dsa_common with apptainer +./build.sh + +# Pull DSA service images +./pull_images.sh + +# Start DSA services +./dsa_compose.sh +``` diff --git a/devops/singularity-5/SIF/.gitignore b/devops/singularity-5/SIF/.gitignore new file mode 100644 index 00000000..d6b7ef32 --- /dev/null +++ b/devops/singularity-5/SIF/.gitignore @@ -0,0 +1,2 @@ +* +!.gitignore diff --git a/devops/singularity-5/build.sh b/devops/singularity-5/build.sh new file mode 100755 index 00000000..4459634a --- /dev/null +++ b/devops/singularity-5/build.sh @@ -0,0 +1,3 @@ +#!/usr/bin/env bash + +sudo apptainer build SIF/dsa_common.sif dsa_common.def diff --git a/devops/singularity-5/dsa_common.def b/devops/singularity-5/dsa_common.def new file mode 100644 index 00000000..7b3989a5 --- /dev/null +++ b/devops/singularity-5/dsa_common.def @@ -0,0 +1,80 @@ +Bootstrap: docker +From: dsarchive/dsa_common_5 + +%environment + export NVM_DIR=/root_opt/nvm + [ -s "$NVM_DIR/nvm.sh" ] && . "$NVM_DIR/nvm.sh" + [ -s /root_opt/venv/bin/activate ] && . /root_opt/venv/bin/activate + +%post -c /bin/bash + set -euxo pipefail + + # install nvm / node properly + export NVM_DIR=/opt/nvm + + git clone https://github.com/nvm-sh/nvm.git "$NVM_DIR" + . 
"$NVM_DIR/nvm.sh" + + nvm install 24 + nvm alias default 24 + nvm use default + + # install pyaml in virtual environment + . /opt/venv/bin/activate + /opt/venv/bin/pip install pyaml setuptools + + # install apptainer + apt-get update + apt-get install -y --no-install-recommends \ + ca-certificates \ + fuse3 \ + software-properties-common \ + squashfs-tools \ + uidmap \ + wget + + # Prefer distro package if available; otherwise install a pinned .deb from + # GitHub releases (works in environments where Launchpad PPAs are blocked). + if ! apt-get install -y --no-install-recommends apptainer; then + APPTAINER_VERSION="${APPTAINER_VERSION:-1.4.3}" + wget -O /tmp/apptainer.deb \ + "https://github.com/apptainer/apptainer/releases/download/v${APPTAINER_VERSION}/apptainer_${APPTAINER_VERSION}_amd64.deb" + apt-get install -y /tmp/apptainer.deb + rm -f /tmp/apptainer.deb + fi + apptainer --version + + # preserve image /opt so it can be copied into bind-mounted /opt at startup + mv /opt /root_opt + + apt-get clean + rm -rf /var/lib/apt/lists/* + +%runscript + set -x + + if [ -d /opt/nvm ]; then + export NVM_DIR=/opt/nvm + . "$NVM_DIR/nvm.sh" + else + export NVM_DIR=/root_opt/nvm + . "$NVM_DIR/nvm.sh" + fi + + if [ -f /opt/venv/bin/activate ]; then + . /opt/venv/bin/activate + else + . 
/root_opt/venv/bin/activate + fi + + exec bash -c "$@" + +%startscript + set -x + + # assuming that the user mounts a local directory to /opt/ + # this is necessary for getting the correct singularity permissions + echo " - Copying docker image's opt (/root_opt) to user mounted /opt" + echo " - Make sure you're mounting a local directory to /opt" + cp -r /root_opt/* /opt/ + /opt/venv/bin/pip install pyaml setuptools diff --git a/devops/singularity-5/dsa_compose.sh b/devops/singularity-5/dsa_compose.sh new file mode 100755 index 00000000..bd46c4b9 --- /dev/null +++ b/devops/singularity-5/dsa_compose.sh @@ -0,0 +1,111 @@ +#!/usr/bin/env bash +set -euxo pipefail + +cd "$(dirname "$0")" +GIRDER_SRC="${GIRDER_SRC:-$(realpath ../../../girder)}" +if [ ! -d "$GIRDER_SRC" ]; then + echo "Missing GIRDER_SRC directory: $GIRDER_SRC" + exit 1 +fi + +apptainer instance stop -a || echo "No instances stopped" + +# Load Modules +# module load slurm-drmaa + +# Start instances +# MongoDB, RabbitMQ, Redis +apptainer instance start \ + --bind ./db:/data/db \ + SIF/mongodb.sif dsa-mongodb-1 + +apptainer instance start \ + --env RABBITMQ_DEFAULT_USER=guest \ + --env RABBITMQ_DEFAULT_PASS=guest \ + --bind ./rabbitmqdata:/var/lib/rabbitmq/ \ + SIF/rabbitMQ.sif dsa-rabbitMQ-1 + +apptainer instance start SIF/redis.sif dsa-redis-1 + +# # clean girder opt +# find ./opt/* -not -path "*opt/local_*" -not -path "*opt/.gitignore" -delete + +# # set up worker opt +# # rm -rf ./worker_opt/* +# # cp -r ./opt/* ./worker_opt/ +rm -rf ./tmp/* +mkdir -p ./tmp/sifs +TMP_OPT_GIRDER=$(mktemp -d --tmpdir=./tmp) +TMP_OPT_WORKER=$(mktemp -d --tmpdir=./tmp) + +## Start Girder and Worker +apptainer instance start \ + --bind ./assetstore:/assetstore \ + --bind ./logs:/logs \ + --bind ./tmp:/tmp \ + --bind ./fuse:/fuse \ + --bind ./girder.cfg:/etc/girder.cfg \ + --bind ./start_girder.sh:/opt/start_girder.sh \ + --bind ./provision.yaml:/opt/provision.yaml \ + --bind ../ver5/provision.py:/opt/provision.py \ + --bind 
"$GIRDER_SRC":/src/girder \ + --bind $TMP_OPT_GIRDER:/opt \ + SIF/dsa_common.sif test-dsarchive + +apptainer instance start \ + --bind ./logs:/logs \ + --bind ./start_worker.sh:/opt/start_worker.sh \ + --bind ./provision.yaml:/opt/provision.yaml \ + --bind ../ver5/provision.py:/opt/provision.py \ + --bind "$GIRDER_SRC":/src/girder \ + --bind $TMP_OPT_WORKER:/opt \ + SIF/dsa_common.sif dsa-worker-1 + # --bind ./worker_opt:/opt \ + + # --bind ./blue:/blue/pinaki.sarder/rc-svc-pinaki.sarder-web \ + # --no-mount /cmsuf \ + # --bind /blue/pinaki.sarder/rc-svc-pinaki.sarder-web/logs:/logs \ + # --bind /apps \ + # --bind /var/run/munge:/run/munge \ + # --bind /opt/slurm \ + +## Execute shells +apptainer exec instance://dsa-mongodb-1 mongod > /dev/null & + +sleep 5 # TODO: WHY THE HELL IS THERE A RACE CONDITIONNNNNN + # the files seem to be not mounted properly before this stuff runs + +apptainer run \ + --env SIF_IMAGE_PATH="$(pwd)/tmp/sifs/" \ + --env TMPDIR="$(pwd)/tmp" \ + --env LOGS="$(pwd)/logs" \ + --env DSA_PROVISION_YAML=/opt/provision.yaml \ + --env GIRDER_WORKER_BROKER=amqp://guest:guest@localhost:5672/ \ + --env GIRDER_WORKER_BACKEND=rpc://guest:guest@localhost:5672/ \ + --env CELERY_BROKER_URL=amqp://guest:guest@localhost:5672/ \ + --env CELERY_RESULT_BACKEND=rpc://guest:guest@localhost:5672/ \ + instance://dsa-worker-1 /opt/start_worker.sh & + + # --env PATH=/opt/slurm/bin:$PATH \ + # --env SLURM_QOS=pinaki.sarder-dsa \ + # --env SLURM_ACCOUNT=pinaki.sarder-dsa \ + + +sleep 30 + +apptainer run \ + --env SIF_IMAGE_PATH="$(pwd)/tmp/sifs/" \ + --env TMPDIR="$(pwd)/tmp" \ + --env LOGS="$(pwd)/logs" \ + --env DSA_PROVISION_YAML=/opt/provision.yaml \ + --env CELERY_BROKER_URL=amqp://guest:guest@localhost:5672/ \ + --env CELERY_RESULT_BACKEND=rpc://guest:guest@localhost:5672/ \ + --env GIRDER_NOTIFICATION_REDIS_URL=redis://localhost:6379 \ + --env LARGE_IMAGE_CACHE_BACKEND=redis \ + --env LARGE_IMAGE_CACHE_REDIS_URL=localhost:6379 \ + --env 
DSA_WORKER_API_URL=http://localhost:8080/api/v1 \ + instance://test-dsarchive /opt/start_girder.sh + + # --env PATH=/opt/slurm/bin:$PATH \ + # --env SLURM_QOS=pinaki.sarder-dsa \ + # --env SLURM_ACCOUNT=pinaki.sarder-dsa \ diff --git a/devops/singularity-5/fuse/.gitignore b/devops/singularity-5/fuse/.gitignore new file mode 100644 index 00000000..d6b7ef32 --- /dev/null +++ b/devops/singularity-5/fuse/.gitignore @@ -0,0 +1,2 @@ +* +!.gitignore diff --git a/devops/singularity-5/girder.cfg b/devops/singularity-5/girder.cfg new file mode 100644 index 00000000..a98615bc --- /dev/null +++ b/devops/singularity-5/girder.cfg @@ -0,0 +1,6 @@ +[global] +server.socket_host = "0.0.0.0" +server.max_request_body_size = 1073741824 + +[database] +uri = "mongodb://localhost:27017/girder?socketTimeoutMS=3600000" diff --git a/devops/singularity-5/logs/.gitignore b/devops/singularity-5/logs/.gitignore new file mode 100644 index 00000000..d6b7ef32 --- /dev/null +++ b/devops/singularity-5/logs/.gitignore @@ -0,0 +1,2 @@ +* +!.gitignore diff --git a/devops/singularity-5/provision.yaml b/devops/singularity-5/provision.yaml new file mode 100644 index 00000000..bba8c9ac --- /dev/null +++ b/devops/singularity-5/provision.yaml @@ -0,0 +1,139 @@ +--- +# The provision script can take a yaml file with provision options +# This is a dictionary of command-line arguments for the provisioning script +force: False +samples: False +clean-delete-locks: True +sample-collection: Samples +sample-folder: Images +# Set use-defaults to False to skip default settings +use-defaults: True +# Set mongo-compat to False to not automatically set the mongo feature +# compatibility version to the current server version. +mongo-compat: True +# A list of additional pip modules to install; if any are girder plugins with +# client-side code, that code must be built in the shell step.
+# pip: +# - girder-oauth +# - girder-ldap +# Run additional shell commands before start +# shell: +# - ls +# Default admin user if there are no admin users +admin: + login: admin + password: password + firstName: Admin + lastName: Admin + email: admin@nowhere.nil + public: True +# Default assetstore if there are no assetstores +assetstore: + method: createFilesystemAssetstore + name: Assetstore + root: /assetstore +# Any resources to ensure exist. A model must be specified. This creates the +# resource if there is no match for all specified values. A value of +# "resource:<path>" is converted to the resource document with that resource +# path. "resource:admin" uses the default admin, "resourceid:<path>" is the +# string id for the resource path, and "resourceid:admin" is the string id for +# default admin. +# You can add metadata to a resource. The default key is meta. If +# metadata_update is False, metadata will not be set if any metadata +# already exists. +resources: + - model: collection + name: Tasks + creator: resource:admin + public: True + - model: folder + parent: resource:collection/Tasks + parentType: collection + name: "Slicer CLI Web Tasks" + creator: resource:admin + public: True + # metadata: + # sample_key: sample_value + # metadata_key: meta + # metadata_update: True +settings: + worker.api_url: "http://localhost:8080/api/v1" + worker.direct_path: True + + core.brand_name: "Digital Slide Archive" + # core.http_only_cookies: True + histomicsui.webroot_path: "histomics" + histomicsui.alternate_webroot_path: "histomicstk" + histomicsui.delete_annotations_after_ingest: True + homepage.markdown: |- + # Digital Slide Archive + --- + ## Bioinformatics Platform + + Welcome to the **Digital Slide Archive**. + + Developers who want to use the Girder REST API should check out the + [interactive web API docs](api/v1). + + The [HistomicsUI](histomics) application is enabled.
+ slicer_cli_web.task_folder: "resourceid:collection/Tasks/Slicer CLI Web Tasks" +# List slicer-cli-images to pull, if not present, and load +# slicer-cli-image: +# - dsarchive/histomicstk:latest +# List slicer-cli-images to always pull, and load +slicer-cli-image-pull: + # - dsarchive/histomicstk:latest +# The worker can specify parameters for provisioning +# worker-rabbitmq-host: localhost:5672 +worker-rabbitmq-user: guest +worker-rabbitmq-pass: guest +worker-rabbitmq-host: localhost +worker-config: /src/girder/worker/girder_worker/worker.local.cfg +# These have precedence over the top level values +worker: + # rabbitmq-host: girder:8080 + # rabbitmq-user: guest + # rabbitmq-pass: guest + # config: /opt/girder_worker/girder_worker/worker.local.cfg + # Install additional pip packages in the worker + # shell: + # - pip -V + # - python -V + # pip: + # - -e /opt/local_girder_worker + # - --force-reinstall -e /opt/local_girder_worker + # - -e /opt/local_girder_worker/girder_worker/singularity + # - -e /opt/local_girder_worker/girder_worker/slurm + # - -e /opt/local_slicer_cli_web + # - -e /opt/local_slicer_cli_web/slicer_cli_web/singularity + shell: + - pip install --force-reinstall -e /src/girder + - pip install --force-reinstall -e /src/girder/worker + - pip install --force-reinstall -e /src/girder/plugins/slicer_cli_web + - pip install --force-reinstall --no-deps -e /src/girder/worker/girder_worker/singularity + # - pip install --force-reinstall --no-deps -e /src/girder/worker/girder_worker/slurm + - pip install --force-reinstall --no-deps -e /src/girder/plugins/slicer_cli_web/slicer_cli_web/singularity + - pip install --upgrade "girder-client>=5.0.0a5" + + # Run additional shell commands in the worker before start + # shell: + # - pip -V + # - pip freeze | grep worker + # - pip install -e /opt/local_girder_worker + # - pip freeze | grep worker + # - ls +pip: +# - -e /opt/local_girder_worker +# - -e /opt/local_girder_worker/girder_worker/singularity +# - -e 
/opt/local_girder_worker/girder_worker/slurm +# - -e /opt/local_slicer_cli_web +# - -e /opt/local_slicer_cli_web/slicer_cli_web/singularity +shell: + - /bin/bash -lc 'cd /src/girder/plugins/slicer_cli_web/slicer_cli_web/web_client && if [ ! -f dist/style.css ]; then npm ci && npm run build; fi' + - pip install --force-reinstall -e /src/girder + - pip install --force-reinstall -e /src/girder/worker + - pip install --force-reinstall -e /src/girder/plugins/slicer_cli_web + - pip install --force-reinstall --no-deps -e /src/girder/worker/girder_worker/singularity +# - pip install --force-reinstall --no-deps -e /src/girder/worker/girder_worker/slurm + - pip install --force-reinstall --no-deps -e /src/girder/plugins/slicer_cli_web/slicer_cli_web/singularity + - pip install --upgrade "girder-client>=5.0.0a5" diff --git a/devops/singularity-5/pull_images.sh b/devops/singularity-5/pull_images.sh new file mode 100755 index 00000000..ebc7a597 --- /dev/null +++ b/devops/singularity-5/pull_images.sh @@ -0,0 +1,5 @@ +#!/usr/bin/env bash + +apptainer pull SIF/rabbitMQ.sif library://sylabs/examples/rabbitmq +apptainer pull SIF/mongodb.sif docker://mongo:latest +apptainer pull SIF/redis.sif docker://redis:latest diff --git a/devops/singularity-5/rabbitmqdata/.gitignore b/devops/singularity-5/rabbitmqdata/.gitignore new file mode 100644 index 00000000..d6b7ef32 --- /dev/null +++ b/devops/singularity-5/rabbitmqdata/.gitignore @@ -0,0 +1,2 @@ +* +!.gitignore diff --git a/devops/singularity-5/singularity-compose.yml b/devops/singularity-5/singularity-compose.yml new file mode 100644 index 00000000..dd3e385e --- /dev/null +++ b/devops/singularity-5/singularity-compose.yml @@ -0,0 +1,21 @@ +--- +version: '2.0' +instances: + girder: + image: "docker://dsarchive/dsa_common_5" + name: girder + # restart: unless-stopped + volumes: + - ./assetstore:/assetstore + - ./girder.cfg:/etc/girder.cfg + depends_on: + - mongodb + command: + bash -c 'python provision.py --sample-data && girder serve' + 
mongodb: + image: "docker://mongo:latest" + name: mongodb + # restart: unless-stopped + command: --nojournal + volumes: + - ./db:/data/db diff --git a/devops/singularity-5/start_girder.sh b/devops/singularity-5/start_girder.sh new file mode 100755 index 00000000..9a85d47b --- /dev/null +++ b/devops/singularity-5/start_girder.sh @@ -0,0 +1,30 @@ +#!/bin/bash +set -euxo pipefail + +. /opt/venv/bin/activate +pip install pyaml setuptools + +echo ==== Pre-Provisioning === +python /opt/provision.py -v --pre --yaml /opt/provision.yaml + +echo ==== Provisioning === +python /opt/provision.py -v --main --yaml /opt/provision.yaml + +echo ==== Creating FUSE mount === +girder mount ${DSA_GIRDER_MOUNT_OPTIONS:-} /fuse || true + +echo ==== Starting Local Worker === +celery -A girder_worker.app worker -Q local --concurrency 4 & + +echo ==== Starting Girder === +girder serve --host=0.0.0.0 & +girder_pid=$! +until curl --silent http://localhost:8080/api/v1/system/version >/dev/null 2>/dev/null; do + echo -n . + sleep 1 +done + +echo ==== Postprovisioning === +python /opt/provision.py -v --post --yaml /opt/provision.yaml + +wait ${girder_pid} diff --git a/devops/singularity-5/start_worker.sh b/devops/singularity-5/start_worker.sh new file mode 100755 index 00000000..4d1e6b9c --- /dev/null +++ b/devops/singularity-5/start_worker.sh @@ -0,0 +1,11 @@ +#!/bin/bash +set -euxo pipefail +. 
/opt/venv/bin/activate +pip install pyaml setuptools + +echo ==== Pre-Provisioning === +/opt/venv/bin/python3 /opt/provision.py --worker-pre -v --yaml /opt/provision.yaml +echo ==== Provisioning === && +/opt/venv/bin/python3 /opt/provision.py --worker-main -v --yaml /opt/provision.yaml +echo ==== Starting Worker === && +DOCKER_CLIENT_TIMEOUT=86400 TMPDIR=${TMPDIR:-/tmp} GW_DIRECT_PATHS=true celery -A girder_worker.app.app worker --concurrency=${DSA_WORKER_CONCURRENCY:-2} -Ofair --prefetch-multiplier=1 diff --git a/devops/singularity-5/tmp/.gitignore b/devops/singularity-5/tmp/.gitignore new file mode 100644 index 00000000..d6b7ef32 --- /dev/null +++ b/devops/singularity-5/tmp/.gitignore @@ -0,0 +1,2 @@ +* +!.gitignore From 951d6ded51256351ce5803adf1a135e780e47f70 Mon Sep 17 00:00:00 2001 From: willdunklin Date: Tue, 16 Jun 2026 14:21:37 -0400 Subject: [PATCH 24/24] Remove unused file --- devops/dsa/altfs/.gitignore | 2 -- 1 file changed, 2 deletions(-) delete mode 100644 devops/dsa/altfs/.gitignore diff --git a/devops/dsa/altfs/.gitignore b/devops/dsa/altfs/.gitignore deleted file mode 100644 index d6b7ef32..00000000 --- a/devops/dsa/altfs/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -* -!.gitignore