forked from nvidia-holoscan/holohub
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
SegmentAnything 2 application using Holoscan --------- Signed-off-by: maximilianofir <[email protected]>
- Loading branch information
1 parent
0641573
commit 66769c4
Showing
15 changed files
with
1,361 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,93 @@ | ||
# syntax=docker/dockerfile:1 | ||
|
||
# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. | ||
# SPDX-License-Identifier: Apache-2.0 | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
|
||
# Base image: NVIDIA NGC PyTorch container, release tag 24.06-py3.
# The stage keyword is written as uppercase AS to match FROM.
FROM nvcr.io/nvidia/pytorch:24.06-py3 AS pytorch
|
||
|
||
# Install git, git-lfs and x11-apps, then drop the apt package lists in the
# same layer so they do not end up in the image.
RUN <<EOF
set -e
apt-get update
apt-get install -y git git-lfs x11-apps
rm -rf /var/lib/apt/lists/*
EOF

# Initialise Git LFS for the build user.
RUN git lfs install
|
||
# Fetch SAM2 into /workspace, install it in editable mode (base package first,
# then the "demo" extra), and download the model checkpoints.
WORKDIR /workspace
ARG COMPUTE_CAPACITY
RUN git clone https://github.com/facebookresearch/segment-anything-2.git

WORKDIR /workspace/segment-anything-2
RUN python3 -m pip install --no-cache-dir -e . \
    && python3 -m pip install --no-cache-dir -e ".[demo]"

WORKDIR /workspace/segment-anything-2/checkpoints
RUN ./download_ckpts.sh

# Return to the workspace root for the remaining steps.
WORKDIR /workspace
|
||
# Install the NVIDIA CUDA apt keyring that matches the build machine's
# architecture, then install Holoscan from that repository.
#  - aarch64 uses the "sbsa" repository path, x86_64 uses "x86_64".
#  - Any other architecture aborts the build instead of continuing without a
#    keyring.
#  - The downloaded .deb and the apt package lists are removed in the same
#    layer so they do not stay in the image.
RUN case "$(uname -m)" in \
        aarch64) CUDA_REPO_ARCH=sbsa ;; \
        x86_64)  CUDA_REPO_ARCH=x86_64 ;; \
        *) echo "Unsupported architecture: $(uname -m)" >&2; exit 1 ;; \
    esac \
    && wget "https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/${CUDA_REPO_ARCH}/cuda-keyring_1.1-1_all.deb" \
    && dpkg -i cuda-keyring_1.1-1_all.deb \
    && rm cuda-keyring_1.1-1_all.deb \
    && apt-get update \
    && apt-get -y install holoscan \
    && rm -rf /var/lib/apt/lists/*

# Make the Holoscan Python bindings importable. This has to be an ENV
# instruction: an `export` inside RUN only lasts for that one build step.
# Any PYTHONPATH already set by the base image is kept after the new entry.
ENV PYTHONPATH="/opt/nvidia/holoscan/python/lib${PYTHONPATH:+:${PYTHONPATH}}"
|
||
# Setup Docker's apt repository to enable DooD for packaging & running
# applications with the CLI.
# Ref: Docker installation: https://docs.docker.com/engine/install/ubuntu/
# DooD (Docker-out-of-Docker): use the Docker (or Moby) CLI in your dev container to connect to
# your host's Docker daemon by bind mounting the Docker Unix socket.
#
# The signing key is downloaded to a temporary file before being de-armored,
# rather than piped into gpg: the default /bin/sh has no pipefail, so a failed
# download in a pipeline would go unnoticed.
RUN install -m 0755 -d /etc/apt/keyrings \
    && curl -fsSL https://download.docker.com/linux/ubuntu/gpg -o /tmp/docker.asc \
    && gpg --dearmor -o /etc/apt/keyrings/docker.gpg /tmp/docker.asc \
    && rm /tmp/docker.asc \
    && chmod a+r /etc/apt/keyrings/docker.gpg \
    && echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.gpg] https://download.docker.com/linux/ubuntu $(. /etc/os-release && echo "$VERSION_CODENAME") stable" \
        > /etc/apt/sources.list.d/docker.list
|
||
# Install version-pinned system packages (listed alphabetically), without
# recommended extras, and remove the apt package lists in the same layer.
RUN apt-get update && apt-get install --no-install-recommends -y \
        docker-buildx-plugin="0.12.1-*" \
        docker-ce-cli="5:25.0.3-*" \
        glslang-tools="11.8.0+1.3.204.0-*" \
        libdecor-0-plugin-1-cairo="0.1.0-*" \
        libegl1="1.4.0-*" \
        libjpeg-turbo8-dev="2.1.2-*" \
        libopenblas0="0.3.20+ds-*" \
        libpng-dev="1.6.37-*" \
        libv4l-dev="1.22.1-*" \
        libvulkan-dev="1.3.204.1-*" \
        libwayland-dev="1.20.0-*" \
        libx11-dev="2:1.7.5-*" \
        libxcb-glx0="1.14-*" \
        libxcursor-dev="1:1.2.0-*" \
        libxi-dev="2:1.8-*" \
        libxinerama-dev="2:1.1.4-*" \
        libxkbcommon-dev="1.4.0-*" \
        libxrandr-dev="2:1.5.2-*" \
        pkg-config="0.29.2-*" \
        v4l-utils="1.22.1-*" \
        valgrind="1:3.18.1-*" \
        vulkan-validationlayers="1.3.204.1-*" \
        xvfb="2:21.1.4-*" \
    && rm -rf /var/lib/apt/lists/*
# Start a bash shell by default when a container is run from this image.
# (RUN executes during the build with no terminal attached, so a RUN of bash
# exits immediately; CMD is the instruction that sets the container's command.)
CMD ["/bin/bash"]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,83 @@ | ||
# 📷 Holoscan SAM2 | ||
|
||
This application demonstrates how to run [SAM2](https://github.com/facebookresearch/segment-anything-2) models on a live video feed, with the possibility of changing query points in real time. | ||
|
||
The application currently uses a single query point as a foreground point that moves on the perimeter of a circle with a configured angular speed. | ||
The model returns three masks; the best mask is selected based on the model scores. Two visualization options exist: "logits" or "masks". | ||
- "logits": predictions of the network, mapped onto a colorscale that matches matplotlib.pyplot's "viridis" | ||
- "masks": binarized predictions | ||
|
||
SAM2, recently announced by Meta, is the next iteration of the Segment Anything Model (SAM). This new version expands upon its predecessor by adding the capability to segment both videos and images. | ||
This sample application wraps the ImageInference class, and applies it on a live video feed. | ||
|
||
Note: This demo currently uses ["sam2_hiera_l.yaml"](https://github.com/facebookresearch/segment-anything-2), but any of the SAM2 models work. You only need to adjust [segment_one_thing.yaml](./segment_one_thing.yaml). | ||
|
||
## ⚙️ Setup Instructions | ||
The app defaults to using the video device at `/dev/video0` | ||
|
||
To check whether this is the correct device, install `v4l-utils`, which provides `v4l2-ctl`: | ||
```bash | ||
sudo apt-get install v4l-utils | ||
``` | ||
To check for your devices run: | ||
```bash | ||
v4l2-ctl --list-devices | ||
``` | ||
This command will output something similar to this: | ||
```bash | ||
NVIDIA Tegra Video Input Device (platform:tegra-camrtc-ca): | ||
/dev/media0 | ||
|
||
vi-output, lt6911uxc 2-0056 (platform:tegra-capture-vi:0): | ||
/dev/video0 | ||
|
||
Dummy video device (0x0000) (platform:v4l2loopback-000): | ||
/dev/video3 | ||
``` | ||
Determine your desired video device and edit the source device in [segment_one_thing.yaml](segment_one_thing.yaml) | ||
|
||
## 🚀 Build and Run Instructions | ||
|
||
### ARM64 and x86 | ||
This application uses a custom Dockerfile based on a pytorch container. | ||
Build and run the application using | ||
```sh | ||
./dev_container build_and_run sam2 --docker_file applications/sam2/Dockerfile --img holohub:sam2 | ||
``` | ||
Or first build the container, then launch it and run. | ||
|
||
```sh | ||
./dev_container build --docker_file applications/sam2/Dockerfile --img holohub:sam2 | ||
``` | ||
```sh | ||
./dev_container launch --img holohub:sam2 | ||
``` | ||
```sh | ||
python holohub/applications/sam2/segment_one_thing.py | ||
``` | ||
|
||
### x86 only | ||
If you are only using an x86 system, you may use a Dockerfile based on the Holoscan container. Replace the [Dockerfile](./Dockerfile) with this [alternative Dockerfile](./alternative_docker/Dockerfile). | ||
Then, from the Holohub main directory run the following command: | ||
```sh | ||
./dev_container build_and_run sam2 | ||
``` | ||
|
||
Alternatively build and run: | ||
```bash | ||
./dev_container vscode sam2 | ||
``` | ||
Run the application in debug mode from VS Code, or execute it with: | ||
```sh | ||
python applications/sam2/segment_one_thing.py | ||
``` | ||
|
||
|
||
You can choose to output "logits" or "masks" in the configuration of the postprocessor and Holoviz operators in [segment_one_thing.yaml](segment_one_thing.yaml). | ||
|
||
## 💻 Supported Hardware | ||
- x86 w/ dGPU | ||
- IGX devKit w/ dGPU | ||
|
||
## 🙌 Acknowledgements | ||
- Meta, [SAM2](https://github.com/facebookresearch/segment-anything-2): for providing these models and inference infrastructure |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,81 @@ | ||
# syntax=docker/dockerfile:1 | ||
|
||
# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. | ||
# SPDX-License-Identifier: Apache-2.0 | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
# Build arguments declared before the first FROM. Arguments declared here are
# only usable in FROM lines; a stage must redeclare one to read its value.
ARG BASE_IMAGE
ARG GPU_TYPE
ARG COMPUTE_CAPACITY

############################################################
# Torch image (Used to extract the ARM64 dGPU pytorch wheel)
# NOTE(review): no visible instruction copies from this stage -- confirm it is
# still needed.
############################################################
FROM nvcr.io/nvidia/pytorch:24.06-py3 AS pytorch

############################################################
# Base image
############################################################
FROM ${BASE_IMAGE} AS base
# Build-time only: keeps apt from prompting during package installation.
ARG DEBIAN_FRONTEND=noninteractive
|
||
# Holohub setup: copy the `run` script from the build context into
# /tmp/scripts, mark it executable, and invoke its `setup` command.
RUN mkdir -p /tmp/scripts
COPY run /tmp/scripts/
RUN chmod +x /tmp/scripts/run
RUN /tmp/scripts/run setup
|
||
# Install git and git-lfs, then drop the apt package lists in the same layer
# so they do not end up in the image.
RUN <<EOF
set -e
apt-get update
apt-get install -y git git-lfs
rm -rf /var/lib/apt/lists/*
EOF

# Initialise Git LFS for the build user.
RUN git lfs install
|
||
# Copy over app-specific requirements
COPY applications/sam2/requirements.txt /tmp/requirements.txt

# Working directory intended for the Torch wheel from the PyTorch image
# (the PyTorch wheel is not available for ARM64 on PyPI).
# NOTE(review): no wheel is copied here at present, and the architecture-
# specific install step below is commented out.
WORKDIR /tmp/pip/
# Install the Torch wheel based on the target architecture | ||
# ARG TARGETARCH | ||
# RUN if [ "$TARGETARCH" = "amd64" ]; then \ | ||
# echo "Building for x86 (AMD64) architecture"; \ | ||
# python3 -m pip install torch>=2.3.1; \ | ||
# elif [ "$TARGETARCH" = "arm64" ]; then \ | ||
# echo "Not tested on ARM - Building for ARM64 architecture"; \ | ||
# python3 -m pip install torch>=2.3.1; \ | ||
# else \ | ||
# echo "Unknown architecture: $TARGETARCH"; \ | ||
# exit 1; \ | ||
# fi | ||
|
||
# Fetch SAM2 into /workspace, install it in editable mode (base package first,
# then the "demo" extra), and download the model checkpoints.
WORKDIR /workspace
ARG COMPUTE_CAPACITY
RUN <<EOF
set -e
git clone https://github.com/facebookresearch/segment-anything-2.git
cd segment-anything-2
python3 -m pip install --no-cache-dir -e .
python3 -m pip install --no-cache-dir -e ".[demo]"
cd checkpoints
./download_ckpts.sh
EOF
|
||
# setuptools goes in first; installing it ahead of the remaining requirements
# avoids install errors.
RUN python3 -m pip install --no-cache-dir setuptools \
    && python3 -m pip install --no-cache-dir -r /tmp/requirements.txt

# Default working directory for the resulting image.
WORKDIR /workspace/holohub
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
{ | ||
"application": { | ||
"name": "sam2", | ||
"authors": [ | ||
{ | ||
"name": "Holoscan Team", | ||
"affiliation": "NVIDIA" | ||
} | ||
], | ||
"language": "Python", | ||
"version": "1.0.0", | ||
"changelog": { | ||
"1.0": "Initial Release" | ||
}, | ||
"holoscan_sdk": { | ||
"minimum_required_version": "2.0.0", | ||
"tested_versions": [ | ||
"2.0.0" | ||
] | ||
}, | ||
"platforms": [ | ||
"amd64", | ||
"arm64" | ||
], | ||
"tags": [ | ||
"SAM2 Model" | ||
], | ||
"ranking": 1, | ||
"dependencies": { | ||
"SAM2": { | ||
"source": "https://github.com/facebookresearch/segment-anything-2", | ||
"version": "1.0", | ||
"license": "Apache-2.0" | ||
} | ||
}, | ||
|
||
"run": { | ||
"command": "python '<holohub_app_source>/segment_one_thing.py'", | ||
"workdir": "holohub_bin" | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
cupy-cuda12x | ||
numpy<2.0.0 |
Oops, something went wrong.