diff --git a/.github/docker-build-config.yml b/.github/docker-build-config.yml index 11bac0f94..78f22f122 100644 --- a/.github/docker-build-config.yml +++ b/.github/docker-build-config.yml @@ -22,6 +22,8 @@ backend_tag_prefix: nemoasr- - backend_dir_name: segment_anything_model backend_tag_prefix: sam- +- backend_dir_name: segment_anything_2 + backend_tag_prefix: sa2- - backend_dir_name: sklearn_text_classifier backend_tag_prefix: sklearntxtclass- - backend_dir_name: spacy diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index d5aa3a0d0..65a609aed 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -36,7 +36,7 @@ jobs: - uses: hmarr/debug-action@v3.0.0 - name: Free Disk Space - if: matrix.ml_backend == 'mmdetection-3' + if: matrix.ml_backend == 'mmdetection-3' || matrix.ml_backend == 'segment_anything_2' uses: jlumbroso/free-disk-space@main with: # this might remove tools that are actually needed, diff --git a/label_studio_ml/examples/segment_anything_2/.dockerignore b/label_studio_ml/examples/segment_anything_2/.dockerignore new file mode 100644 index 000000000..b8a7ea062 --- /dev/null +++ b/label_studio_ml/examples/segment_anything_2/.dockerignore @@ -0,0 +1,18 @@ +# Exclude everything +** + +# Include Dockerfile and docker-compose for reference (optional, decide based on your use case) +!Dockerfile +!docker-compose.yml + +# Include Python application files +!*.py + +# Include requirements files +!requirements*.txt + +# Include script +!*.sh + +# Exclude specific requirements if necessary +# requirements-test.txt (Uncomment if you decide to exclude this) diff --git a/label_studio_ml/examples/segment_anything_2/Dockerfile b/label_studio_ml/examples/segment_anything_2/Dockerfile new file mode 100644 index 000000000..d2ee7f93a --- /dev/null +++ b/label_studio_ml/examples/segment_anything_2/Dockerfile @@ -0,0 +1,55 @@ +FROM pytorch/pytorch:2.1.2-cuda12.1-cudnn8-runtime +ARG DEBIAN_FRONTEND=noninteractive +ARG TEST_ENV + 
+WORKDIR /app
+
+RUN conda update conda -y
+
+RUN --mount=type=cache,target="/var/cache/apt",sharing=locked \
+    --mount=type=cache,target="/var/lib/apt/lists",sharing=locked \
+    apt-get -y update \
+    && apt-get install -y git wget \
+    && apt-get install -y g++ freeglut3-dev build-essential libx11-dev \
+    libxmu-dev libxi-dev libglu1-mesa libglu1-mesa-dev libfreeimage-dev \
+    && apt-get -y install ffmpeg libsm6 libxext6 libffi-dev python3-dev python3-pip gcc
+
+ENV PYTHONUNBUFFERED=1 \
+    PYTHONDONTWRITEBYTECODE=1 \
+    PIP_CACHE_DIR=/.cache \
+    PORT=9090 \
+    WORKERS=2 \
+    THREADS=4 \
+    SEGMENT_ANYTHING_2_REPO_PATH=/segment-anything-2
+
+RUN conda install -c "nvidia/label/cuda-12.1.1" cuda -y
+ENV CUDA_HOME=/opt/conda \
+    TORCH_CUDA_ARCH_LIST="6.0;6.1;7.0;7.5;8.0;8.6+PTX;8.9;9.0"
+
+# requirements.txt is empty: gunicorn and label-studio-ml are listed in requirements-base.txt
+COPY requirements-base.txt requirements.txt ./
+RUN --mount=type=cache,target=${PIP_CACHE_DIR},sharing=locked \
+    pip3 install -r requirements-base.txt -r requirements.txt
+
+# install segment-anything-2
+RUN git clone https://github.com/facebookresearch/segment-anything-2.git /segment-anything-2
+WORKDIR /segment-anything-2
+RUN --mount=type=cache,target=${PIP_CACHE_DIR},sharing=locked \
+    pip3 install -e .
+# a bare `RUN cd` does not persist to the next RUN; model.py loads <repo>/checkpoints/<MODEL_CHECKPOINT>
+WORKDIR /segment-anything-2/checkpoints
+RUN ./download_ckpts.sh
+
+WORKDIR /app
+
+# install test requirements if needed
+COPY requirements-test.txt .
+# build only when TEST_ENV="true"
+RUN --mount=type=cache,target=${PIP_CACHE_DIR},sharing=locked \
+    if [ "$TEST_ENV" = "true" ]; then \
+      pip3 install -r requirements-test.txt; \
+    fi
+
+COPY . ./
+
+CMD ["/app/start.sh"]
diff --git a/label_studio_ml/examples/segment_anything_2/README.md b/label_studio_ml/examples/segment_anything_2/README.md
new file mode 100644
index 000000000..e5047e3c6
--- /dev/null
+++ b/label_studio_ml/examples/segment_anything_2/README.md
@@ -0,0 +1,65 @@
+This guide describes the simplest way to start using **SegmentAnything 2** with Label Studio.
+ +## Using SAM2 with Label Studio (tutorial) +[![Connecting SAM2 Model to Label Studio for Image Annotation ](https://img.youtube.com/vi/FTg8P8z4RgY/0.jpg)](https://www.youtube.com/watch?v=FTg8P8z4RgY) + + +Note that as of 8/1/2024, SAM2 only runs on GPU. + +## Running from source + +1. To run the ML backend without Docker, you have to clone the repository and install all dependencies using pip: + +```bash +git clone https://github.com/HumanSignal/label-studio-ml-backend.git +cd label-studio-ml-backend +pip install -e . +cd label_studio_ml/examples/segment_anything_2 +pip install -r requirements.txt +``` + +2. Download [`segment-anything-2` repo](https://github.com/facebookresearch/segment-anything-2) into the root directory. Install SegmentAnything model and download checkpoints using [the official Meta documentation](https://github.com/facebookresearch/segment-anything-2?tab=readme-ov-file#installation) + + +3. Then you can start the ML backend: + +```bash +cd ../ +label-studio-ml start ./segment_anything_2 +``` + +## Running with Docker (coming soon) + +1. Start Machine Learning backend on `http://localhost:9090` with prebuilt image: + +```bash +docker-compose up +``` + +2. Validate that backend is running + +```bash +$ curl http://localhost:9090/ +{"status":"UP"} +``` + +3. Connect to the backend from Label Studio running on the same host: go to your project `Settings -> Machine Learning -> Add Model` and specify `http://localhost:9090` as a URL. + + +# Configuration +Parameters can be set in `docker-compose.yml` before running the container. 
+ + +The following common parameters are available: +- `DEVICE` - specify the device for the model server (currently only `cuda` is supported, `cpu` is coming soon) +- `MODEL_CONFIG` - SAM2 model configuration file (`sam2_hiera_l.yaml` by default) +- `MODEL_CHECKPOINT` - SAM2 model checkpoint file (`sam2_hiera_large.pt` by default) +- `BASIC_AUTH_USER` - specify the basic auth user for the model server +- `BASIC_AUTH_PASS` - specify the basic auth password for the model server +- `LOG_LEVEL` - set the log level for the model server +- `WORKERS` - specify the number of workers for the model server +- `THREADS` - specify the number of threads for the model server + +# Customization + +The ML backend can be customized by adding your own models and logic inside the `./segment_anything_2` directory. \ No newline at end of file diff --git a/label_studio_ml/examples/segment_anything_2/_wsgi.py b/label_studio_ml/examples/segment_anything_2/_wsgi.py new file mode 100644 index 000000000..957b0dfe9 --- /dev/null +++ b/label_studio_ml/examples/segment_anything_2/_wsgi.py @@ -0,0 +1,121 @@ +import os +import argparse +import json +import logging +import logging.config + +logging.config.dictConfig({ + "version": 1, + "formatters": { + "standard": { + "format": "[%(asctime)s] [%(levelname)s] [%(name)s::%(funcName)s::%(lineno)d] %(message)s" + } + }, + "handlers": { + "console": { + "class": "logging.StreamHandler", + "level": os.getenv('LOG_LEVEL'), + "stream": "ext://sys.stdout", + "formatter": "standard" + } + }, + "root": { + "level": os.getenv('LOG_LEVEL'), + "handlers": [ + "console" + ], + "propagate": True + } +}) + +from label_studio_ml.api import init_app +from model import NewModel + + +_DEFAULT_CONFIG_PATH = os.path.join(os.path.dirname(__file__), 'config.json') + + +def get_kwargs_from_config(config_path=_DEFAULT_CONFIG_PATH): + if not os.path.exists(config_path): + return dict() + with open(config_path) as f: + config = json.load(f) + assert isinstance(config, dict) 
+    return config
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description='Label studio')
+    parser.add_argument(
+        '-p', '--port', dest='port', type=int, default=9090,
+        help='Server port')
+    parser.add_argument(
+        '--host', dest='host', type=str, default='0.0.0.0',
+        help='Server host')
+    # KEY=VAL pairs are split on the first '=' only, so a value may itself contain '='
+    parser.add_argument(
+        '--kwargs', '--with', dest='kwargs', metavar='KEY=VAL', nargs='+', type=lambda kv: kv.split('=', 1),
+        help='Additional LabelStudioMLBase model initialization kwargs')
+    parser.add_argument(
+        '-d', '--debug', dest='debug', action='store_true',
+        help='Switch debug mode')
+    parser.add_argument(
+        '--log-level', dest='log_level', choices=['DEBUG', 'INFO', 'WARNING', 'ERROR'], default=None,
+        help='Logging level')
+    parser.add_argument(
+        '--model-dir', dest='model_dir', default=os.path.dirname(__file__),
+        help='Directory where models are stored (relative to the project directory)')
+    parser.add_argument(
+        '--check', dest='check', action='store_true',
+        help='Validate model instance before launching server')
+    parser.add_argument('--basic-auth-user',
+                        default=os.environ.get('ML_SERVER_BASIC_AUTH_USER', None),
+                        help='Basic auth user')
+    parser.add_argument('--basic-auth-pass',
+                        default=os.environ.get('ML_SERVER_BASIC_AUTH_PASS', None),
+                        help='Basic auth pass')
+
+    args = parser.parse_args()
+
+    # setup logging level
+    if args.log_level:
+        logging.root.setLevel(args.log_level)
+
+    def isfloat(value):  # True when `value` can be parsed by float()
+        try:
+            float(value)
+            return True
+        except ValueError:
+            return False
+
+    def parse_kwargs():  # turn the raw KEY=VAL strings into int / bool / float / str values
+        param = dict()
+        for k, v in args.kwargs:
+            if v.isdigit():
+                param[k] = int(v)
+            elif v == 'True' or v == 'true':
+                param[k] = True
+            elif v == 'False' or v == 'false':
+                param[k] = False
+            elif isfloat(v):
+                param[k] = float(v)
+            else:
+                param[k] = v
+        return param
+
+    # values from config.json first, then command-line KEY=VAL pairs override them
+    kwargs = get_kwargs_from_config()
+
+    if args.kwargs:
+        kwargs.update(parse_kwargs())
+    if args.check:
+        print('Check "' + NewModel.__name__ + '" instance creation..')
+        model = NewModel(**kwargs)
+
+    app = init_app(model_class=NewModel, basic_auth_user=args.basic_auth_user, basic_auth_pass=args.basic_auth_pass)
+
+    app.run(host=args.host, port=args.port, debug=args.debug)
+
+else:
+    # for uWSGI use
+    app = init_app(model_class=NewModel)
diff --git a/label_studio_ml/examples/segment_anything_2/docker-compose.yml b/label_studio_ml/examples/segment_anything_2/docker-compose.yml
new file mode 100644
index 000000000..8df4a9d02
--- /dev/null
+++ b/label_studio_ml/examples/segment_anything_2/docker-compose.yml
@@ -0,0 +1,50 @@
+version: "3.8"
+
+services:
+  ml-backend:
+    container_name: ml-backend
+    image: humansignal/ml-backend:v0
+    build:
+      context: .
+      args:
+        TEST_ENV: ${TEST_ENV}
+    environment:
+      # specify these parameters if you want to use basic auth for the model server
+      - BASIC_AUTH_USER=
+      - BASIC_AUTH_PASS=
+      # set the log level for the model server
+      - LOG_LEVEL=DEBUG
+      # any other parameters that you want to pass to the model server
+      - ANY=PARAMETER
+      # specify the number of workers and threads for the model server
+      - WORKERS=1
+      - THREADS=8
+      # specify the model directory (likely you don't need to change this)
+      - MODEL_DIR=/data/models
+      # specify device
+      - DEVICE=cuda  # or 'cpu' (coming soon)
+      # SAM2 model config
+      - MODEL_CONFIG=sam2_hiera_l.yaml
+      # SAM2 checkpoint
+      - MODEL_CHECKPOINT=sam2_hiera_large.pt
+
+      # Specify the Label Studio URL and API key to access
+      # uploaded, local storage and cloud storage files.
+      # Do not use 'localhost' as it does not work within Docker containers.
+      # Use prefix 'http://' or 'https://' for the URL always.
+      # Determine the actual IP using 'ifconfig' (Linux/Mac) or 'ipconfig' (Windows).
+      - LABEL_STUDIO_URL=
+      - LABEL_STUDIO_API_KEY=
+    ports:
+      - "9090:9090"
+    volumes:
+      - "./data/server:/data"
+    # DEVICE=cuda makes model.py query torch.cuda at import time, so the container needs a GPU;
+    # reserve one NVIDIA device (the host must have the NVIDIA Container Toolkit installed)
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              count: 1
+              capabilities: [gpu]
diff --git a/label_studio_ml/examples/segment_anything_2/model.py b/label_studio_ml/examples/segment_anything_2/model.py
new file mode 100644
index 000000000..33e7c0236
--- /dev/null
+++ b/label_studio_ml/examples/segment_anything_2/model.py
@@ -0,0 +1,185 @@
+import logging
+import torch
+import numpy as np
+import os
+import pathlib
+from typing import List, Dict, Optional
+from uuid import uuid4
+from label_studio_ml.model import LabelStudioMLBase
+from label_studio_ml.response import ModelResponse
+from label_studio_sdk.converter import brush
+from label_studio_sdk._extensions.label_studio_tools.core.utils.io import get_local_path
+from PIL import Image
+from sam2.build_sam import build_sam2
+from sam2.sam2_image_predictor import SAM2ImagePredictor
+
+
+logger = logging.getLogger(__name__)
+
+DEVICE = os.getenv('DEVICE', 'cuda')
+SEGMENT_ANYTHING_2_REPO_PATH = os.getenv('SEGMENT_ANYTHING_2_REPO_PATH', 'segment-anything-2')
+MODEL_CONFIG = os.getenv('MODEL_CONFIG', 'sam2_hiera_l.yaml')
+MODEL_CHECKPOINT = os.getenv('MODEL_CHECKPOINT', 'sam2_hiera_large.pt')
+
+if DEVICE == 'cuda':
+    # enter bfloat16 autocast once at import time (the context is never exited)
+    torch.autocast(device_type="cuda", dtype=torch.bfloat16).__enter__()
+
+    if torch.cuda.get_device_properties(0).major >= 8:
+        # turn on tfloat32 for Ampere GPUs (https://pytorch.org/docs/stable/notes/cuda.html#tensorfloat-32-tf32-on-ampere-devices)
+        torch.backends.cuda.matmul.allow_tf32 = True
+        torch.backends.cudnn.allow_tf32 = True
+
+
+# build path to the model checkpoint
+sam2_checkpoint = str(pathlib.Path(__file__).parent / SEGMENT_ANYTHING_2_REPO_PATH / "checkpoints" / MODEL_CHECKPOINT)
+
+sam2_model = build_sam2(MODEL_CONFIG, sam2_checkpoint, device=DEVICE)
+
+predictor = SAM2ImagePredictor(sam2_model)
+
+
+class NewModel(LabelStudioMLBase):
+    """Label Studio ML backend that turns keypoint and rectangle prompts into SAM2 brush masks.
+    """
+
+    def get_results(self, masks, probs, width, height, from_name, to_name, label):
+        """Wrap predicted masks into a single Label Studio prediction.
+
+        Every mask is scaled by 255, RLE-encoded with `brush.mask2rle` and paired with the
+        score at the same position in `probs`; all regions get the brush label `label`.
+        Returns a one-element list with the regions, the model version and the mean score
+        (the mean is 0 when there are no masks).
+        """
+        results = []
+        total_prob = 0
+        for mask, prob in zip(masks, probs):
+            # short random region id: only 4 hex characters, so uniqueness is likely but not guaranteed
+            label_id = str(uuid4())[:4]
+            # converting the mask from the model to RLE format which is usable in Label Studio
+            mask = mask * 255
+            rle = brush.mask2rle(mask)
+            total_prob += prob
+            results.append({
+                'id': label_id,
+                'from_name': from_name,
+                'to_name': to_name,
+                'original_width': width,
+                'original_height': height,
+                'image_rotation': 0,
+                'value': {
+                    'format': 'rle',
+                    'rle': rle,
+                    'brushlabels': [label],
+                },
+                'score': prob,
+                'type': 'brushlabels',
+                'readonly': False
+            })
+
+        return [{
+            'result': results,
+            'model_version': self.get('model_version'),
+            'score': total_prob / max(len(results), 1)
+        }]
+
+    def set_image(self, image_url):
+        """Resolve `image_url` to a local file, load it as an RGB array and pass it to the predictor."""
+        image_path = get_local_path(image_url)
+        # the context manager closes the file handle once the pixels have been copied out
+        with Image.open(image_path) as img:
+            image = np.array(img.convert("RGB"))
+        predictor.set_image(image)
+
+    def _sam_predict(self, img_url, point_coords=None, point_labels=None, input_box=None, task=None):
+        """Run SAM2 on one image and keep only its best-scoring mask.
+
+        `point_coords`, `point_labels` and `input_box` are plain lists (or None) and are
+        converted to float32 arrays before being passed to `predictor.predict`; `task` is
+        accepted but not used. Returns `{'masks': [uint8 mask], 'probs': [float score]}`.
+        """
+        self.set_image(img_url)
+        point_coords = np.array(point_coords, dtype=np.float32) if point_coords else None
+        point_labels = np.array(point_labels, dtype=np.float32) if point_labels else None
+        input_box = np.array(input_box, dtype=np.float32) if input_box else None
+
+        masks, scores, _ = predictor.predict(
+            point_coords=point_coords,
+            point_labels=point_labels,
+            box=input_box,
+            multimask_output=True
+        )
+        # order the candidates from highest to lowest score and keep the first one
+        sorted_ind = np.argsort(scores)[::-1]
+        masks = masks[sorted_ind]
+        scores = scores[sorted_ind]
+        mask = masks[0, :, :].astype(np.uint8)
+        prob = float(scores[0])
+        return {
+            'masks': [mask],
+            'probs': [prob]
+        }
+
+    def predict(self, tasks: List[Dict], context: Optional[Dict] = None, **kwargs) -> ModelResponse:
+        """Return a brush mask for the keypoints / rectangle drawn in the current interaction.
+
+        Only the first task is used. An empty response is returned when `context` has no
+        results or none of them is a keypoint or a rectangle.
+        """
+
+        from_name, to_name, value = self.get_first_tag_occurence('BrushLabels', 'Image')
+
+        if not context or not context.get('result'):
+            # if there is no context, no interaction has happened yet
+            return ModelResponse(predictions=[])
+
+        image_width = context['result'][0]['original_width']
+        image_height = context['result'][0]['original_height']
+
+        # collect context information
+        point_coords = []
+        point_labels = []
+        input_box = None
+        selected_label = None
+        for ctx in context['result']:
+            ctx_type = ctx['type']
+            if ctx_type not in ('keypointlabels', 'rectanglelabels'):
+                # not a SAM2 prompt; skip it before touching x / y, which such a region may not have
+                continue
+            # coordinates arrive as percentages of the image size
+            x = ctx['value']['x'] * image_width / 100
+            y = ctx['value']['y'] * image_height / 100
+            selected_label = ctx['value'][ctx_type][0]
+            if ctx_type == 'keypointlabels':
+                point_labels.append(int(ctx.get('is_positive', 0)))
+                point_coords.append([int(x), int(y)])
+            elif ctx_type == 'rectanglelabels':
+                box_width = ctx['value']['width'] * image_width / 100
+                box_height = ctx['value']['height'] * image_height / 100
+                input_box = [int(x), int(y), int(box_width + x), int(box_height + y)]
+
+        if not point_coords and input_box is None:
+            # nothing to prompt the model with
+            return ModelResponse(predictions=[])
+
+        logger.debug(f'Point coords are {point_coords}, point labels are {point_labels}, input box is {input_box}')
+
+        img_url = tasks[0]['data'][value]
+        predictor_results = self._sam_predict(
+            img_url=img_url,
+            point_coords=point_coords or None,
+            point_labels=point_labels or None,
+            input_box=input_box,
+            task=tasks[0]
+        )
+
+        predictions = self.get_results(
+            masks=predictor_results['masks'],
+            probs=predictor_results['probs'],
+            width=image_width,
+            height=image_height,
+            from_name=from_name,
+            to_name=to_name,
+            label=selected_label)
+
+        return ModelResponse(predictions=predictions)
diff --git a/label_studio_ml/examples/segment_anything_2/requirements-base.txt b/label_studio_ml/examples/segment_anything_2/requirements-base.txt
new file mode 100644
index 000000000..68ce357c7
--- /dev/null
+++ b/label_studio_ml/examples/segment_anything_2/requirements-base.txt
@@ -0,0 +1,2 @@
+gunicorn==22.0.0
+label-studio-ml @ git+https://github.com/HumanSignal/label-studio-ml-backend.git
\ No newline at end of file
diff --git a/label_studio_ml/examples/segment_anything_2/requirements-test.txt
b/label_studio_ml/examples/segment_anything_2/requirements-test.txt new file mode 100644 index 000000000..cffeec658 --- /dev/null +++ b/label_studio_ml/examples/segment_anything_2/requirements-test.txt @@ -0,0 +1,2 @@ +pytest +pytest-cov \ No newline at end of file diff --git a/label_studio_ml/examples/segment_anything_2/requirements.txt b/label_studio_ml/examples/segment_anything_2/requirements.txt new file mode 100644 index 000000000..e69de29bb diff --git a/label_studio_ml/examples/segment_anything_2/start.sh b/label_studio_ml/examples/segment_anything_2/start.sh new file mode 100755 index 000000000..449c16e31 --- /dev/null +++ b/label_studio_ml/examples/segment_anything_2/start.sh @@ -0,0 +1,4 @@ +#!/bin/bash + +# Execute the gunicorn command +exec gunicorn --bind :${PORT:-9090} --workers ${WORKERS:-1} --threads ${THREADS:-4} --timeout 0 _wsgi:app diff --git a/label_studio_ml/examples/segment_anything_2/test_api.py b/label_studio_ml/examples/segment_anything_2/test_api.py new file mode 100644 index 000000000..ca7767be1 --- /dev/null +++ b/label_studio_ml/examples/segment_anything_2/test_api.py @@ -0,0 +1,47 @@ +""" +This file contains tests for the API of your model. You can run these tests by installing test requirements: + + ```bash + pip install -r requirements-test.txt + ``` +Then execute `pytest` in the directory of this file. + +- Change `NewModel` to the name of the class in your model.py file. +- Change the `request` and `expected_response` variables to match the input and output of your model. 
+""" + +import pytest +import json +from model import NewModel + + +@pytest.fixture +def client(): + from _wsgi import init_app + app = init_app(model_class=NewModel) + app.config['TESTING'] = True + with app.test_client() as client: + yield client + + +def test_predict(client): + request = { + 'tasks': [{ + 'data': { + # Your input test data here + } + }], + # Your labeling configuration here + 'label_config': '' + } + + expected_response = { + 'results': [{ + # Your expected result here + }] + } + + response = client.post('/predict', data=json.dumps(request), content_type='application/json') + assert response.status_code == 200 + response = json.loads(response.data) + assert response == expected_response