From 9912ce3b513e3dc6689b1706b545e423b0184574 Mon Sep 17 00:00:00 2001 From: Rudy Baraglia Date: Wed, 26 Oct 2022 08:13:14 +0000 Subject: [PATCH] 1.1.0 - See RELEASE.md --- .env_default_http | 10 +++ .env_default_task | 15 ++++ Dockerfile | 10 ++- README.md | 160 ++++++++++++++++++++++++++++---------- RELEASE.md | 5 ++ celery_app/register.py | 89 +++++++++++++++++++++ celery_app/tasks.py | 6 +- docker-compose.yml | 16 ++++ docker-entrypoint.sh | 22 +++++- healthcheck.sh | 6 +- http_server/confparser.py | 4 +- http_server/ingress.py | 6 +- punctuation/__init__.py | 3 +- 13 files changed, 300 insertions(+), 52 deletions(-) create mode 100644 .env_default_http create mode 100644 .env_default_task create mode 100644 celery_app/register.py create mode 100644 docker-compose.yml diff --git a/.env_default_http b/.env_default_http new file mode 100644 index 0000000..a837831 --- /dev/null +++ b/.env_default_http @@ -0,0 +1,10 @@ +# SERVING PARAMETERS +SERVICE_MODE=http +MODEL_TYPE=lin + +# SERVICE DISCOVERY +SERVICE_NAME=MY_PUNCTUATION_SERVICE +LANGUAGE= + +# CONCURRENCY +CONCURRENCY=2 \ No newline at end of file diff --git a/.env_default_task b/.env_default_task new file mode 100644 index 0000000..b60f87d --- /dev/null +++ b/.env_default_task @@ -0,0 +1,15 @@ +# SERVING PARAMETERS +SERVICE_MODE=task + +# SERVICE PARAMETERS +SERVICES_BROKER=redis://192.168.0.1:6379 +BROKER_PASS=password + +# SERVICE DISCOVERY +SERVICE_NAME=MY_PUNCTUATION_SERVICE +LANGUAGE=en-US/fr-FR/* +QUEUE_NAME=(Optionnal) +MODEL_INFO=This model does something + +# CONCURRENCY +CONCURRENCY=2 \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index 84f79a4..81625a8 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,6 +1,7 @@ -FROM python:3.8 -LABEL maintainer="stanfous@linagora.com, rbaraglia@linagora.com" +FROM python:3.9 +LABEL maintainer="rbaraglia@linagora.com" ENV PYTHONUNBUFFERED TRUE +ENV IMAGE_NAME linto-platform-punctuation RUN apt-get update \ && apt-get install --no-install-recommends 
-y \ @@ -28,9 +29,12 @@ COPY punctuation /usr/src/app/punctuation RUN mkdir /usr/src/app/model-store RUN mkdir -p /usr/src/app/tmp COPY config.properties /usr/src/app/config.properties - +COPY RELEASE.md ./ COPY docker-entrypoint.sh wait-for-it.sh healthcheck.sh ./ +# Grep CURRENT VERSION +RUN export VERSION=$(awk -v RS='' '/#/ {print; exit}' RELEASE.md | head -1 | sed 's/#//' | sed 's/ //') + ENV PYTHONPATH="${PYTHONPATH}:/usr/src/app/punctuation" HEALTHCHECK CMD ./healthcheck.sh diff --git a/README.md b/README.md index 6502ea5..a75572d 100644 --- a/README.md +++ b/README.md @@ -1,27 +1,41 @@ # LINTO-PLATFORM-PUNCTUATION -LinTO-platform-punctuation is the punctuation service within the [LinTO stack](https://github.com/linto-ai/linto-platform-stack). - -The Punctuation is configured with an .mar BERT model. - -LinTO-platform-puntuation can either be used as a standalone punctuation service or deployed within a micro-services infrastructure using a message broker connector. +LinTO-platform-punctuation is a LinTO service for punctuation prediction. It predicts punctuation from raw text or raw transcription. + +LinTO-platform-punctuation can either be used as a standalone punctuation service or deployed as a micro-service. + +## Table of content +* [Prerequisites](#pre-requisites) + * [Models](#models) +* [Deploy](#deploy) + * [HTTP](#http-api) + * [MicroService](#micro-service) +* [Usage](#usages) + * [HTTP API](#http-api) + * [/healthcheck](#healthcheck) + * [/punctation](#punctuation) + * [/docs](#docs) + * [Using celery](#using-celery) + +* [License](#license) +*** ## Pre-requisites -### Model -The punctuation service relies on a BERT model. +### Models +The punctuation service relies on a trained punctuation prediction model. -We provide some models on [dl.linto.ai](https://dl.linto.ai/downloads/model-distribution/punctuation_models/). +We provide homebrew models on [dl.linto.ai](https://dl.linto.ai/downloads/model-distribution/punctuation_models/). 
### Docker The punctuation service requires docker up and running. ### (micro-service) Service broker -The punctuation only entry point in job mode are tasks posted on a message broker. Supported message broker are RabbitMQ, Redis, Amazon SQS. +The punctuation only entry point in job mode are tasks posted on a REDIS message broker using [Celery](https://github.com/celery/celery). -## Deploy linto-platform-punctuation -linto-platform-punctuation can be deployed two ways: +## Deploy +linto-platform-punctuation can be deployed two different ways: * As a standalone punctuation service through an HTTP API. -* As a micro-service connected to a message broker. +* As a micro-service connected to a task queue. **1- First step is to build the image:** @@ -31,56 +45,122 @@ cd linto-platform-punctuation docker build . -t linto-platform-punctuation:latest ``` +or +```bash +docker pull registry.linto.ai/lintoai/linto-platform-punctuation:latest +``` + **2- Download the models** Have the punctuation model (.mar) ready at MODEL_PATH. -### HTTP API +### HTTP +**1- Fill the .env** ```bash -docker run --rm \ --v MODEL_PATH:/usr/src/app/model-store/punctuation.mar \ ---env CONCURRENCY=1 \ ---env LANGUAGE=fr_FR \ ---env SERVICE_MODE=http \ -linto-platform-punctuation:latest +cp .env_default_http .env ``` -This will run a container providing an http API binded on the host HOST_SERVING_PORT port. +Fill the .env with your values. **Parameters:** | Variables | Description | Example | |:-|:-|:-| -| MODEL_PATH | Your localy available model (.mar) | /my/path/to/models/punctuation.mar | -| LANGUAGE | Language code as a BCP-47 code | en-US, fr_FR, ... | -| CONCURRENCY | Number of worker | 1 | - -### Micro-service within LinTO-Platform stack ->LinTO-platform-punctuation can be deployed within the linto-platform-stack through the use of linto-platform-services-manager. Used this way, the container spawn celery worker waiting for punctuation task on a message broker. 
->LinTO-platform-punctuation in task mode is not intended to be launch manually. ->However, if you intent to connect it to your custom message's broker here are the parameters: +| SERVICE_NAME | The service's name | my_punctuation_service | +| CONCURRENCY | Number of worker | > 1 | -You need a message broker up and running at MY_SERVICE_BROKER. +**2- Run with docker** ```bash docker run --rm \ -v MODEL_PATH:/usr/src/app/model-store/punctuation.mar \ ---env SERVICES_BROKER=redis://MY_BROKER:BROKER_PORT \ ---env BROKER_PASS=password \ ---env CONCURRENCY=1 \ ---env LANGUAGE=fr_FR \ ---env SERVICE_MODE=task \ +-p HOST_SERVING_PORT:80 \ +--env-file .env \ linto-platform-punctuation:latest ``` +This will run a container providing an http API binded on the host HOST_SERVING_PORT port. + + +### Micro-service +>LinTO-platform-punctuation can be deployed as a microservice. Used this way, the container spawn celery workers waiting for punctuation tasks on a dedicated task queue. +>LinTO-platform-punctuation in task mode requires a configured REDIS broker. + +You need a message broker up and running at MY_SERVICE_BROKER. Instance are typically deployed as services in a docker swarm using the docker compose command: + +**1- Fill the .env** +```bash +cp .env_default_task .env +``` + +Fill the .env with your values. 
+ **Parameters:** | Variables | Description | Example | |:-|:-|:-| -| MODEL_PATH | Your localy available model (.mar) | /my/path/to/models/punctuation.mar | | SERVICES_BROKER | Service broker uri | redis://my_redis_broker:6379 | | BROKER_PASS | Service broker password (Leave empty if there is no password) | my_password | -| LANGUAGE | Punctuation language | en-US | -| CONCURRENCY | Number of worker (1 worker = 1 cpu) | [ 1 -> numberOfCPU] | +| QUEUE_NAME | (Optionnal) overide the generated queue's name (See Queue name bellow) | my_queue | +| SERVICE_NAME | Service's name | punctuation-ml | +| LANGUAGE | Language code as a BCP-47 code | en-US or * or languages separated by "|" | +| MODEL_INFO | Human readable description of the model | "Bert based model for french punctuation prediction" | +| CONCURRENCY | Number of worker (1 worker = 1 cpu) | >1 | + +> Do not use spaces or character "_" for SERVICE_NAME or language. + +**2- Fill the docker-compose.yml** + +`#docker-compose.yml` +```yaml +version: '3.7' + +services: + punctuation-service: + image: linto-platform-punctuation:latest + volumes: + - /my/path/to/models/punctuation.mar:/usr/src/app/model-store/punctuation.mar + env_file: .env + deploy: + replicas: 1 + networks: + - your-net + +networks: + your-net: + external: true +``` + +**2- Run with docker compose** + +```bash +docker stack deploy --resolve-image always --compose-file docker-compose.yml your_stack +``` + +**Queue name:** + +By default the service queue name is generated using SERVICE_NAME and LANGUAGE: `punctuation_{LANGUAGE}_{SERVICE_NAME}`. + +The queue name can be overided using the QUEUE_NAME env variable. + +**Service discovery:** + +As a micro-service, the instance will register itself in the service registry for discovery. The service information are stored as a JSON object in redis's db0 under the id `service:{HOST_NAME}`. 
+ +The following information are registered: + +```json +{ + "service_name": $SERVICE_NAME, + "host_name": $HOST_NAME, + "service_type": "punctuation", + "service_language": $LANGUAGE, + "queue_name": $QUEUE_NAME, + "version": "1.2.0", # This repository's version + "info": "Bert Based Punctuation model for french punctuation prediction", + "last_alive": 65478213, + "concurrency": 1 +} +``` ## Usages @@ -123,10 +203,10 @@ Return the punctuated text as a json object structured as follows: #### /docs The /docs route offers a OpenAPI/swagger interface. -### Through the message broker +### Using Celery -Punctuation-Worker accepts requests with the following arguments: -```file_path: str, with_metadata: bool``` +Punctuation-Worker accepts celery tasks with the following arguments: +```text: Union[str, List[str]]``` * text: (str or list) A sentence or a list of sentences. diff --git a/RELEASE.md b/RELEASE.md index 6721395..de33f05 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -1,3 +1,8 @@ +# 1.1.0 +- Added service registration +- Updated README +- Modified entrypoint and healthcheck to include service registration. + # 1.0.1 - Changes behavior on prediction error from failed to ignore. 
- Adds makefile for code styling (PEP 8) diff --git a/celery_app/register.py b/celery_app/register.py new file mode 100644 index 0000000..7100440 --- /dev/null +++ b/celery_app/register.py @@ -0,0 +1,89 @@ +"""The register module allows registering and unregistering operations within the service stack for service discovery purposes""" +import os +import sys +import uuid +from socket import gethostname +from time import time + +import redis +from redis.commands.json.path import Path +from redis.commands.search.field import NumericField, TextField +from redis.commands.search.indexDefinition import IndexDefinition, IndexType + +SERVICE_DISCOVERY_DB = 0 +SERVICE_TYPE = "punctuation" + +service_name = os.environ.get("SERVICE_NAME", SERVICE_TYPE) +service_lang = os.environ.get("LANGUAGE", "?") +host_name = gethostname() + + +def register(is_heartbeat: bool = False) -> bool: + """Registers the service and acts as heartbeat (is_heartbeat=True only refreshes the stored entry and skips index creation). + + Returns: + bool: registering status + """ + host, port = os.environ.get("SERVICES_BROKER").split("//")[1].split(":") + password = os.environ.get("BROKER_PASS", None) + r = redis.Redis( + host=host, port=int(port), db=SERVICE_DISCOVERY_DB, password=password + ) + + res = r.json().set(f"service:{host_name}", Path.root_path(), service_info()) + if is_heartbeat: + return res + else: + print(f"Service registered as service:{host_name}") + schema = ( + TextField("$.service_name", as_name="service_name"), + TextField("$.service_type", as_name="service_type"), + TextField("$.service_language", as_name="service_language"), + TextField("$.queue_name", as_name="queue_name"), + TextField("$.version", as_name="version"), + TextField("$.info", as_name="info"), + NumericField("$.last_alive", as_name="last_alive"), + NumericField("$.concurrency", as_name="concurrency"), + ) + try: + r.ft().create_index( + schema, + definition=IndexDefinition(prefix=["service:"], index_type=IndexType.JSON), + ) + except Exception as error: + print(f"Index service not created (it may already exist): {repr(error)}") + 
return res + + +def unregister() -> None: + """Un-register the service by deleting its service:{host_name} entry (errors are printed, not raised)""" + try: + host, port = os.environ.get("SERVICES_BROKER").split("//")[1].split(":") + r = redis.Redis( + host=host, port=int(port), db=SERVICE_DISCOVERY_DB, password=os.environ.get("BROKER_PASS", None) + ) + r.json().delete(f"service:{host_name}") + except Exception as error: + print(f"Failed to unregister: {repr(error)}") + + +def queue() -> str: + return os.environ.get("QUEUE_NAME") or f"{SERVICE_TYPE}_{service_lang}_{service_name}" # empty QUEUE_NAME falls back to the generated name + + +def service_info() -> dict: + return { + "service_name": service_name, + "host_name": host_name, + "service_type": SERVICE_TYPE, + "service_language": service_lang, + "queue_name": queue(), + "version": os.environ.get("VERSION", "1.1.0"), + "info": os.environ.get("MODEL_INFO", "unknown"), + "last_alive": int(time()), + "concurrency": int(os.environ.get("CONCURRENCY")), + } + + +if __name__ == "__main__": + sys.exit(0 if register() else 1) diff --git a/celery_app/tasks.py b/celery_app/tasks.py index d430a5c..9093020 100644 --- a/celery_app/tasks.py +++ b/celery_app/tasks.py @@ -44,4 +44,8 @@ def punctuation_task(self, text: Union[str, list]): punctuated_sentence = punctuated_sentence[0].upper() + punctuated_sentence[1:] punctuated_sentences.append(punctuated_sentence) - return punctuated_sentences[0] if len(punctuated_sentences) == 1 else punctuated_sentences + return ( + punctuated_sentences[0] + if len(punctuated_sentences) == 1 + else punctuated_sentences + ) diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..1b4dcc9 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,16 @@ +version: '3.7' + +services: + punctuation-service: + image: linto-platform-punctuation:latest + volumes: + - /path/to/your/model.mar:/usr/src/app/model-store/punctuation.mar + env_file: .env + deploy: + replicas: 1 + networks: + - your-net + +networks: + your-net: + external: true diff --git a/docker-entrypoint.sh b/docker-entrypoint.sh index 013b672..94fe59b 100755 --- a/docker-entrypoint.sh +++ 
b/docker-entrypoint.sh @@ -1,24 +1,40 @@ #!/bin/bash -set -e echo "RUNNING service" -#supervisord -c /usr/src/app/supervisor/supervisor.conf + +export VERSION=$(awk -v RS='' '/#/ {print; exit}' RELEASE.md | head -1 | sed 's/#//' | sed 's/ //') if [ -z "$SERVICE_MODE" ] then echo "ERROR: Must specify a serving mode: [ http | task ]" exit -1 else + # Model serving torchserve --start --ncs --ts-config /usr/src/app/config.properties if [ "$SERVICE_MODE" = "http" ] then echo "Running http server" + # HTTP API python http_server/ingress.py --debug elif [ "$SERVICE_MODE" == "task" ] then echo "Running celery worker" /usr/src/app/wait-for-it.sh $(echo $SERVICES_BROKER | cut -d'/' -f 3) --timeout=20 --strict -- echo " $SERVICES_BROKER (Service Broker) is up" - celery --app=celery_app.celeryapp worker -n punctuation_$LANGUAGE@%h --queues=punctuation_$LANGUAGE -c $CONCURRENCY + # MICRO SERVICE + ## QUEUE NAME (read from stdout so stderr output cannot leak into the name) + QUEUE=$(python -c "from celery_app.register import queue; print(queue())") + echo "Service set to $QUEUE" + + ## REGISTRATION + python -c "from celery_app.register import register; register()" + echo "Service registered" + + ## WORKER + celery --app=celery_app.celeryapp worker -n punctuation_$SERVICE_NAME@%h --queues=$QUEUE -c $CONCURRENCY + + ## UNREGISTERING + python -c "from celery_app.register import unregister; unregister()" + echo "Service unregistered" else echo "ERROR: Wrong serving command: $SERVICE_MODE" exit -1 diff --git a/healthcheck.sh b/healthcheck.sh index 275c477..611ada3 100755 --- a/healthcheck.sh +++ b/healthcheck.sh @@ -6,5 +6,9 @@ if [ "$SERVICE_MODE" = "http" ] then curl --fail http://localhost:80/healthcheck || exit 1 else - celery --app=celery_app.celeryapp inspect ping -d punctuation_$LANGUAGE@$HOSTNAME || exit 1 + # Update last alive + python -c "from celery_app.register import register; register(is_heartbeat=True)" + + # Ping worker + celery --app=celery_app.celeryapp inspect ping -d punctuation_$SERVICE_NAME@$HOSTNAME || exit 1 fi diff 
--git a/http_server/confparser.py b/http_server/confparser.py index 13be349..5972be5 100644 --- a/http_server/confparser.py +++ b/http_server/confparser.py @@ -25,7 +25,9 @@ def createParser() -> argparse.ArgumentParser: ) # SWAGGER - parser.add_argument("--swagger_url", type=str, help="Swagger interface url", default="/docs") + parser.add_argument( + "--swagger_url", type=str, help="Swagger interface url", default="/docs" + ) parser.add_argument( "--swagger_prefix", type=str, diff --git a/http_server/ingress.py b/http_server/ingress.py index fc9cde7..66c40d0 100644 --- a/http_server/ingress.py +++ b/http_server/ingress.py @@ -5,7 +5,7 @@ import requests from confparser import createParser -from flask import Flask, Response, json, request +from flask import Flask, json, request from serving import GunicornServing from swagger import setupSwaggerUI @@ -61,7 +61,9 @@ def punctuate(): if result.status_code == 200: punctuated_sentence = result.text # First letter in capital - punctuated_sentence = punctuated_sentence[0].upper() + punctuated_sentence[1:] + punctuated_sentence = ( + punctuated_sentence[0].upper() + punctuated_sentence[1:] + ) punctuated_sentences.append(punctuated_sentence) else: raise Exception(result.text) diff --git a/punctuation/__init__.py b/punctuation/__init__.py index c468019..a05ea06 100644 --- a/punctuation/__init__.py +++ b/punctuation/__init__.py @@ -2,6 +2,7 @@ import os logging.basicConfig( - format="%(asctime)s %(name)s %(levelname)s: %(message)s", datefmt="%d/%m/%Y %H:%M:%S" + format="%(asctime)s %(name)s %(levelname)s: %(message)s", + datefmt="%d/%m/%Y %H:%M:%S", ) logger = logging.getLogger("__punctuation__")