Skip to content

Commit

Permalink
migration
Browse files Browse the repository at this point in the history
  • Loading branch information
guokan-shang committed Mar 30, 2023
1 parent 8760ecb commit 3e51d9d
Show file tree
Hide file tree
Showing 23 changed files with 1,317 additions and 111 deletions.
11 changes: 10 additions & 1 deletion .envdefault
Original file line number Diff line number Diff line change
@@ -1,4 +1,13 @@
# APPLICATION PARAMETERS
APP_LANG=fr en
ASSETS_PATH_ON_HOST=./assets
ASSETS_PATH_IN_CONTAINER=/app/assets
WORKER_NUMBER=1

# SERVING PARAMETERS
SERVICE_MODE=http
CONCURRENCY=1

# MICRO-SERVICE PARAMETERS
SERVICE_NAME=kpe
SERVICES_BROKER=redis://172.17.0.1:6379
BROKER_PASS=
20 changes: 20 additions & 0 deletions .github/workflows/dockerhub-description.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
name: Update Docker Hub Description
on:
push:
branches:
- master
paths:
- README.md
- .github/workflows/dockerhub-description.yml
jobs:
dockerHubDescription:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- name: Docker Hub Description
uses: peter-evans/dockerhub-description@v3
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_PASSWORD }}
repository: lintoai/linto-platform-nlp-keyphrase-extraction
readme-filepath: ./README.md
20 changes: 12 additions & 8 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,16 +1,20 @@
FROM lintoai/linto-platform-nlp-core:latest
LABEL maintainer="[email protected]"

WORKDIR /app
WORKDIR /usr/src/app

COPY ./requirements.txt /app/
COPY requirements.txt ./
RUN pip install --no-cache-dir -r requirements.txt

COPY .envdefault /app/
COPY ./scripts /app/scripts
COPY ./components /app/components
COPY kpe /usr/src/app/kpe
COPY components /usr/src/app/components
COPY celery_app /usr/src/app/celery_app
COPY http_server /usr/src/app/http_server
COPY document /usr/src/app/document
COPY docker-entrypoint.sh wait-for-it.sh healthcheck.sh ./

HEALTHCHECK --interval=15s CMD curl -fs http://0.0.0.0/health || exit 1
ENV PYTHONPATH="${PYTHONPATH}:/usr/src/app/kpe"

ENTRYPOINT ["/opt/conda/bin/gunicorn", "scripts.main:app", "--worker-class", "uvicorn.workers.UvicornWorker", "--bind", "0.0.0.0:80", "--access-logfile", "-", "--error-logfile", "-"]
CMD ["--workers", "1"]
HEALTHCHECK CMD ./healthcheck.sh

ENTRYPOINT ["./docker-entrypoint.sh"]
661 changes: 661 additions & 0 deletions LICENSE

Large diffs are not rendered by default.

3 changes: 3 additions & 0 deletions RELEASE.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
# 0.2.0
- Migration to the [template](https://github.com/linto-ai/linto-template-microservice) of LinTO microservices.

# 0.1.0
- Initial commit.
- Keyphrase Extraction.
File renamed without changes.
28 changes: 28 additions & 0 deletions celery_app/celeryapp.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
import os
from celery import Celery

from kpe import logger

# Celery application object; `celery_app.tasks` is imported so its tasks get registered.
celery = Celery(__name__, include=['celery_app.tasks'])
service_name = os.environ.get("SERVICE_NAME", "kpe")
broker_url = os.environ.get("SERVICES_BROKER")

# Keep the credential-free location for logging: once BROKER_PASS is spliced
# into `broker_url` below, that string must never be written to the logs.
broker_location = broker_url

if os.environ.get("BROKER_PASS", False):
    # Rebuild "<scheme>//<host>" as "<scheme>//:<password>@<host>".
    components = broker_url.split('//')
    broker_url = f'{components[0]}//:{os.environ.get("BROKER_PASS")}@{components[1]}'

# The same server is used for both roles, with suffix /0 for the broker and
# /1 for the result backend.
celery.conf.broker_url = "{}/0".format(broker_url)
celery.conf.result_backend = "{}/1".format(broker_url)
celery.conf.update(
    result_expires=3600,
    task_acks_late=True,
    task_track_started=True)

# Queues
# NOTE(review): the queue name is hard-coded to 'kpe' while the worker is
# started with --queues=${SERVICE_NAME}; confirm they are meant to match.
celery.conf.update(
    {'task_routes': {
        'kpe_task': {'queue': 'kpe'},
    }}
)

logger.info(
    f"Celery configured for broker located at {broker_location} with service name {service_name}"
)
34 changes: 34 additions & 0 deletions celery_app/tasks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
import spacy
import components

from typing import Dict, List

from celery_app.celeryapp import celery

from kpe import logger
from kpe.processing import LM_MAP, MODELS, get_model
from kpe.processing.utils import get_data


@celery.task(name="kpe_task")
def kpe_task(lang: str, texts: List[str], component_cfg: Dict = None):
    """Extract keyphrases from a batch of texts.

    Args:
        lang: Language code; must be a key of ``LM_MAP``.
        texts: Raw texts to process, one string per article.
        component_cfg: Optional per-component configuration forwarded to
            ``nlp.pipe``. ``None`` (the default) means no overrides.

    Returns:
        A dict ``{"kpe": [...]}`` holding one ``get_data(doc)`` result per
        input text, in input order.

    Raises:
        ValueError: If ``lang`` is not a key of ``LM_MAP``.
        RuntimeError: If the model mapped to ``lang`` is not in ``MODELS``.
    """
    # Check language availability
    if lang not in LM_MAP:
        raise ValueError(f"Language {lang} is not supported.")

    model_name = LM_MAP[lang]
    if model_name not in MODELS:
        raise RuntimeError(f"Model {model_name} for language {lang} is not loaded.")

    # A blank pipeline carrying only the "kpe" component, bound to the model.
    nlp = spacy.blank(lang)
    nlp.add_pipe("kpe", config={"model": {"@misc": "get_model", "name": model_name}})

    # Use a fresh dict per call instead of a shared mutable default argument.
    cfg = {} if component_cfg is None else component_cfg

    response_body = [get_data(doc) for doc in nlp.pipe(texts, component_cfg=cfg)]

    return {"kpe": response_body}
3 changes: 1 addition & 2 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,4 @@ services:
- .env
volumes:
- $ASSETS_PATH_ON_HOST:$ASSETS_PATH_IN_CONTAINER:ro
command: ["--workers", $WORKER_NUMBER]
runtime: nvidia
#runtime: nvidia
32 changes: 32 additions & 0 deletions docker-entrypoint.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
#!/bin/bash
# Container entrypoint: starts either the HTTP server or a Celery worker,
# depending on SERVICE_MODE.
# -e: abort on the first failing command; -a: export every variable assigned.
set -ea

echo "RUNNING SERVICE"

# Launch parameters, environment variables and dependencies check
if [ -z "$SERVICE_MODE" ]
then
    echo "ERROR: Must specify a serving mode: [ http | task ]"
    exit -1
else
    if [ "$SERVICE_MODE" = "http" ]
    then
        echo "RUNNING HTTP SERVER"
        python http_server/ingress.py --debug
    elif [ "$SERVICE_MODE" == "task" ]
    then
        if [[ -z "$SERVICES_BROKER" ]]
        then
            echo "ERROR: SERVICES_BROKER variable not specified, cannot start celery worker."
            # `return` is only valid inside a function or a sourced script;
            # use `exit` so the container stops with an explicit error status.
            exit -1
        fi
        # Third '/'-separated field of "scheme://host:port" is "host:port".
        /usr/src/app/wait-for-it.sh "$(echo "$SERVICES_BROKER" | cut -d'/' -f 3)" --timeout=20 --strict -- echo " $SERVICES_BROKER (Service Broker) is up"
        echo "RUNNING CELERY WORKER"
        celery --app=celery_app.celeryapp worker -Ofair -n "nlp_${SERVICE_NAME}_worker@%h" --queues="${SERVICE_NAME}" -c "${CONCURRENCY}"
    else
        # Report the value that was actually rejected (no positional argument
        # is passed to this script, so $1 would be empty).
        echo "ERROR: Wrong serving command: $SERVICE_MODE"
        exit -1
    fi
fi

echo "Service stopped"
83 changes: 83 additions & 0 deletions document/swagger.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
openapi: 3.0.1
info:
title: Keyphrase Extraction API
description: API to detect keyphrases in text.
version: 0.2.0

servers:
- url: /

paths:
/kpe/{lang}:
post:
tags:
- Keyphrase Extraction API
summary: Perform Keyphrase Extraction
parameters:
- name: lang
in: path
required: true
description: Language
schema:
type: string
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/request'
responses:
200:
description: "Job successfully finished"
content:
application/json:
schema:
$ref: '#/components/schemas/responsemodel'
400:
description: "Bad request"
500:
description: "Server error"

components:
schemas:
article:
type: object
properties:
text:
type: string
default: This is an article.
request:
type: object
properties:
articles:
type: array
required: true
items:
$ref: '#/components/schemas/article'
component_cfg:
type: object

keyphrase:
type: object
properties:
text:
type: string
end:
score: float
batch:
type: object
properties:
text:
type: string
keyphrases:
type: array
items:
$ref: '#/components/schemas/keyphrase'

responsemodel:
type: object
properties:
kpe:
type: array
items:
$ref: '#/components/schemas/batch'

10 changes: 10 additions & 0 deletions healthcheck.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#!/usr/bin/env bash
# Container health probe: queries the HTTP endpoint in http mode, otherwise
# pings the Celery worker of this container.

set -eax

if [ "$SERVICE_MODE" = "http" ]
then
    curl --fail http://localhost:80/healthcheck || exit 1
else
    # The node name must match the one the worker is started with in
    # docker-entrypoint.sh (-n nlp_${SERVICE_NAME}_worker@%h); without the
    # "nlp_" prefix no worker answers the ping.
    celery --app=celery_app.celeryapp inspect ping -d "nlp_${SERVICE_NAME}_worker@${HOSTNAME}" || exit 1
fi
Empty file added http_server/__init__.py
Empty file.
51 changes: 51 additions & 0 deletions http_server/confparser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
import os
import argparse

__all__ = ["createParser"]

def createParser() -> argparse.ArgumentParser:
    """Build the command-line parser for the HTTP server.

    Defaults are read from the environment when the parser is built:
    ``SERVICE_NAME``, ``CONCURRENCY``, ``SWAGGER_PREFIX`` and ``SWAGGER_PATH``.

    Returns:
        The configured ``argparse.ArgumentParser``.

    Raises:
        ValueError: If ``CONCURRENCY`` is set to a non-empty, non-integer value.
    """
    # An env file may define CONCURRENCY with an empty value; treat that the
    # same as "unset" instead of failing on int('').
    raw_concurrency = os.environ.get('CONCURRENCY', '').strip()
    concurrency = int(raw_concurrency) if raw_concurrency else 1

    parser = argparse.ArgumentParser()

    # SERVICE
    parser.add_argument(
        '--service_name',
        type=str,
        help='Service Name',
        default=os.environ.get('SERVICE_NAME', 'kpe'))

    # GUNICORN
    parser.add_argument(
        '--service_port',
        type=int,
        help='Service port',
        default=80)
    parser.add_argument(
        '--workers',
        type=int,
        help="Number of Gunicorn workers (default=CONCURRENCY + 1)",
        default=concurrency + 1)

    # SWAGGER
    parser.add_argument(
        '--swagger_url',
        type=str,
        help='Swagger interface url',
        default='/docs')
    parser.add_argument(
        '--swagger_prefix',
        type=str,
        help='Swagger prefix',
        default=os.environ.get('SWAGGER_PREFIX', ''))
    parser.add_argument(
        '--swagger_path',
        type=str,
        help='Swagger file path',
        default=os.environ.get('SWAGGER_PATH', '/usr/src/app/document/swagger.yml'))

    # MISC
    parser.add_argument(
        '--debug',
        action='store_true',
        help='Display debug logs')

    return parser
Loading

0 comments on commit 3e51d9d

Please sign in to comment.