diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..f40fe05
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+assets
\ No newline at end of file
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..3a4d02c
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,18 @@
+FROM lintoai/linto-platform-nlp-core:latest
+LABEL maintainer="gshang@linagora.com"
+
+WORKDIR /app
+
+VOLUME /app/assets
+ENV ASSETS_PATH=/app/assets
+
+COPY ./requirements.txt /app/
+RUN pip install --no-cache-dir -r requirements.txt
+
+COPY ./scripts /app/scripts
+COPY ./components /app/components
+
+HEALTHCHECK --interval=15s CMD curl -fs http://0.0.0.0/health || exit 1
+
+ENTRYPOINT ["/home/user/miniconda/bin/uvicorn", "scripts.main:app", "--host", "0.0.0.0", "--port", "80"]
+CMD ["--workers", "1"]
\ No newline at end of file
diff --git a/Jenkinsfile b/Jenkinsfile
new file mode 100644
index 0000000..207eeea
--- /dev/null
+++ b/Jenkinsfile
@@ -0,0 +1,51 @@
+pipeline {
+    agent any
+    environment {
+        DOCKER_HUB_REPO = "lintoai/linto-platform-nlp-keyphrase-extraction"
+        DOCKER_HUB_CRED = 'docker-hub-credentials'
+
+        VERSION = ''
+    }
+
+    stages {
+        stage('Docker build for master branch') {
+            when {
+                branch 'master'
+            }
+            steps {
+                echo 'Publishing latest'
+                script {
+                    image = docker.build(env.DOCKER_HUB_REPO)
+                    VERSION = sh(
+                        returnStdout: true,
+                        script: "awk -v RS='' '/#/ {print; exit}' RELEASE.md | head -1 | sed 's/#//' | sed 's/ //'"
+                    ).trim()
+
+                    docker.withRegistry('https://registry.hub.docker.com', env.DOCKER_HUB_CRED) {
+                        image.push("${VERSION}")
+                        image.push('latest')
+                    }
+                }
+            }
+        }
+
+        stage('Docker build for next (unstable) branch') {
+            when {
+                branch 'next'
+            }
+            steps {
+                echo 'Publishing unstable'
+                script {
+                    image = docker.build(env.DOCKER_HUB_REPO)
+                    VERSION = sh(
+                        returnStdout: true,
+                        script: "awk -v RS='' '/#/ {print; exit}' RELEASE.md | head -1 | sed 's/#//' | sed 's/ //'"
+                    ).trim()
+                    docker.withRegistry('https://registry.hub.docker.com', env.DOCKER_HUB_CRED) {
+                        image.push('latest-unstable')
+                    }
+                }
+            }
+        }
+    } // end stages
+}
\ No newline at end of file
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..4614a57
--- /dev/null
+++ b/README.md
@@ -0,0 +1,201 @@
+# linto-platform-nlp-keyphrase-extraction
+
+## Description
+This repository builds a Docker image for LinTO's NLP service: Keyphrase Extraction. It is based on [linto-platform-nlp-core](https://github.com/linto-ai/linto-platform-nlp-core) and can be deployed along with the [LinTO stack](https://github.com/linto-ai/linto-platform-stack) or in a standalone way (see the Develop section below).
+
+linto-platform-nlp-keyphrase-extraction is backed by [spaCy](https://spacy.io/) v3.0+ featuring transformer-based pipelines, so deploying with GPU support is highly recommended for inference efficiency.
+
+LinTO's NLP services adopt the basic design concept of spaCy: [component and pipeline](https://spacy.io/usage/processing-pipelines). Components are decoupled from the service and can easily be re-used in other projects; components are organised into pipelines for realising specific NLP tasks.
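+
+For example, the `kpe` component shipped in this repository can be dropped into any spaCy v3 pipeline outside of this service. Below is a minimal sketch, assuming the repository's requirements are installed and the snippet is run from the repository root (so that `components/config.cfg` is found); here the model is fetched by name through sentence-transformers rather than from the assets volume:
+
+```python
+import spacy
+from sentence_transformers import SentenceTransformer
+
+import components  # registers the "kpe" pipeline factory
+
+# The "kpe" factory expects a SentenceTransformer supplied via a registered function.
+@spacy.registry.misc("get_model")
+def get_model(name):
+    return SentenceTransformer(name)
+
+nlp = spacy.blank("en")
+nlp.add_pipe("kpe", config={"model": {"@misc": "get_model", "name": "sentence-transformers/all-MiniLM-L6-v2"}})
+
+doc = nlp("Apple Inc. is an American multinational technology company.")
+print(doc._.keyphrases)  # [(keyphrase, score), ...]
+```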
+
+This service uses [FastAPI](https://fastapi.tiangolo.com/) to serve custom spaCy components as pipelines:
+- `kpe`: Keyphrase Extraction
+
+## Usage
+
+See the documentation: [https://doc.linto.ai](https://doc.linto.ai)
+
+## Deploy
+
+With our proposed stack: [https://github.com/linto-ai/linto-platform-stack](https://github.com/linto-ai/linto-platform-stack)
+
+# Develop
+
+## Build and run
+1. Create a named volume for storing models.
+```bash
+sudo docker volume create linto-platform-nlp-assets
+```
+
+2. Download the models into `assets/` on the host machine; make sure that [`git-lfs`](https://git-lfs.github.com/) (Git Large File Storage) is installed and available at `/usr/local/bin/git-lfs`.
+```bash
+cd linto-platform-nlp-keyphrase-extraction/
+bash scripts/download_models.sh
+```
+
+3. Copy the downloaded models into the created volume `linto-platform-nlp-assets`.
+```bash
+sudo docker container create --name cp_helper -v linto-platform-nlp-assets:/root hello-world
+sudo docker cp assets/* cp_helper:/root
+sudo docker rm cp_helper
+```
+
+4. Build the image.
+```bash
+sudo docker build --tag lintoai/linto-platform-keyphrase-extraction:latest .
+```
+
+5. Run the container (with GPU support); make sure that the [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html#installing-on-ubuntu-and-debian) and the GPU driver are installed.
+```bash
+sudo docker run --gpus all \
+--rm -d -p 80:80 \
+-v linto-platform-nlp-assets:/app/assets:ro \
+--env APP_LANG="fr en" \
+lintoai/linto-platform-keyphrase-extraction:latest
+```
+
+To run with a CPU-only setting:
+```bash
+sudo docker run \
+--rm -d -p 80:80 \
+-v linto-platform-nlp-assets:/app/assets:ro \
+--env APP_LANG="fr en" \
+lintoai/linto-platform-keyphrase-extraction:latest
+```
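+
+Either way, you can check that the service is up through its health endpoint, the same one polled by the Dockerfile's `HEALTHCHECK`. A quick sanity check; the response body shown is an assumption based on the `/health` route defined in `scripts/main.py`:
+```bash
+curl -fs http://localhost/health
+# expected output: {"linto-platform-nlp-keyphrase-extraction": "online"}
+```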
+
+To specify the languages served by the container, set `APP_LANG="fr en"`, `APP_LANG="fr"`, etc.
+
+To launch with multiple workers, add `--workers INTEGER` at the end of the above command.
+
+6. Navigate to `http://localhost/docs` or `http://localhost/redoc` in your browser to explore the REST API interactively. See the examples below for how to query the API.
+
+
+## Specification for `http://localhost/kpe/{lang}`
+
+### Supported languages
+| {lang} | Model | Size |
+| --- | --- | --- |
+| `en` | [sentence-transformers/all-MiniLM-L6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) | 80 MB |
+| `fr` | [sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2](https://huggingface.co/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2) | 418 MB |
+
+### Request
+```json
+{
+    "articles": [
+        {
+            "text": "Apple Inc. is an American multinational technology company that specializes in consumer electronics, computer software and online services."
+        },
+        {
+            "text": "Unsupervised learning is a type of machine learning in which the algorithm is not provided with any pre-assigned labels or scores for the training data. As a result, unsupervised learning algorithms must first self-discover any naturally occurring patterns in that training data set."
+        }
+    ]
+}
+```
+
+### Response
+```json
+{
+    "kpe": [
+        {
+            "text": "Apple Inc. is an American multinational technology company that specializes in consumer electronics, computer software and online services.",
+            "keyphrases": [
+                {
+                    "text": "apple",
+                    "score": 0.6539
+                },
+                {
+                    "text": "inc",
+                    "score": 0.3941
+                },
+                {
+                    "text": "company",
+                    "score": 0.2985
+                },
+                {
+                    "text": "multinational",
+                    "score": 0.2635
+                },
+                {
+                    "text": "electronics",
+                    "score": 0.2143
+                }
+            ]
+        },
+        {
+            "text": "Unsupervised learning is a type of machine learning in which the algorithm is not provided with any pre-assigned labels or scores for the training data. As a result, unsupervised learning algorithms must first self-discover any naturally occurring patterns in that training data set.",
+            "keyphrases": [
+                {
+                    "text": "unsupervised",
+                    "score": 0.6663
+                },
+                {
+                    "text": "learning",
+                    "score": 0.3155
+                },
+                {
+                    "text": "algorithms",
+                    "score": 0.3128
+                },
+                {
+                    "text": "algorithm",
+                    "score": 0.2494
+                },
+                {
+                    "text": "patterns",
+                    "score": 0.2476
+                }
+            ]
+        }
+    ]
+}
+```
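+
+For example, with `curl` (assuming the container is exposed on port 80, as in the run commands above):
+```bash
+curl -X POST "http://localhost/kpe/en" \
+  -H "Content-Type: application/json" \
+  -d '{"articles": [{"text": "Apple Inc. is an American multinational technology company."}]}'
+```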
+
+### Component configuration
+This component is a wrapper built on top of [KeyBERT](https://github.com/MaartenGr/KeyBERT).
+
+| Parameter | Type | Default value | Description |
+| --- | --- | --- | --- |
+| candidates | List[str] | null | Candidate keywords/keyphrases to use instead of extracting them from the document(s) |
+| diversity | float | 0.5 | The diversity of results, between 0 and 1, if `use_mmr` is true |
+| keyphrase_ngram_range | Tuple[int, int] | [1,1] | Length, in words, of the extracted keywords/keyphrases |
+| min_df | int | 1 | Minimum document frequency of a word across all documents, if keywords for multiple documents need to be extracted |
+| nr_candidates | int | 20 | The number of candidates to consider, if `use_maxsum` is true |
+| seed_keywords | List[str] | null | Seed keywords that may guide the extraction of keywords by steering the similarities towards the seeded keywords |
+| stop_words | Union[str, List[str]] | null | Stopwords to remove from the document |
+| top_n | int | 5 | Return the top n keywords/keyphrases |
+| use_maxsum | bool | false | Whether to use Max Sum Similarity for the selection of keywords/keyphrases |
+| use_mmr | bool | false | Whether to use Maximal Marginal Relevance (MMR) for the selection of keywords/keyphrases |
+
+The component's default values can be modified in [`components/config.cfg`](components/config.cfg), or overridden per API request at runtime:
+
+```json
+{
+    "articles": [
+        {
+            "text": "Unsupervised learning is a type of machine learning in which the algorithm is not provided with any pre-assigned labels or scores for the training data. As a result, unsupervised learning algorithms must first self-discover any naturally occurring patterns in that training data set."
+        }
+    ],
+    "component_cfg": {
+        "kpe": {"keyphrase_ngram_range": [2,2], "top_n": 1}
+    }
+}
+```
+
+```json
+{
+    "kpe": [
+        {
+            "text": "Unsupervised learning is a type of machine learning in which the algorithm is not provided with any pre-assigned labels or scores for the training data. As a result, unsupervised learning algorithms must first self-discover any naturally occurring patterns in that training data set.",
+            "keyphrases": [
+                {
+                    "text": "unsupervised learning",
+                    "score": 0.7252
+                }
+            ]
+        }
+    ]
+}
+```
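+
+The same mechanism works for any parameter in the table above. For instance, an illustrative request enabling Maximal Marginal Relevance to diversify the extracted keyphrases (the values here are chosen for demonstration only):
+
+```json
+{
+    "articles": [
+        {
+            "text": "Apple Inc. is an American multinational technology company that specializes in consumer electronics, computer software and online services."
+        }
+    ],
+    "component_cfg": {
+        "kpe": {"use_mmr": true, "diversity": 0.7}
+    }
+}
+```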
+
+### Advanced usage
+For advanced usage, such as Max Sum Similarity and Maximal Marginal Relevance for diversifying extraction results, please refer to the [KeyBERT documentation](https://maartengr.github.io/KeyBERT/guides/quickstart.html#usage) and this [Medium post](https://towardsdatascience.com/keyword-extraction-with-bert-724efca412ea) describing how they work.
\ No newline at end of file
diff --git a/RELEASE.md b/RELEASE.md
new file mode 100644
index 0000000..86038ce
--- /dev/null
+++ b/RELEASE.md
@@ -0,0 +1,3 @@
+# 0.1.0
+- Initial commit.
+- Keyphrase Extraction.
\ No newline at end of file
diff --git a/components/__init__.py b/components/__init__.py
new file mode 100644
index 0000000..3fabafc
--- /dev/null
+++ b/components/__init__.py
@@ -0,0 +1,37 @@
+import spacy
+from spacy.language import Language
+from typing import List, Union, Tuple
+from sklearn.feature_extraction.text import CountVectorizer
+from sentence_transformers import SentenceTransformer
+from thinc.api import Config
+from components.keyphrase_extractor import KeyphraseExtractor
+
+# Load the components' default configuration
+config = Config().from_disk("components/config.cfg")
+
+@Language.factory("kpe", default_config=config["components"]["kpe"])
+def make_keyphrase_extractor(
+    nlp: Language,
+    name: str,
+    model: SentenceTransformer,
+    candidates: List[str] = None,
+    keyphrase_ngram_range: Tuple[int, int] = (1, 1),
+    stop_words: Union[str, List[str]] = None,
+    top_n: int = 5,
+    min_df: int = 1,
+    use_maxsum: bool = False,
+    use_mmr: bool = False,
+    diversity: float = 0.5,
+    nr_candidates: int = 20,
+    vectorizer: CountVectorizer = None,
+    highlight: bool = False,
+    seed_keywords: List[str] = None
+    ):
+
+    # Gather all KeyBERT parameters, dropping the factory-specific arguments.
+    kwargs = locals()
+    del kwargs['nlp']
+    del kwargs['name']
+    del kwargs['model']
+
+    return KeyphraseExtractor(model, **kwargs)
+
diff --git a/components/config.cfg b/components/config.cfg
new file mode 100644
index 0000000..cc853fa
--- /dev/null
+++ b/components/config.cfg
@@ -0,0 +1,15 @@
+[components]
+
+[components.kpe]
+candidates = null
+diversity = 0.5
+highlight = false
+keyphrase_ngram_range = [1,1]
+min_df = 1
+nr_candidates = 20
+seed_keywords = null
+stop_words = null
+top_n = 5
+use_maxsum = false
+use_mmr = false
+vectorizer = null
diff --git a/components/keyphrase_extractor.py b/components/keyphrase_extractor.py
new file mode 100644
index 0000000..d8aa013
--- /dev/null
+++ b/components/keyphrase_extractor.py
@@ -0,0 +1,20 @@
+from spacy.tokens import Doc
+from keybert import KeyBERT
+
+class KeyphraseExtractor:
+    """
+    Wrapper class for KeyBERT.
+    """
+    def __init__(self, model, **kwargs):
+        self.model = KeyBERT(model)
+        self.kwargs = kwargs
+        if not Doc.has_extension("keyphrases"):
+            Doc.set_extension("keyphrases", default=[])
+
+    def __call__(self, doc, **kwargs):
+        # Per-call configuration overrides the defaults given at construction time.
+        runtime_kwargs = {}
+        runtime_kwargs.update(self.kwargs)
+        runtime_kwargs.update(kwargs)
+        doc._.keyphrases = self.model.extract_keywords(doc.text, **runtime_kwargs)
+
+        return doc
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..00d0d73
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,2 @@
+# keyphrase extraction
+keybert==0.5.0
\ No newline at end of file
diff --git a/scripts/__init__.py b/scripts/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/scripts/download_models.sh b/scripts/download_models.sh
new file mode 100644
index 0000000..b5ab27f
--- /dev/null
+++ b/scripts/download_models.sh
@@ -0,0 +1,8 @@
+mkdir -p assets
+cd assets
+
+mkdir -p sentence-transformers
+cd sentence-transformers
+git lfs install
+git clone https://huggingface.co/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2
+git clone https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2
diff --git a/scripts/main.py b/scripts/main.py
new file mode 100644
index 0000000..d0c0fa5
--- /dev/null
+++ b/scripts/main.py
@@ -0,0 +1,68 @@
+import os
+import spacy
+import components
+from scripts.schemas import *
+from spacy.tokens import Doc
+from fastapi import FastAPI
+from fastapi.middleware.cors import CORSMiddleware
+from fastapi_health import health
+from sentence_transformers import SentenceTransformer
+
+# To force the GPU usage: spacy.require_gpu()
+spacy.prefer_gpu()
+
+# Supported languages and corresponding model names
+LM_MAP = {
+    "fr": "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
+    "en": "sentence-transformers/all-MiniLM-L6-v2"
+    }
+
+# Load models
+MODELS = {LM_MAP[lang]: SentenceTransformer(os.environ.get("ASSETS_PATH") + '/' + LM_MAP[lang]) for lang in os.environ.get("APP_LANG").split(" ")}
+print(f"Loaded {len(MODELS)} models: {list(MODELS.keys())}")
+
+@spacy.registry.misc("get_model")
+def get_model(name):
+    return MODELS[name]
+
+# Set up the FastAPI app and define the endpoints
+app = FastAPI()
+app.add_middleware(CORSMiddleware, allow_origins=["*"])
+
+# Health check
+def healthy():
+    return {"linto-platform-nlp-keyphrase-extraction": "online"}
+app.add_api_route("/health", health([healthy]))
+
+# Keyphrase Extraction
+def get_data(doc: Doc) -> Dict[str, Any]:
+    """Extract the data to return from the REST API given a Doc object. Modify
+    this function to include other data."""
+    keyphrases = [
+        {
+            "text": keyphrase[0],
+            "score": keyphrase[1]
+        }
+        for keyphrase in doc._.keyphrases
+    ]
+    return {"text": doc.text, "keyphrases": keyphrases}
+
+@app.post("/kpe/{lang}", summary="Keyphrase Extraction", response_model=KpeResponseModel)
+def kpe(lang: str, query: RequestModel):
+    """Process a batch of articles and return the keyphrases predicted by the
+    given model. Each record in the data should have a key "text".
+    """
+    if lang in LM_MAP:
+        model_name = LM_MAP[lang]
+        if model_name not in MODELS:
+            raise RuntimeError(f"Model {model_name} for language {lang} is not loaded.")
+        nlp = spacy.blank(lang)
+        nlp.add_pipe("kpe", config={"model": {"@misc": "get_model", "name": model_name}})
+    else:
+        raise ValueError(f"Language {lang} is not supported.")
+
+    response_body = []
+    texts = (article.text for article in query.articles)
+    for doc in nlp.pipe(texts, component_cfg=query.component_cfg):
+        response_body.append(get_data(doc))
+    return {"kpe": response_body}
\ No newline at end of file
diff --git a/scripts/schemas.py b/scripts/schemas.py
new file mode 100644
index 0000000..3fc7bcc
--- /dev/null
+++ b/scripts/schemas.py
@@ -0,0 +1,27 @@
+from typing import List, Dict, Any, Optional
+from pydantic import BaseModel
+
+class Article(BaseModel):
+    # Schema for a single article in a batch of articles to process
+    text: str
+
+
+class RequestModel(BaseModel):
+    # Schema for a request consisting of a batch of articles and an optional component configuration
+    articles: List[Article]
+    component_cfg: Optional[Dict[str, Dict[str, Any]]] = None
+
+
+class KpeResponseModel(BaseModel):
+    # This is the schema of the expected response and depends on what you
+    # return from get_data.
+
+    class Batch(BaseModel):
+        class Keyphrase(BaseModel):
+            text: str
+            score: float
+
+        text: str
+        keyphrases: List[Keyphrase] = []
+
+    kpe: List[Batch]