# Patch summary (leading text before the first "diff --git" header; not part of any hunk).
#
# Files touched:
#   1. databuilder/publisher/elasticsearch_constants.py  (new file, hunk header: +1,207)
#      Defines three module-level strings, each built with textwrap.dedent() around a JSON
#      Elasticsearch index-mapping body:
#        - TABLE_ELASTICSEARCH_INDEX_MAPPING      (mapping type "table")
#        - DASHBOARD_ELASTICSEARCH_INDEX_MAPPING  (mapping type "dashboard")
#        - USER_ELASTICSEARCH_INDEX_MAPPING       (mapping type "user")
#      Searchable fields are "text" with the "simple" analyzer; most also carry a "raw"
#      keyword sub-field. Counters (total_usage, unique_usage, total_read, total_own,
#      total_follow) are "long".
#   2. databuilder/publisher/elasticsearch_publisher.py
#      - Drops the `textwrap` import, reorders the pyhocon/typing imports, and imports
#        TABLE_ELASTICSEARCH_INDEX_MAPPING from the new constants module.
#      - Replaces the inline DEFAULT_ELASTICSEARCH_INDEX_MAPPING JSON literal with an alias:
#        DEFAULT_ELASTICSEARCH_INDEX_MAPPING = TABLE_ELASTICSEARCH_INDEX_MAPPING.
#        The removed literal lists the same properties as the new TABLE mapping, so the
#        class attribute name stays available to existing callers.
#   3. setup.py: __version__ bumped from '2.5.13' to '2.5.14'.
#
# NOTE(review): only the TABLE mapping is imported by the publisher in this patch; the
#   DASHBOARD and USER mappings are not referenced by any other hunk shown here.
# NOTE(review): in this copy the hunk lines are run together on a few long lines; the
#   one-change-per-line layout presumably needs restoring before the patch can be applied.
#   TODO confirm against the original patch.
diff --git a/databuilder/publisher/elasticsearch_constants.py b/databuilder/publisher/elasticsearch_constants.py new file mode 100644 index 000000000..1e407ef2e --- /dev/null +++ b/databuilder/publisher/elasticsearch_constants.py @@ -0,0 +1,207 @@ +import textwrap + +# Documentation: https://www.elastic.co/guide/en/elasticsearch/reference/current/mapping.html +# Setting type to "text" for all fields that would be used in search +# Using Simple Analyzer to convert all text into search terms +# https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-simple-analyzer.html +# Standard Analyzer is used for all text fields that don't explicitly specify an analyzer +# https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-standard-analyzer.html +# TODO use amundsencommon for this when this project is updated to py3 +TABLE_ELASTICSEARCH_INDEX_MAPPING = textwrap.dedent( + """ + { + "mappings":{ + "table":{ + "properties": { + "name": { + "type":"text", + "analyzer": "simple", + "fields": { + "raw": { + "type": "keyword" + } + } + }, + "schema": { + "type":"text", + "analyzer": "simple", + "fields": { + "raw": { + "type": "keyword" + } + } + }, + "display_name": { + "type": "keyword" + }, + "last_updated_timestamp": { + "type": "date", + "format": "epoch_second" + }, + "description": { + "type": "text", + "analyzer": "simple" + }, + "column_names": { + "type":"text", + "analyzer": "simple", + "fields": { + "raw": { + "type": "keyword" + } + } + }, + "column_descriptions": { + "type": "text", + "analyzer": "simple" + }, + "tags": { + "type": "keyword" + }, + "badges": { + "type": "keyword" + }, + "cluster": { + "type": "text" + }, + "database": { + "type": "text", + "analyzer": "simple", + "fields": { + "raw": { + "type": "keyword" + } + } + }, + "key": { + "type": "keyword" + }, + "total_usage":{ + "type": "long" + }, + "unique_usage": { + "type": "long" + } + } + } + } + } + """ +) + +DASHBOARD_ELASTICSEARCH_INDEX_MAPPING = textwrap.dedent( 
+ """ + { + "mappings":{ + "dashboard":{ + "properties": { + "group_name": { + "type":"text", + "analyzer": "simple", + "fields": { + "raw": { + "type": "keyword" + } + } + }, + "name": { + "type":"text", + "analyzer": "simple", + "fields": { + "raw": { + "type": "keyword" + } + } + }, + "description": { + "type":"text", + "analyzer": "simple", + "fields": { + "raw": { + "type": "keyword" + } + } + }, + "group_description": { + "type":"text", + "analyzer": "simple", + "fields": { + "raw": { + "type": "keyword" + } + } + }, + "query_names": { + "type":"text", + "analyzer": "simple", + "fields": { + "raw": { + "type": "keyword" + } + } + } + } + } + } + } + + """ +) + +USER_ELASTICSEARCH_INDEX_MAPPING = textwrap.dedent( + """ + { + "mappings":{ + "user":{ + "properties": { + "email": { + "type":"text", + "analyzer": "simple", + "fields": { + "raw": { + "type": "keyword" + } + } + }, + "first_name": { + "type":"text", + "analyzer": "simple", + "fields": { + "raw": { + "type": "keyword" + } + } + }, + "last_name": { + "type":"text", + "analyzer": "simple", + "fields": { + "raw": { + "type": "keyword" + } + } + }, + "full_name": { + "type":"text", + "analyzer": "simple", + "fields": { + "raw": { + "type": "keyword" + } + } + }, + "total_read":{ + "type": "long" + }, + "total_own": { + "type": "long" + }, + "total_follow": { + "type": "long" + } + } + } + } + } + """ +) diff --git a/databuilder/publisher/elasticsearch_publisher.py b/databuilder/publisher/elasticsearch_publisher.py index 4c49d9860..5728808bb 100644 --- a/databuilder/publisher/elasticsearch_publisher.py +++ b/databuilder/publisher/elasticsearch_publisher.py @@ -1,12 +1,12 @@ import json import logging -import textwrap -from typing import List # noqa: F401 -from pyhocon import ConfigTree # noqa: F401 from elasticsearch.exceptions import NotFoundError +from pyhocon import ConfigTree # noqa: F401 +from typing import List # noqa: F401 from databuilder.publisher.base_publisher import Publisher +from 
databuilder.publisher.elasticsearch_constants import TABLE_ELASTICSEARCH_INDEX_MAPPING LOGGER = logging.getLogger(__name__) @@ -32,95 +32,7 @@ class ElasticsearchPublisher(Publisher): # config to control how many max documents to publish at a time ELASTICSEARCH_PUBLISHER_BATCH_SIZE = 'batch_size' - # Specifying default mapping for elasticsearch index - # Documentation: https://www.elastic.co/guide/en/elasticsearch/reference/current/mapping.html - # Setting type to "text" for all fields that would be used in search - # Using Simple Analyzer to convert all text into search terms - # https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-simple-analyzer.html - # Standard Analyzer is used for all text fields that don't explicitly specify an analyzer - # https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-standard-analyzer.html - # TODO use amundsencommon for this when this project is updated to py3 - DEFAULT_ELASTICSEARCH_INDEX_MAPPING = textwrap.dedent( - """ - { - "mappings":{ - "table":{ - "properties": { - "name": { - "type":"text", - "analyzer": "simple", - "fields": { - "raw": { - "type": "keyword" - } - } - }, - "schema": { - "type":"text", - "analyzer": "simple", - "fields": { - "raw": { - "type": "keyword" - } - } - }, - "display_name": { - "type": "keyword" - }, - "last_updated_timestamp": { - "type": "date", - "format": "epoch_second" - }, - "description": { - "type": "text", - "analyzer": "simple" - }, - "column_names": { - "type":"text", - "analyzer": "simple", - "fields": { - "raw": { - "type": "keyword" - } - } - }, - "column_descriptions": { - "type": "text", - "analyzer": "simple" - }, - "tags": { - "type": "keyword" - }, - "badges": { - "type": "keyword" - }, - "cluster": { - "type": "text" - }, - "database": { - "type": "text", - "analyzer": "simple", - "fields": { - "raw": { - "type": "keyword" - } - } - }, - "key": { - "type": "keyword" - }, - "total_usage":{ - "type": "long" - }, - "unique_usage": { - "type": 
"long" - } - } - } - } - } - """ - ) + DEFAULT_ELASTICSEARCH_INDEX_MAPPING = TABLE_ELASTICSEARCH_INDEX_MAPPING def __init__(self): # type: () -> None diff --git a/setup.py b/setup.py index f916f7273..25732679a 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ from setuptools import setup, find_packages -__version__ = '2.5.13' +__version__ = '2.5.14' requirements_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'requirements.txt') with open(requirements_path) as requirements_file: