From 20c2fd2f57076ba40ed0552802be0acda8b362d1 Mon Sep 17 00:00:00 2001 From: Wonyeong Choi Date: Sat, 17 Oct 2020 01:25:24 +0900 Subject: [PATCH] refactor: Update ES index maps to use same maps of amundsen-common (#385) * Update setup.py to remove python2.7 Signed-off-by: Wonyeong Choi * Update requirements.txt to include amundsen-common Signed-off-by: Wonyeong Choi * Replace ES index maps with amundsen-common's Signed-off-by: Wonyeong Choi * Remove ES constants file Signed-off-by: Wonyeong Choi * Remove ES constants file Signed-off-by: Wonyeong Choi --- .../publisher/elasticsearch_constants.py | 247 ------------------ .../publisher/elasticsearch_publisher.py | 3 +- docs/dashboard_ingestion_guide.md | 2 +- example/scripts/sample_data_loader.py | 4 +- example/scripts/sample_tableau_data_loader.py | 2 +- requirements.txt | 4 + setup.py | 1 - 7 files changed, 10 insertions(+), 253 deletions(-) delete mode 100644 databuilder/publisher/elasticsearch_constants.py diff --git a/databuilder/publisher/elasticsearch_constants.py b/databuilder/publisher/elasticsearch_constants.py deleted file mode 100644 index f19f69387..000000000 --- a/databuilder/publisher/elasticsearch_constants.py +++ /dev/null @@ -1,247 +0,0 @@ -# Copyright Contributors to the Amundsen project. 
-# SPDX-License-Identifier: Apache-2.0 - -import textwrap - -# Documentation: https://www.elastic.co/guide/en/elasticsearch/reference/current/mapping.html -# Setting type to "text" for all fields that would be used in search -# Using Simple Analyzer to convert all text into search terms -# https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-simple-analyzer.html -# Standard Analyzer is used for all text fields that don't explicitly specify an analyzer -# https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-standard-analyzer.html -# TODO use amundsencommon for this when this project is updated to py3 -TABLE_ELASTICSEARCH_INDEX_MAPPING = textwrap.dedent( - """ - { - "mappings":{ - "table":{ - "properties": { - "name": { - "type":"text", - "analyzer": "simple", - "fields": { - "raw": { - "type": "keyword" - } - } - }, - "schema": { - "type":"text", - "analyzer": "simple", - "fields": { - "raw": { - "type": "keyword" - } - } - }, - "display_name": { - "type": "keyword" - }, - "last_updated_timestamp": { - "type": "date", - "format": "epoch_second" - }, - "description": { - "type": "text", - "analyzer": "simple" - }, - "column_names": { - "type":"text", - "analyzer": "simple", - "fields": { - "raw": { - "type": "keyword" - } - } - }, - "column_descriptions": { - "type": "text", - "analyzer": "simple" - }, - "tags": { - "type": "keyword" - }, - "badges": { - "type": "keyword" - }, - "cluster": { - "type": "text", - "analyzer": "simple", - "fields": { - "raw": { - "type": "keyword" - } - } - }, - "database": { - "type": "text", - "analyzer": "simple", - "fields": { - "raw": { - "type": "keyword" - } - } - }, - "key": { - "type": "keyword" - }, - "total_usage":{ - "type": "long" - }, - "unique_usage": { - "type": "long" - }, - "programmatic_descriptions": { - "type": "text", - "analyzer": "simple" - } - } - } - } - } - """ -) - -DASHBOARD_ELASTICSEARCH_INDEX_MAPPING = textwrap.dedent( - """ - { - "settings": { - "analysis": { - 
"normalizer": { - "lowercase_normalizer": { - "type": "custom", - "char_filter": [], - "filter": ["lowercase", "asciifolding"] - } - } - } - }, - "mappings":{ - "dashboard":{ - "properties": { - "group_name": { - "type":"text", - "analyzer": "simple", - "fields": { - "raw": { - "type": "keyword", - "normalizer": "lowercase_normalizer" - } - } - }, - "name": { - "type":"text", - "analyzer": "simple", - "fields": { - "raw": { - "type": "keyword", - "normalizer": "lowercase_normalizer" - } - } - }, - "description": { - "type":"text", - "analyzer": "simple", - "fields": { - "raw": { - "type": "keyword" - } - } - }, - "group_description": { - "type":"text", - "analyzer": "simple", - "fields": { - "raw": { - "type": "keyword" - } - } - }, - "query_names": { - "type":"text", - "analyzer": "simple", - "fields": { - "raw": { - "type": "keyword" - } - } - }, - "chart_names": { - "type":"text", - "analyzer": "simple", - "fields": { - "raw": { - "type": "keyword" - } - } - }, - "tags": { - "type": "keyword" - }, - "badges": { - "type": "keyword" - } - } - } - } - } - """ -) - -USER_ELASTICSEARCH_INDEX_MAPPING = textwrap.dedent( - """ - { - "mappings":{ - "user":{ - "properties": { - "email": { - "type":"text", - "analyzer": "simple", - "fields": { - "raw": { - "type": "keyword" - } - } - }, - "first_name": { - "type":"text", - "analyzer": "simple", - "fields": { - "raw": { - "type": "keyword" - } - } - }, - "last_name": { - "type":"text", - "analyzer": "simple", - "fields": { - "raw": { - "type": "keyword" - } - } - }, - "full_name": { - "type":"text", - "analyzer": "simple", - "fields": { - "raw": { - "type": "keyword" - } - } - }, - "total_read":{ - "type": "long" - }, - "total_own": { - "type": "long" - }, - "total_follow": { - "type": "long" - } - } - } - } - } - """ -) diff --git a/databuilder/publisher/elasticsearch_publisher.py b/databuilder/publisher/elasticsearch_publisher.py index a89650515..4583df187 100644 --- a/databuilder/publisher/elasticsearch_publisher.py +++ 
b/databuilder/publisher/elasticsearch_publisher.py @@ -8,8 +8,9 @@ from pyhocon import ConfigTree from typing import List +from amundsen_common.models.index_map import TABLE_INDEX_MAP as TABLE_ELASTICSEARCH_INDEX_MAPPING + from databuilder.publisher.base_publisher import Publisher -from databuilder.publisher.elasticsearch_constants import TABLE_ELASTICSEARCH_INDEX_MAPPING LOGGER = logging.getLogger(__name__) diff --git a/docs/dashboard_ingestion_guide.md b/docs/dashboard_ingestion_guide.md index 64e57c779..ee7278be5 100644 --- a/docs/dashboard_ingestion_guide.md +++ b/docs/dashboard_ingestion_guide.md @@ -111,7 +111,7 @@ job = DefaultJob(conf=job_config, job.launch() ``` -*Note that `DASHBOARD_ELASTICSEARCH_INDEX_MAPPING` is defined [here](../databuilder/publisher/elasticsearch_constants.py). +*Note that `DASHBOARD_ELASTICSEARCH_INDEX_MAPPING` is defined [here](https://github.com/amundsen-io/amundsencommon/blob/master/amundsen_common/models/index_map.py). ### 4. Remove stale data diff --git a/example/scripts/sample_data_loader.py b/example/scripts/sample_data_loader.py index 5be9de1f0..107fad9f4 100644 --- a/example/scripts/sample_data_loader.py +++ b/example/scripts/sample_data_loader.py @@ -29,6 +29,8 @@ from elasticsearch import Elasticsearch from pyhocon import ConfigFactory from sqlalchemy.ext.declarative import declarative_base +from amundsen_common.models.index_map import DASHBOARD_ELASTICSEARCH_INDEX_MAPPING +from amundsen_common.models.index_map import USER_INDEX_MAP as USER_ELASTICSEARCH_INDEX_MAPPING from databuilder.extractor.csv_extractor import CsvTableColumnExtractor, CsvExtractor from databuilder.extractor.neo4j_es_last_updated_extractor import Neo4jEsLastUpdatedExtractor @@ -36,8 +38,6 @@ from databuilder.job.job import DefaultJob from databuilder.loader.file_system_elasticsearch_json_loader import FSElasticsearchJSONLoader from databuilder.loader.file_system_neo4j_csv_loader import FsNeo4jCSVLoader -from 
databuilder.publisher.elasticsearch_constants import DASHBOARD_ELASTICSEARCH_INDEX_MAPPING, \ - USER_ELASTICSEARCH_INDEX_MAPPING from databuilder.publisher.elasticsearch_publisher import ElasticsearchPublisher from databuilder.publisher.neo4j_csv_publisher import Neo4jCsvPublisher from databuilder.task.task import DefaultTask diff --git a/example/scripts/sample_tableau_data_loader.py b/example/scripts/sample_tableau_data_loader.py index 07a093eab..096a246fa 100644 --- a/example/scripts/sample_tableau_data_loader.py +++ b/example/scripts/sample_tableau_data_loader.py @@ -23,12 +23,12 @@ from elasticsearch import Elasticsearch from pyhocon import ConfigFactory from sqlalchemy.ext.declarative import declarative_base +from amundsen_common.models.index_map import DASHBOARD_ELASTICSEARCH_INDEX_MAPPING from databuilder.extractor.neo4j_search_data_extractor import Neo4jSearchDataExtractor from databuilder.job.job import DefaultJob from databuilder.loader.file_system_elasticsearch_json_loader import FSElasticsearchJSONLoader from databuilder.loader.file_system_neo4j_csv_loader import FsNeo4jCSVLoader -from databuilder.publisher.elasticsearch_constants import DASHBOARD_ELASTICSEARCH_INDEX_MAPPING from databuilder.publisher.elasticsearch_publisher import ElasticsearchPublisher from databuilder.publisher.neo4j_csv_publisher import Neo4jCsvPublisher from databuilder.task.task import DefaultTask diff --git a/requirements.txt b/requirements.txt index 671a43b84..fa893b1f2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -38,6 +38,10 @@ typing==3.6.4 # Upstream url: https://pypi.org/project/elasticsearch/ elasticsearch>=6.2.0,<7.0 +# A common package that holds the model definitions and schemas that are used +# across different amundsen repositories. 
+amundsen-common>=0.5.6,<1.0 + atomicwrites==1.1.5 more-itertools==4.2.0 pluggy>=0.6.0 diff --git a/setup.py b/setup.py index 53995386c..98afe2a84 100644 --- a/setup.py +++ b/setup.py @@ -70,7 +70,6 @@ install_requires=requirements, python_requires='>=3.6,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*', extras_require={ - ':python_version=="2.7"': ['typing>=3.6'], # allow typehinting PY2 'all': all_deps, 'kafka': kafka, # To use with Kafka source extractor 'cassandra': cassandra,