From 20c2fd2f57076ba40ed0552802be0acda8b362d1 Mon Sep 17 00:00:00 2001
From: Wonyeong Choi <ciwnyg0815@gmail.com>
Date: Sat, 17 Oct 2020 01:25:24 +0900
Subject: [PATCH] refactor: Update ES index maps to use the same maps as
 amundsen-common (#385)

* Update setup.py to remove python2.7

Signed-off-by: Wonyeong Choi <ciwnyg0815@gmail.com>

* Update requirements.txt to include amundsen-common

Signed-off-by: Wonyeong Choi <ciwnyg0815@gmail.com>

* Replace ES index maps with amundsen-common's

Signed-off-by: Wonyeong Choi <ciwnyg0815@gmail.com>

* Remove ES constants file

Signed-off-by: Wonyeong Choi <ciwnyg0815@gmail.com>

* Remove ES constants file

Signed-off-by: Wonyeong Choi <ciwnyg0815@gmail.com>
---
 .../publisher/elasticsearch_constants.py      | 247 ------------------
 .../publisher/elasticsearch_publisher.py      |   3 +-
 docs/dashboard_ingestion_guide.md             |   2 +-
 example/scripts/sample_data_loader.py         |   4 +-
 example/scripts/sample_tableau_data_loader.py |   2 +-
 requirements.txt                              |   4 +
 setup.py                                      |   1 -
 7 files changed, 10 insertions(+), 253 deletions(-)
 delete mode 100644 databuilder/publisher/elasticsearch_constants.py

diff --git a/databuilder/publisher/elasticsearch_constants.py b/databuilder/publisher/elasticsearch_constants.py
deleted file mode 100644
index f19f69387..000000000
--- a/databuilder/publisher/elasticsearch_constants.py
+++ /dev/null
@@ -1,247 +0,0 @@
-# Copyright Contributors to the Amundsen project.
-# SPDX-License-Identifier: Apache-2.0
-
-import textwrap
-
-# Documentation: https://www.elastic.co/guide/en/elasticsearch/reference/current/mapping.html
-# Setting type to "text" for all fields that would be used in search
-# Using Simple Analyzer to convert all text into search terms
-# https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-simple-analyzer.html
-# Standard Analyzer is used for all text fields that don't explicitly specify an analyzer
-# https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-standard-analyzer.html
-# TODO use amundsencommon for this when this project is updated to py3
-TABLE_ELASTICSEARCH_INDEX_MAPPING = textwrap.dedent(
-    """
-    {
-    "mappings":{
-        "table":{
-          "properties": {
-            "name": {
-              "type":"text",
-              "analyzer": "simple",
-              "fields": {
-                "raw": {
-                  "type": "keyword"
-                }
-              }
-            },
-            "schema": {
-              "type":"text",
-              "analyzer": "simple",
-              "fields": {
-                "raw": {
-                  "type": "keyword"
-                }
-              }
-            },
-            "display_name": {
-              "type": "keyword"
-            },
-            "last_updated_timestamp": {
-              "type": "date",
-              "format": "epoch_second"
-            },
-            "description": {
-              "type": "text",
-              "analyzer": "simple"
-            },
-            "column_names": {
-              "type":"text",
-              "analyzer": "simple",
-              "fields": {
-                "raw": {
-                  "type": "keyword"
-                }
-              }
-            },
-            "column_descriptions": {
-              "type": "text",
-              "analyzer": "simple"
-            },
-            "tags": {
-              "type": "keyword"
-            },
-            "badges": {
-              "type": "keyword"
-            },
-            "cluster": {
-              "type": "text",
-              "analyzer": "simple",
-              "fields": {
-                "raw": {
-                  "type": "keyword"
-                }
-              }
-            },
-            "database": {
-              "type": "text",
-              "analyzer": "simple",
-              "fields": {
-                "raw": {
-                  "type": "keyword"
-                }
-              }
-            },
-            "key": {
-              "type": "keyword"
-            },
-            "total_usage":{
-              "type": "long"
-            },
-            "unique_usage": {
-              "type": "long"
-            },
-            "programmatic_descriptions": {
-              "type": "text",
-              "analyzer": "simple"
-            }
-          }
-        }
-      }
-    }
-    """
-)
-
-DASHBOARD_ELASTICSEARCH_INDEX_MAPPING = textwrap.dedent(
-    """
-    {
-        "settings": {
-          "analysis": {
-            "normalizer": {
-              "lowercase_normalizer": {
-                "type": "custom",
-                "char_filter": [],
-                "filter": ["lowercase", "asciifolding"]
-              }
-            }
-          }
-        },
-        "mappings":{
-            "dashboard":{
-              "properties": {
-                "group_name": {
-                  "type":"text",
-                  "analyzer": "simple",
-                  "fields": {
-                    "raw": {
-                      "type": "keyword",
-                      "normalizer": "lowercase_normalizer"
-                    }
-                  }
-                },
-                "name": {
-                  "type":"text",
-                  "analyzer": "simple",
-                  "fields": {
-                    "raw": {
-                      "type": "keyword",
-                      "normalizer": "lowercase_normalizer"
-                    }
-                  }
-                },
-                "description": {
-                  "type":"text",
-                  "analyzer": "simple",
-                  "fields": {
-                    "raw": {
-                      "type": "keyword"
-                    }
-                  }
-                },
-                "group_description": {
-                  "type":"text",
-                  "analyzer": "simple",
-                  "fields": {
-                    "raw": {
-                      "type": "keyword"
-                    }
-                  }
-                },
-                "query_names": {
-                  "type":"text",
-                  "analyzer": "simple",
-                  "fields": {
-                    "raw": {
-                      "type": "keyword"
-                    }
-                  }
-                },
-                "chart_names": {
-                  "type":"text",
-                  "analyzer": "simple",
-                  "fields": {
-                    "raw": {
-                      "type": "keyword"
-                    }
-                  }
-                },
-                "tags": {
-                  "type": "keyword"
-                },
-                "badges": {
-                  "type": "keyword"
-                }
-              }
-            }
-          }
-        }
-    """
-)
-
-USER_ELASTICSEARCH_INDEX_MAPPING = textwrap.dedent(
-    """
-    {
-        "mappings":{
-            "user":{
-              "properties": {
-                "email": {
-                  "type":"text",
-                  "analyzer": "simple",
-                  "fields": {
-                    "raw": {
-                      "type": "keyword"
-                    }
-                  }
-                },
-                "first_name": {
-                  "type":"text",
-                  "analyzer": "simple",
-                  "fields": {
-                    "raw": {
-                      "type": "keyword"
-                    }
-                  }
-                },
-                "last_name": {
-                  "type":"text",
-                  "analyzer": "simple",
-                  "fields": {
-                    "raw": {
-                      "type": "keyword"
-                    }
-                  }
-                },
-                "full_name": {
-                  "type":"text",
-                  "analyzer": "simple",
-                  "fields": {
-                    "raw": {
-                      "type": "keyword"
-                    }
-                  }
-                },
-                "total_read":{
-                  "type": "long"
-                },
-                "total_own": {
-                  "type": "long"
-                },
-                "total_follow": {
-                  "type": "long"
-                }
-              }
-            }
-          }
-        }
-    """
-)
diff --git a/databuilder/publisher/elasticsearch_publisher.py b/databuilder/publisher/elasticsearch_publisher.py
index a89650515..4583df187 100644
--- a/databuilder/publisher/elasticsearch_publisher.py
+++ b/databuilder/publisher/elasticsearch_publisher.py
@@ -8,8 +8,9 @@
 from pyhocon import ConfigTree
 from typing import List
 
+from amundsen_common.models.index_map import TABLE_INDEX_MAP as TABLE_ELASTICSEARCH_INDEX_MAPPING
+
 from databuilder.publisher.base_publisher import Publisher
-from databuilder.publisher.elasticsearch_constants import TABLE_ELASTICSEARCH_INDEX_MAPPING
 
 LOGGER = logging.getLogger(__name__)
 
diff --git a/docs/dashboard_ingestion_guide.md b/docs/dashboard_ingestion_guide.md
index 64e57c779..ee7278be5 100644
--- a/docs/dashboard_ingestion_guide.md
+++ b/docs/dashboard_ingestion_guide.md
@@ -111,7 +111,7 @@ job = DefaultJob(conf=job_config,
 job.launch()
 ```
 
-*Note that `DASHBOARD_ELASTICSEARCH_INDEX_MAPPING` is defined [here](../databuilder/publisher/elasticsearch_constants.py).  
+*Note that `DASHBOARD_ELASTICSEARCH_INDEX_MAPPING` is defined [here](https://github.com/amundsen-io/amundsencommon/blob/master/amundsen_common/models/index_map.py).  
 
 
 ### 4. Remove stale data
diff --git a/example/scripts/sample_data_loader.py b/example/scripts/sample_data_loader.py
index 5be9de1f0..107fad9f4 100644
--- a/example/scripts/sample_data_loader.py
+++ b/example/scripts/sample_data_loader.py
@@ -29,6 +29,8 @@
 from elasticsearch import Elasticsearch
 from pyhocon import ConfigFactory
 from sqlalchemy.ext.declarative import declarative_base
+from amundsen_common.models.index_map import DASHBOARD_ELASTICSEARCH_INDEX_MAPPING
+from amundsen_common.models.index_map import USER_INDEX_MAP as USER_ELASTICSEARCH_INDEX_MAPPING
 
 from databuilder.extractor.csv_extractor import CsvTableColumnExtractor, CsvExtractor
 from databuilder.extractor.neo4j_es_last_updated_extractor import Neo4jEsLastUpdatedExtractor
@@ -36,8 +38,6 @@
 from databuilder.job.job import DefaultJob
 from databuilder.loader.file_system_elasticsearch_json_loader import FSElasticsearchJSONLoader
 from databuilder.loader.file_system_neo4j_csv_loader import FsNeo4jCSVLoader
-from databuilder.publisher.elasticsearch_constants import DASHBOARD_ELASTICSEARCH_INDEX_MAPPING, \
-    USER_ELASTICSEARCH_INDEX_MAPPING
 from databuilder.publisher.elasticsearch_publisher import ElasticsearchPublisher
 from databuilder.publisher.neo4j_csv_publisher import Neo4jCsvPublisher
 from databuilder.task.task import DefaultTask
diff --git a/example/scripts/sample_tableau_data_loader.py b/example/scripts/sample_tableau_data_loader.py
index 07a093eab..096a246fa 100644
--- a/example/scripts/sample_tableau_data_loader.py
+++ b/example/scripts/sample_tableau_data_loader.py
@@ -23,12 +23,12 @@
 from elasticsearch import Elasticsearch
 from pyhocon import ConfigFactory
 from sqlalchemy.ext.declarative import declarative_base
+from amundsen_common.models.index_map import DASHBOARD_ELASTICSEARCH_INDEX_MAPPING
 
 from databuilder.extractor.neo4j_search_data_extractor import Neo4jSearchDataExtractor
 from databuilder.job.job import DefaultJob
 from databuilder.loader.file_system_elasticsearch_json_loader import FSElasticsearchJSONLoader
 from databuilder.loader.file_system_neo4j_csv_loader import FsNeo4jCSVLoader
-from databuilder.publisher.elasticsearch_constants import DASHBOARD_ELASTICSEARCH_INDEX_MAPPING
 from databuilder.publisher.elasticsearch_publisher import ElasticsearchPublisher
 from databuilder.publisher.neo4j_csv_publisher import Neo4jCsvPublisher
 from databuilder.task.task import DefaultTask
diff --git a/requirements.txt b/requirements.txt
index 671a43b84..fa893b1f2 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -38,6 +38,10 @@ typing==3.6.4
 # Upstream url: https://pypi.org/project/elasticsearch/
 elasticsearch>=6.2.0,<7.0
 
+# A common package that holds the model definitions and schemas that are used
+# across different amundsen repositories.
+amundsen-common>=0.5.6,<1.0
+
 atomicwrites==1.1.5
 more-itertools==4.2.0
 pluggy>=0.6.0
diff --git a/setup.py b/setup.py
index 53995386c..98afe2a84 100644
--- a/setup.py
+++ b/setup.py
@@ -70,7 +70,6 @@
     install_requires=requirements,
     python_requires='>=3.6,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*',
     extras_require={
-        ':python_version=="2.7"': ['typing>=3.6'],  # allow typehinting PY2
         'all': all_deps,
         'kafka': kafka,  # To use with Kafka source extractor
         'cassandra': cassandra,