Skip to content

Commit

Permalink
Revert "refactor: Update ES index maps to use same maps of amundsen-c…
Browse files Browse the repository at this point in the history
…ommon (#385)" (#389)

This reverts commit 20c2fd2.

Signed-off-by: Allison Suarez Miranda <[email protected]>
  • Loading branch information
allisonsuarez authored Oct 20, 2020
1 parent 9595866 commit 2870387
Show file tree
Hide file tree
Showing 7 changed files with 253 additions and 10 deletions.
247 changes: 247 additions & 0 deletions databuilder/publisher/elasticsearch_constants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,247 @@
# Copyright Contributors to the Amundsen project.
# SPDX-License-Identifier: Apache-2.0

import textwrap

# Documentation: https://www.elastic.co/guide/en/elasticsearch/reference/current/mapping.html
# Setting type to "text" for all fields that would be used in search
# Using Simple Analyzer to convert all text into search terms
# https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-simple-analyzer.html
# Standard Analyzer is used for all text fields that don't explicitly specify an analyzer
# https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-standard-analyzer.html
# TODO use amundsencommon for this when this project is updated to py3
TABLE_ELASTICSEARCH_INDEX_MAPPING = textwrap.dedent(
"""
{
"mappings":{
"table":{
"properties": {
"name": {
"type":"text",
"analyzer": "simple",
"fields": {
"raw": {
"type": "keyword"
}
}
},
"schema": {
"type":"text",
"analyzer": "simple",
"fields": {
"raw": {
"type": "keyword"
}
}
},
"display_name": {
"type": "keyword"
},
"last_updated_timestamp": {
"type": "date",
"format": "epoch_second"
},
"description": {
"type": "text",
"analyzer": "simple"
},
"column_names": {
"type":"text",
"analyzer": "simple",
"fields": {
"raw": {
"type": "keyword"
}
}
},
"column_descriptions": {
"type": "text",
"analyzer": "simple"
},
"tags": {
"type": "keyword"
},
"badges": {
"type": "keyword"
},
"cluster": {
"type": "text",
"analyzer": "simple",
"fields": {
"raw": {
"type": "keyword"
}
}
},
"database": {
"type": "text",
"analyzer": "simple",
"fields": {
"raw": {
"type": "keyword"
}
}
},
"key": {
"type": "keyword"
},
"total_usage":{
"type": "long"
},
"unique_usage": {
"type": "long"
},
"programmatic_descriptions": {
"type": "text",
"analyzer": "simple"
}
}
}
}
}
"""
)

DASHBOARD_ELASTICSEARCH_INDEX_MAPPING = textwrap.dedent(
"""
{
"settings": {
"analysis": {
"normalizer": {
"lowercase_normalizer": {
"type": "custom",
"char_filter": [],
"filter": ["lowercase", "asciifolding"]
}
}
}
},
"mappings":{
"dashboard":{
"properties": {
"group_name": {
"type":"text",
"analyzer": "simple",
"fields": {
"raw": {
"type": "keyword",
"normalizer": "lowercase_normalizer"
}
}
},
"name": {
"type":"text",
"analyzer": "simple",
"fields": {
"raw": {
"type": "keyword",
"normalizer": "lowercase_normalizer"
}
}
},
"description": {
"type":"text",
"analyzer": "simple",
"fields": {
"raw": {
"type": "keyword"
}
}
},
"group_description": {
"type":"text",
"analyzer": "simple",
"fields": {
"raw": {
"type": "keyword"
}
}
},
"query_names": {
"type":"text",
"analyzer": "simple",
"fields": {
"raw": {
"type": "keyword"
}
}
},
"chart_names": {
"type":"text",
"analyzer": "simple",
"fields": {
"raw": {
"type": "keyword"
}
}
},
"tags": {
"type": "keyword"
},
"badges": {
"type": "keyword"
}
}
}
}
}
"""
)

USER_ELASTICSEARCH_INDEX_MAPPING = textwrap.dedent(
"""
{
"mappings":{
"user":{
"properties": {
"email": {
"type":"text",
"analyzer": "simple",
"fields": {
"raw": {
"type": "keyword"
}
}
},
"first_name": {
"type":"text",
"analyzer": "simple",
"fields": {
"raw": {
"type": "keyword"
}
}
},
"last_name": {
"type":"text",
"analyzer": "simple",
"fields": {
"raw": {
"type": "keyword"
}
}
},
"full_name": {
"type":"text",
"analyzer": "simple",
"fields": {
"raw": {
"type": "keyword"
}
}
},
"total_read":{
"type": "long"
},
"total_own": {
"type": "long"
},
"total_follow": {
"type": "long"
}
}
}
}
}
"""
)
3 changes: 1 addition & 2 deletions databuilder/publisher/elasticsearch_publisher.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,8 @@
from pyhocon import ConfigTree
from typing import List

from amundsen_common.models.index_map import TABLE_INDEX_MAP as TABLE_ELASTICSEARCH_INDEX_MAPPING

from databuilder.publisher.base_publisher import Publisher
from databuilder.publisher.elasticsearch_constants import TABLE_ELASTICSEARCH_INDEX_MAPPING

LOGGER = logging.getLogger(__name__)

Expand Down
2 changes: 1 addition & 1 deletion docs/dashboard_ingestion_guide.md
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ job = DefaultJob(conf=job_config,
job.launch()
```

*Note that `DASHBOARD_ELASTICSEARCH_INDEX_MAPPING` is defined [here](https://github.com/amundsen-io/amundsencommon/blob/master/amundsen_common/models/index_map.py).
*Note that `DASHBOARD_ELASTICSEARCH_INDEX_MAPPING` is defined [here](../databuilder/publisher/elasticsearch_constants.py).


### 4. Remove stale data
Expand Down
4 changes: 2 additions & 2 deletions example/scripts/sample_data_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,15 +29,15 @@
from elasticsearch import Elasticsearch
from pyhocon import ConfigFactory
from sqlalchemy.ext.declarative import declarative_base
from amundsen_common.models.index_map import DASHBOARD_ELASTICSEARCH_INDEX_MAPPING
from amundsen_common.models.index_map import USER_INDEX_MAP as USER_ELASTICSEARCH_INDEX_MAPPING

from databuilder.extractor.csv_extractor import CsvTableColumnExtractor, CsvExtractor
from databuilder.extractor.neo4j_es_last_updated_extractor import Neo4jEsLastUpdatedExtractor
from databuilder.extractor.neo4j_search_data_extractor import Neo4jSearchDataExtractor
from databuilder.job.job import DefaultJob
from databuilder.loader.file_system_elasticsearch_json_loader import FSElasticsearchJSONLoader
from databuilder.loader.file_system_neo4j_csv_loader import FsNeo4jCSVLoader
from databuilder.publisher.elasticsearch_constants import DASHBOARD_ELASTICSEARCH_INDEX_MAPPING, \
USER_ELASTICSEARCH_INDEX_MAPPING
from databuilder.publisher.elasticsearch_publisher import ElasticsearchPublisher
from databuilder.publisher.neo4j_csv_publisher import Neo4jCsvPublisher
from databuilder.task.task import DefaultTask
Expand Down
2 changes: 1 addition & 1 deletion example/scripts/sample_tableau_data_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,12 @@
from elasticsearch import Elasticsearch
from pyhocon import ConfigFactory
from sqlalchemy.ext.declarative import declarative_base
from amundsen_common.models.index_map import DASHBOARD_ELASTICSEARCH_INDEX_MAPPING

from databuilder.extractor.neo4j_search_data_extractor import Neo4jSearchDataExtractor
from databuilder.job.job import DefaultJob
from databuilder.loader.file_system_elasticsearch_json_loader import FSElasticsearchJSONLoader
from databuilder.loader.file_system_neo4j_csv_loader import FsNeo4jCSVLoader
from databuilder.publisher.elasticsearch_constants import DASHBOARD_ELASTICSEARCH_INDEX_MAPPING
from databuilder.publisher.elasticsearch_publisher import ElasticsearchPublisher
from databuilder.publisher.neo4j_csv_publisher import Neo4jCsvPublisher
from databuilder.task.task import DefaultTask
Expand Down
4 changes: 0 additions & 4 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -38,10 +38,6 @@ typing==3.6.4
# Upstream url: https://pypi.org/project/elasticsearch/
elasticsearch>=6.2.0,<7.0

# A common package that holds the models deifnition and schemas that are used
# accross different amundsen repositories.
amundsen-common>=0.5.6,<1.0

atomicwrites==1.1.5
more-itertools==4.2.0
pluggy>=0.6.0
Expand Down
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@
install_requires=requirements,
python_requires='>=3.6,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*',
extras_require={
':python_version=="2.7"': ['typing>=3.6'], # allow typehinting PY2
'all': all_deps,
'kafka': kafka, # To use with Kafka source extractor
'cassandra': cassandra,
Expand Down

0 comments on commit 2870387

Please sign in to comment.