Skip to content

Commit

Permalink
[DPTOOLS-1903] Remove stale data in ES index (#15)
Browse files Browse the repository at this point in the history
  • Loading branch information
jinhyukchang authored Mar 5, 2019
1 parent 37c700d commit 3b89a78
Show file tree
Hide file tree
Showing 5 changed files with 47 additions and 4 deletions.
1 change: 1 addition & 0 deletions databuilder/extractor/neo4j_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ def _execute_query(self, tx):
"""
Create an iterator to execute sql.
"""
LOGGER.info('Executing query {}'.format(self.cypher_query))
result = tx.run(self.cypher_query)
return result

Expand Down
24 changes: 22 additions & 2 deletions databuilder/extractor/neo4j_search_data_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from databuilder import Scoped
from databuilder.extractor.base_extractor import Extractor
from databuilder.extractor.neo4j_extractor import Neo4jExtractor
from databuilder.publisher.neo4j_csv_publisher import JOB_PUBLISH_TAG


class Neo4jSearchDataExtractor(Extractor):
Expand All @@ -18,6 +19,7 @@ class Neo4jSearchDataExtractor(Extractor):
DEFAULT_NEO4J_CYPHER_QUERY = textwrap.dedent(
"""
MATCH (db:Database)<-[:CLUSTER_OF]-(cluster:Cluster)<-[:SCHEMA_OF]-(schema:Schema)<-[:TABLE_OF]-(table:Table)
{publish_tag_filter}
OPTIONAL MATCH (table)-[:DESCRIPTION]->(table_description:Description)
OPTIONAL MATCH (table)-[read:READ_BY]->(user:User)
OPTIONAL MATCH (table)-[:COLUMN]->(cols:Column)
Expand All @@ -44,8 +46,11 @@ def init(self, conf):
self.conf = conf

# extract cypher query from conf, if specified, else use default query
self.cypher_query = conf.get_string(Neo4jSearchDataExtractor.CYPHER_QUERY_CONFIG_KEY,
Neo4jSearchDataExtractor.DEFAULT_NEO4J_CYPHER_QUERY)
if Neo4jSearchDataExtractor.CYPHER_QUERY_CONFIG_KEY in conf:
self.cypher_query = conf.get_string(Neo4jSearchDataExtractor.CYPHER_QUERY_CONFIG_KEY)
else:
self.cypher_query = self._add_publish_tag_filter(conf.get_string(JOB_PUBLISH_TAG, ''),
Neo4jSearchDataExtractor.DEFAULT_NEO4J_CYPHER_QUERY)

self.neo4j_extractor = Neo4jExtractor()
# write the cypher query in configs in Neo4jExtractor scope
Expand All @@ -72,3 +77,18 @@ def extract(self):
def get_scope(self):
    # type: () -> str
    """
    Return the configuration scope name used by this extractor.
    """
    scope = 'extractor.search_data'
    return scope

def _add_publish_tag_filter(self, publish_tag, cypher_query):
"""
Adds publish tag filter into Cypher query
:param publish_tag: value of publish tag.
:param cypher_query:
:return:
"""
# type: (str, str) -> str
if not publish_tag:
publish_tag_filter = ''
else:
publish_tag_filter = """WHERE table.published_tag = '{}'""".format(publish_tag)

return cypher_query.format(publish_tag_filter=publish_tag_filter)
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@ class TblColUsgAggExtractor(Extractor):

def init(self, conf):
# type: (ConfigTree) -> None

self._extractor = conf.get(RAW_EXTRACTOR) # type: Extractor
self._extractor.init(Scoped.get_scoped_conf(conf, self._extractor.get_scope()))

Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from setuptools import setup, find_packages


__version__ = '1.0.5'
__version__ = '1.0.6'


setup(
Expand Down
23 changes: 23 additions & 0 deletions tests/unit/extractor/test_neo4j_search_data_extractor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
import unittest
from databuilder.extractor.neo4j_search_data_extractor import Neo4jSearchDataExtractor


class TestNeo4jExtractor(unittest.TestCase):
    """Unit tests for Neo4jSearchDataExtractor._add_publish_tag_filter."""

    def test_adding_filter(self):
        # type: () -> None
        """A non-empty publish tag is rendered as a WHERE clause in the query."""
        extractor = Neo4jSearchDataExtractor()
        actual = extractor._add_publish_tag_filter('foo', 'MATCH (table:Table) {publish_tag_filter} RETURN table')

        self.assertEqual(actual, """MATCH (table:Table) WHERE table.published_tag = 'foo' RETURN table""")

    def test_not_adding_filter(self):
        # type: () -> None
        """An empty publish tag replaces the placeholder with an empty string."""
        extractor = Neo4jSearchDataExtractor()
        actual = extractor._add_publish_tag_filter('', 'MATCH (table:Table) {publish_tag_filter} RETURN table')

        # The placeholder becomes an empty string, so the spaces that surrounded
        # it both remain: the expected query has two consecutive spaces.
        self.assertEqual(actual, """MATCH (table:Table)  RETURN table""")


if __name__ == '__main__':
unittest.main()

0 comments on commit 3b89a78

Please sign in to comment.