-
Notifications
You must be signed in to change notification settings - Fork 208
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: column level badges cont. (#381)
* made create_relation meth for BadgeMetadata Signed-off-by: Allison Suarez Miranda <[email protected]> * small fix Signed-off-by: Allison Suarez Miranda <[email protected]> * started setting up badge to be entity Signed-off-by: Allison Suarez Miranda <[email protected]> * made standalone badge metadata file and changed table metadata to use this for column badges, can use with any other entity really Signed-off-by: Allison Suarez Miranda <[email protected]> * fixed an oopsie in column metadata Signed-off-by: Allison Suarez Miranda <[email protected]> * added tests for badge.py and fixed none type issue in cloumn metadata Signed-off-by: Allison Suarez Miranda <[email protected]> * replaced with None cause badges are optional Signed-off-by: Allison Suarez Miranda <[email protected]> * fixed all unit tests Signed-off-by: Allison Suarez Miranda <[email protected]> * lint :/ Signed-off-by: Allison Suarez Miranda <[email protected]> * fixed typing issues Signed-off-by: Allison Suarez Miranda <[email protected]> * bumped minor version on setup.py Signed-off-by: Allison Suarez Miranda <[email protected]> * removed leftover comments and prints Signed-off-by: Allison Suarez Miranda <[email protected]> * implemented check and pattern matching for start label and key on badge Signed-off-by: Allison Suarez Miranda <[email protected]> * fixed circular dep and regex issue Signed-off-by: Allison Suarez Miranda <[email protected]> * tests for exceptions Signed-off-by: Allison Suarez Miranda <[email protected]>
- Loading branch information
1 parent
20c2fd2
commit af4b512
Showing
6 changed files
with
241 additions
and
69 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,116 @@ | ||
# Copyright Contributors to the Amundsen project. | ||
# SPDX-License-Identifier: Apache-2.0 | ||
|
||
from typing import Any, Dict, List, Optional | ||
import re | ||
|
||
from databuilder.models.neo4j_csv_serde import Neo4jCsvSerializable, NODE_KEY, \ | ||
NODE_LABEL, RELATION_START_KEY, RELATION_START_LABEL, RELATION_END_KEY, \ | ||
RELATION_END_LABEL, RELATION_TYPE, RELATION_REVERSE_TYPE | ||
|
||
|
||
class Badge:
    """Plain value holder pairing a badge name with its category."""

    def __init__(self, name: str, category: str):
        self.name, self.category = name, category

    def __repr__(self) -> str:
        # Both fields are rendered with repr() so quoting stays unambiguous.
        shown = (self.name, self.category)
        return 'Badge({!r}, {!r})'.format(*shown)
|
||
|
||
class BadgeMetadata(Neo4jCsvSerializable):
    """
    Badge model.

    Turns the badges attached to one entity into Neo4j CSV records: one
    ``Badge`` node per badge, plus one relation per badge between the
    entity (identified by ``start_label`` / ``start_key``) and that badge,
    typed ``HAS_BADGE`` with reverse type ``BADGE_FOR``.
    """
    BADGE_NODE_LABEL = 'Badge'
    BADGE_KEY_FORMAT = '{badge}'
    BADGE_CATEGORY = 'category'

    # Relation between entity and badge
    BADGE_RELATION_TYPE = 'HAS_BADGE'
    INVERSE_BADGE_RELATION_TYPE = 'BADGE_FOR'

    # Key pattern expected for each supported start label. Compiled once at
    # class creation instead of on every instantiation.
    # NOTE(review): Pattern.match() anchors only at the start of the key and
    # the '.' between the first two name parts is unescaped, so these act as
    # prefix checks (e.g. a column key also satisfies the Table pattern).
    # Left unchanged so keys that are accepted today keep being accepted.
    _KEY_PATTERN_BY_LABEL = {
        'Table': re.compile(
            r'[a-z]+://[a-zA-Z0-9_.-]+.[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+'),
        'Dashboard': re.compile(
            r'[a-z]+_dashboard://[a-zA-Z0-9_.-]+.[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+'),
        'Column': re.compile(
            r'[a-z]+://[a-zA-Z0-9_.-]+.[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+'),
    }

    def __init__(self,
                 db_name: str,
                 schema: str,
                 start_label: str,  # Table, Dashboard, Column
                 start_key: str,
                 badges: List[Badge],
                 cluster: str = 'gold',  # is this what we want as default for badges..?
                 ):
        """
        :param db_name: database name; stored lower-cased as ``self.db``
        :param schema: schema name; stored lower-cased
        :param start_label: label of the entity the badges belong to; must be
            one of 'Table', 'Dashboard' or 'Column'
        :param start_key: key of that entity; must match the key pattern
            registered for ``start_label``
        :param badges: badges to attach; falsy entries (e.g. ``None``) are
            skipped when records are built
        :param cluster: cluster name; stored lower-cased
        :raises ValueError: if ``start_label`` is not supported, or if
            ``start_key`` does not match the pattern for ``start_label``
        """
        self.badges = badges

        self.db = db_name.lower()
        self.schema = schema.lower()
        self.cluster = cluster.lower()

        key_pattern = self._KEY_PATTERN_BY_LABEL.get(start_label)
        if key_pattern is None:
            # ValueError is still an Exception, so existing handlers keep working.
            raise ValueError(start_label + ' is not a valid start_label for a Badge relation')
        if not key_pattern.match(start_key):
            raise ValueError(start_key + ' does not match the key pattern for a ' + start_label)
        self.start_label = start_label
        self.start_key = start_key

        # Records are built eagerly; the create_next_* methods drain these.
        self._node_iter = iter(self.create_nodes())
        self._relation_iter = iter(self.create_relation())

    def create_next_node(self) -> Optional[Dict[str, Any]]:
        """
        Return the next badge node record, or None once all were returned.
        """
        return next(self._node_iter, None)

    def create_next_relation(self) -> Optional[Dict[str, Any]]:
        """
        Return the next relation record, or None once all were returned.
        """
        return next(self._relation_iter, None)

    @staticmethod
    def get_badge_key(name: str) -> str:
        """
        Build the node key for a badge name; an empty name yields ''.
        """
        if not name:
            return ''
        return BadgeMetadata.BADGE_KEY_FORMAT.format(badge=name)

    def get_metadata_model_key(self) -> str:
        """
        Return the key of the entity the badges are attached to.
        """
        return self.start_key

    def create_nodes(self) -> List[Dict[str, Any]]:
        """
        Create a list of Neo4j node records, one per non-falsy badge
        :return:
        """
        return [
            {
                NODE_KEY: self.get_badge_key(badge.name),
                NODE_LABEL: self.BADGE_NODE_LABEL,
                self.BADGE_CATEGORY: badge.category,
            }
            for badge in self.badges
            if badge
        ]

    def create_relation(self) -> List[Dict[str, Any]]:
        """
        Create a list of Neo4j relation records linking the start entity to
        each non-falsy badge
        :return:
        """
        # Falsy entries are skipped exactly as in create_nodes, so no relation
        # is emitted for a badge that has no node (and a None entry no longer
        # raises AttributeError during construction).
        return [
            {
                RELATION_START_LABEL: self.start_label,
                RELATION_END_LABEL: self.BADGE_NODE_LABEL,
                RELATION_START_KEY: self.start_key,
                RELATION_END_KEY: self.get_badge_key(badge.name),
                RELATION_TYPE: self.BADGE_RELATION_TYPE,
                RELATION_REVERSE_TYPE: self.INVERSE_BADGE_RELATION_TYPE,
            }
            for badge in self.badges
            if badge
        ]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,98 @@ | ||
# Copyright Contributors to the Amundsen project. | ||
# SPDX-License-Identifier: Apache-2.0 | ||
|
||
import unittest | ||
from databuilder.models.badge import Badge, BadgeMetadata | ||
|
||
from databuilder.models.neo4j_csv_serde import NODE_KEY, NODE_LABEL, \ | ||
RELATION_START_KEY, RELATION_START_LABEL, RELATION_END_KEY, \ | ||
RELATION_END_LABEL, RELATION_TYPE, RELATION_REVERSE_TYPE | ||
|
||
# Fixture values shared by the BadgeMetadata tests in this module.
db, SCHEMA, TABLE, CLUSTER = 'hive', 'BASE', 'TEST', 'DEFAULT'

# Two badges in the 'column' category, attached to the entity under test.
badge1, badge2 = (Badge(badge_name, 'column') for badge_name in ('badge1', 'badge2'))
|
||
|
||
class TestBadge(unittest.TestCase):
    """Unit tests for BadgeMetadata key building, validation and records."""

    def setUp(self) -> None:
        super().setUp()
        # A column entity carrying both fixture badges.
        self.badge_metadata = BadgeMetadata(db_name='hive',
                                            schema=SCHEMA,
                                            start_label='Column',
                                            start_key='hive://default.base/test/ds',
                                            cluster=CLUSTER,
                                            badges=[badge1, badge2])

    def test_get_badge_key(self) -> None:
        """The badge key is the badge name itself."""
        badge_key = self.badge_metadata.get_badge_key(badge1.name)
        self.assertEqual(badge_key, badge1.name)

    def test_create_nodes(self) -> None:
        """One node record is produced per badge."""
        nodes = self.badge_metadata.create_nodes()
        self.assertEqual(len(nodes), 2)

        node1 = {
            NODE_KEY: BadgeMetadata.BADGE_KEY_FORMAT.format(badge=badge1.name),
            NODE_LABEL: BadgeMetadata.BADGE_NODE_LABEL,
            BadgeMetadata.BADGE_CATEGORY: badge1.category
        }
        node2 = {
            NODE_KEY: BadgeMetadata.BADGE_KEY_FORMAT.format(badge=badge2.name),
            NODE_LABEL: BadgeMetadata.BADGE_NODE_LABEL,
            BadgeMetadata.BADGE_CATEGORY: badge2.category
        }

        self.assertIn(node1, nodes)
        self.assertIn(node2, nodes)

    def test_bad_key_entity_match(self) -> None:
        """A table-shaped key is rejected when the label is 'Column'."""
        column_label = 'Column'
        table_key = 'hive://default.base/test'

        with self.assertRaises(Exception):
            BadgeMetadata(db_name='hive',
                          schema=SCHEMA,
                          start_label=column_label,
                          start_key=table_key,
                          cluster=CLUSTER,
                          badges=[badge1, badge2])

    def test_bad_entity_label(self) -> None:
        """A label other than Table/Dashboard/Column is rejected."""
        user_label = 'User'
        table_key = 'hive://default.base/test'

        with self.assertRaises(Exception):
            BadgeMetadata(db_name='hive',
                          schema=SCHEMA,
                          start_label=user_label,
                          start_key=table_key,
                          cluster=CLUSTER,
                          badges=[badge1, badge2])

    def test_create_relation(self) -> None:
        """One relation record is produced per badge."""
        relations = self.badge_metadata.create_relation()
        self.assertEqual(len(relations), 2)

        relation1 = {
            RELATION_START_LABEL: self.badge_metadata.start_label,
            RELATION_END_LABEL: BadgeMetadata.BADGE_NODE_LABEL,
            RELATION_START_KEY: self.badge_metadata.start_key,
            RELATION_END_KEY: BadgeMetadata.get_badge_key(badge1.name),
            RELATION_TYPE: BadgeMetadata.BADGE_RELATION_TYPE,
            RELATION_REVERSE_TYPE: BadgeMetadata.INVERSE_BADGE_RELATION_TYPE,
        }
        relation2 = {
            RELATION_START_LABEL: self.badge_metadata.start_label,
            RELATION_END_LABEL: BadgeMetadata.BADGE_NODE_LABEL,
            RELATION_START_KEY: self.badge_metadata.start_key,
            RELATION_END_KEY: BadgeMetadata.get_badge_key(badge2.name),
            RELATION_TYPE: BadgeMetadata.BADGE_RELATION_TYPE,
            RELATION_REVERSE_TYPE: BadgeMetadata.INVERSE_BADGE_RELATION_TYPE,
        }

        self.assertIn(relation1, relations)
        self.assertIn(relation2, relations)