Skip to content

Commit

Permalink
View Column persist_docs + functional tests (#519)
Browse files Browse the repository at this point in the history
  • Loading branch information
benc-db authored Nov 30, 2023
2 parents 526969b + 2a6e8a7 commit 3af8fd3
Show file tree
Hide file tree
Showing 16 changed files with 131 additions and 236 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
- Adding capability to specify compute on a per model basis ([488](https://github.com/databricks/dbt-databricks/pull/488))
- Selectively persist column docs that have changed between runs of incremental ([513](https://github.com/databricks/dbt-databricks/pull/513))
- Enabling access control list for job runs (thanks @srggrs!) ([518](https://github.com/databricks/dbt-databricks/pull/518))
- Allow persisting of column comments on views and retrieving comments for docs on Hive ([519](https://github.com/databricks/dbt-databricks/pull/519))

## dbt-databricks 1.7.1 (Nov 13, 2023)

Expand Down
1 change: 1 addition & 0 deletions dbt/adapters/databricks/column.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

@dataclass
class DatabricksColumn(SparkColumn):
table_comment: Optional[str] = None
comment: Optional[str] = None

TYPE_LABELS: ClassVar[Dict[str, str]] = {
Expand Down
15 changes: 15 additions & 0 deletions dbt/adapters/databricks/impl.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@
SHOW_TABLE_EXTENDED_MACRO_NAME = "show_table_extended"
SHOW_TABLES_MACRO_NAME = "show_tables"
SHOW_VIEWS_MACRO_NAME = "show_views"
GET_COLUMNS_COMMENTS_MACRO_NAME = "get_columns_comments"


@dataclass
Expand Down Expand Up @@ -104,6 +105,8 @@ def get_identifier_list_string(table_names: Set[str]) -> str:

@undefined_proof
class DatabricksAdapter(SparkAdapter):
INFORMATION_COMMENT_REGEX = re.compile(r"Comment: (.*)\n[A-Z][A-Za-z ]+:", re.DOTALL)

Relation = DatabricksRelation
Column = DatabricksColumn

Expand Down Expand Up @@ -388,6 +391,7 @@ def parse_describe_extended( # type: ignore[override]
table_type=relation.type,
table_owner=str(metadata.get(KEY_TABLE_OWNER)),
table_stats=table_stats,
table_comment=metadata.get("Comment"),
column=column["col_name"],
column_index=idx,
dtype=column["data_type"],
Expand Down Expand Up @@ -444,23 +448,32 @@ def _set_relation_information(self, relation: DatabricksRelation) -> DatabricksR

return self._get_updated_relation(relation)[0]

def _get_column_comments(self, relation: DatabricksRelation) -> Dict[str, str]:
    """Return a mapping of column name -> column comment for ``relation``.

    Runs the ``get_columns_comments`` macro (a ``DESCRIBE TABLE``) and reads,
    for each result row, the first field (column name) and the third field
    (its comment).
    """
    rows = self.execute_macro(GET_COLUMNS_COMMENTS_MACRO_NAME, kwargs={"relation": relation})
    comments: Dict[str, str] = {}
    for row in rows:
        comments[row[0]] = row[2]
    return comments

def parse_columns_from_information( # type: ignore[override]
self, relation: DatabricksRelation, information: str
) -> List[DatabricksColumn]:
owner_match = re.findall(self.INFORMATION_OWNER_REGEX, information)
owner = owner_match[0] if owner_match else None
comment_match = re.findall(self.INFORMATION_COMMENT_REGEX, information)
table_comment = comment_match[0] if comment_match else None
matches = re.finditer(self.INFORMATION_COLUMNS_REGEX, information)
columns = []
stats_match = re.findall(self.INFORMATION_STATISTICS_REGEX, information)
raw_table_stats = stats_match[0] if stats_match else None
table_stats = DatabricksColumn.convert_table_stats(raw_table_stats)

for match_num, match in enumerate(matches):
column_name, column_type, nullable = match.groups()
column = DatabricksColumn(
table_database=relation.database,
table_schema=relation.schema,
table_name=relation.table,
table_type=relation.type,
table_comment=table_comment,
column_index=match_num,
table_owner=owner,
column=column_name,
Expand Down Expand Up @@ -583,11 +596,13 @@ def _get_columns_for_catalog( # type: ignore[override]
) -> Iterable[Dict[str, Any]]:
columns = self.parse_columns_from_information(relation, information)

comments = self._get_column_comments(relation)
for column in columns:
# convert DatabricksRelation into catalog dicts
as_dict = column.to_column_dict()
as_dict["column_name"] = as_dict.pop("column", None)
as_dict["column_type"] = as_dict.pop("dtype")
as_dict["column_comment"] = comments[as_dict["column_name"]]
yield as_dict

def add_query(
Expand Down
32 changes: 32 additions & 0 deletions dbt/include/databricks/macros/adapters.sql
Original file line number Diff line number Diff line change
Expand Up @@ -101,8 +101,32 @@
{%- endif -%}
{%- endmacro -%}

{% macro get_column_comment_sql(column_name, column_dict) -%}
  {#- Render "<quoted column> comment '<escaped description>'" for one column.
      When the model does not document this column, only the quoted name is
      emitted. Initializing the clause to "" makes the macro safe even under a
      strict-undefined Jinja environment (the original left the variable unset
      when the condition was false and relied on lenient Undefined rendering). -#}
  {% set column_comment_clause = "" %}
  {% if column_name in column_dict and column_dict[column_name]["description"] -%}
    {#- Escape single quotes so the description forms a valid SQL string literal. -#}
    {% set escaped_description = column_dict[column_name]["description"] | replace("'", "\\'") %}
    {% set column_comment_clause = "comment '" ~ escaped_description ~ "'" %}
  {%- endif -%}
  {{ adapter.quote(column_name) }} {{ column_comment_clause }}
{% endmacro %}

{% macro get_persist_docs_column_list(model_columns, query_columns) %}
{#- Build a comma-separated column list for a CREATE statement: one
    "<column> [comment '...']" entry (via get_column_comment_sql) per column
    returned by the model's query, attaching comments where the model
    documents them. -#}
{% for column_name in query_columns %}
{{ get_column_comment_sql(column_name, model_columns) }}
{{- ", " if not loop.last else "" }}
{% endfor %}
{% endmacro %}

{% macro databricks__create_view_as(relation, sql) -%}
create or replace view {{ relation }}
{% if config.persist_column_docs() -%}
{% set model_columns = model.columns %}
{% set query_columns = get_columns_in_query(sql) %}
{% if query_columns %}
(
{{ get_persist_docs_column_list(model_columns, query_columns) }}
)
{% endif %}
{% endif %}
{{ comment_clause() }}
{%- set contract_config = config.get('contract') -%}
{% if contract_config and contract_config.enforced %}
Expand Down Expand Up @@ -512,4 +536,12 @@
{%- set columns_to_persist_docs = adapter.get_persist_doc_columns(existing_columns, model.columns) -%}
{% do alter_column_comment(relation, columns_to_persist_docs) %}
{% endif %}
{% endmacro %}

{% macro get_columns_comments(relation) -%}
{#- Run `DESCRIBE TABLE` against the relation and return the result table.
    The adapter's _get_column_comments reads row[0] (column name) and
    row[2] (comment) from these rows. -#}
{% call statement('get_columns_comments', fetch_result=True) -%}
describe table {{ relation }}
{% endcall %}

{% do return(load_result('get_columns_comments').table) %}
{% endmacro %}
67 changes: 67 additions & 0 deletions tests/functional/adapter/persist_docs/test_persist_docs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
import json
from dbt.tests.adapter.persist_docs.test_persist_docs import (
BasePersistDocsBase,
BasePersistDocsColumnMissing,
BasePersistDocsCommentOnQuotedColumn,
)
from dbt.tests import util

import pytest


class TestPersistDocs(BasePersistDocsBase):
    """Check that relation- and column-level docs are persisted and surfaced
    in the generated catalog for both table and view models."""

    @pytest.fixture(scope="class")
    def project_config_update(self):
        # Persist both relation-level and column-level docs for every model.
        return {
            "models": {
                "test": {
                    "+persist_docs": {
                        "relation": True,
                        "columns": True,
                    },
                }
            }
        }

    def _assert_has_view_comments(
        self, view_node, has_node_comments=True, has_column_comments=True
    ):
        # Relation-level comment on the view itself.
        node_comment = view_node["metadata"]["comment"]
        if has_node_comments:
            assert node_comment.startswith("View model description")
            self._assert_common_comments(node_comment)
        else:
            assert node_comment is None or node_comment == ""

        # Only the documented `id` column should carry a comment.
        id_comment = view_node["columns"]["id"]["comment"]
        if has_column_comments:
            assert id_comment.startswith("id Column description")
            self._assert_common_comments(id_comment)
        else:
            assert id_comment is None

        # `name` is never documented, so it must never have a comment.
        assert view_node["columns"]["name"]["comment"] is None

    def test_has_comments(self, project):
        util.run_dbt(["docs", "generate"])
        with open("target/catalog.json") as fp:
            catalog = json.load(fp)

        assert "nodes" in catalog
        nodes = catalog["nodes"]
        assert len(nodes) == 4

        self._assert_has_table_comments(nodes["model.test.table_model"])
        self._assert_has_view_comments(nodes["model.test.view_model"])
        # The undocumented model must end up with no comments at all.
        self._assert_has_view_comments(nodes["model.test.no_docs_model"], False, False)


class TestPersistDocsColumnMissing(BasePersistDocsColumnMissing):
    """Run the upstream adapter tests for a model that documents only a
    subset of its columns, unchanged."""


class TestPersistDocsCommentOnQuotedColumn(BasePersistDocsCommentOnQuotedColumn):
    """Run the upstream adapter tests for persisting a comment on a quoted
    column name, unchanged."""

This file was deleted.

10 changes: 0 additions & 10 deletions tests/integration/persist_docs/models/my_fun_docs.md

This file was deleted.

1 change: 0 additions & 1 deletion tests/integration/persist_docs/models/no_docs_model.sql

This file was deleted.

119 changes: 0 additions & 119 deletions tests/integration/persist_docs/models/schema.yml

This file was deleted.

2 changes: 0 additions & 2 deletions tests/integration/persist_docs/models/table_delta_model.sql

This file was deleted.

2 changes: 0 additions & 2 deletions tests/integration/persist_docs/models/view_model.sql

This file was deleted.

3 changes: 0 additions & 3 deletions tests/integration/persist_docs/seeds/seed.csv

This file was deleted.

26 changes: 0 additions & 26 deletions tests/integration/persist_docs/seeds/seeds.yml

This file was deleted.

Loading

0 comments on commit 3af8fd3

Please sign in to comment.