From d80431a27375b99ac37bf3562374c9a3aad03ed9 Mon Sep 17 00:00:00 2001 From: Kshitij Aranke Date: Tue, 21 Jan 2025 12:39:04 +0000 Subject: [PATCH] Fix #11000 #11001: Add `doc_blocks` to manifest for nodes and columns --- core/dbt/artifacts/resources/v1/components.py | 2 + core/dbt/parser/manifest.py | 46 ++++++++++++++++++- .../functional/docs/test_good_docs_blocks.py | 46 ++++++++++++++++++- 3 files changed, 90 insertions(+), 4 deletions(-) diff --git a/core/dbt/artifacts/resources/v1/components.py b/core/dbt/artifacts/resources/v1/components.py index 8eb43f35d8e..8461ec75ea4 100644 --- a/core/dbt/artifacts/resources/v1/components.py +++ b/core/dbt/artifacts/resources/v1/components.py @@ -68,6 +68,7 @@ class ColumnInfo(AdditionalPropertiesMixin, ExtensibleDbtClassMixin): tags: List[str] = field(default_factory=list) _extra: Dict[str, Any] = field(default_factory=dict) granularity: Optional[TimeGranularity] = None + doc_blocks: List[List[str]] = field(default_factory=list) @dataclass @@ -197,6 +198,7 @@ class ParsedResource(ParsedResourceMandatory): unrendered_config_call_dict: Dict[str, Any] = field(default_factory=dict) relation_name: Optional[str] = None raw_code: str = "" + doc_blocks: List[List[str]] = field(default_factory=list) def __post_serialize__(self, dct: Dict, context: Optional[Dict] = None): dct = super().__post_serialize__(dct, context) diff --git a/core/dbt/parser/manifest.py b/core/dbt/parser/manifest.py index 023c5db9300..a01b5deb3bb 100644 --- a/core/dbt/parser/manifest.py +++ b/core/dbt/parser/manifest.py @@ -10,6 +10,7 @@ from typing import Any, Callable, Dict, List, Mapping, Optional, Set, Tuple, Type, Union import msgpack +from jinja2.nodes import Call, TemplateData import dbt.deprecations import dbt.exceptions @@ -115,6 +116,7 @@ from dbt.parser.sources import SourcePatcher from dbt.parser.unit_tests import process_models_for_unit_test from dbt.version import __version__ +from dbt_common.clients.jinja import parse from 
def _get_doc_blocks(s: str) -> Tuple[List[List[str]], bool]:
    """Split a description template into literal-text and ``doc()`` segments.

    The string is parsed (not rendered) and its top-level output nodes are
    walked in order.

    Args:
        s: Raw, unrendered description text.

    Returns:
        A ``(doc_blocks, has_doc_blocks)`` pair. ``doc_blocks`` holds one entry
        per segment, in source order: ``["str", <text>]`` for literal template
        data and ``["doc", <arg>, ...]`` for a ``doc(...)`` call with its
        literal arguments. ``has_doc_blocks`` is True when at least one
        ``doc(...)`` call was recorded.
    """
    doc_blocks: List[List[str]] = []
    has_doc_blocks = False

    ast = parse(s)

    # getattr() with a default keeps the walk safe for nodes that lack the attribute:
    # block statements such as {% if %} have no ``nodes`` list, and a call through an
    # attribute (``{{ a.b() }}``) has a callee without a ``name``.
    for statement in getattr(ast, "body", []):
        for node in getattr(statement, "nodes", []):
            if isinstance(node, TemplateData) and hasattr(node, "data"):
                doc_blocks.append(["str", node.data])
                continue

            if not isinstance(node, Call):
                continue
            if getattr(getattr(node, "node", None), "name", None) != "doc":
                continue

            call_args = getattr(node, "args", [])
            # Only literal arguments expose ``value``; a computed argument cannot be
            # resolved without rendering, so such a call is not recorded.
            if any(not hasattr(arg, "value") for arg in call_args):
                continue

            doc_blocks.append(["doc", *(arg.value for arg in call_args)])
            has_doc_blocks = True

    return doc_blocks, has_doc_blocks


def _get_description_and_doc_blocks(
    description: str, context: Dict[str, Any]
) -> Tuple[str, List[List[str]]]:
    """Render a description and report the ``doc()`` blocks it is built from.

    Args:
        description: Raw, unrendered description text.
        context: Rendering context passed through to ``get_rendered``.

    Returns:
        ``(rendered_description, doc_blocks)``. ``doc_blocks`` is empty when the
        description contains no ``doc(...)`` call, even if it has literal text.
    """
    # Always render, and do so before inspecting the template. Rendering used to be the
    # only step here, so descriptions that use other Jinja expressions must keep being
    # rendered, and an invalid template keeps failing inside get_rendered.
    rendered = get_rendered(description, context)

    doc_blocks, has_doc_blocks = _get_doc_blocks(description)
    if not has_doc_blocks:
        doc_blocks = []

    return rendered, doc_blocks


# node and column descriptions
def _process_docs_for_node(
    context: Dict[str, Any],
    node: ManifestNode,
):
    """Render the node's description and every column description in place.

    Sets ``description`` and ``doc_blocks`` on ``node`` and on each entry of
    ``node.columns``.
    """
    node.description, node.doc_blocks = _get_description_and_doc_blocks(node.description, context)

    for column in node.columns.values():
        column.description, column.doc_blocks = _get_description_and_doc_blocks(
            column.description, context
        )
a/tests/functional/docs/test_good_docs_blocks.py +++ b/tests/functional/docs/test_good_docs_blocks.py @@ -58,6 +58,8 @@ description: The user's first name - name: last_name description: "{{ doc('test', 'my_model_doc__last_name') }}" + - name: tricky + description: "{{ doc('my_model_doc__id') }} The user's first name {{ doc('test', 'my_model_doc__last_name') }}" """ @@ -82,6 +84,7 @@ def test_valid_doc_ref(self, project): model_data = manifest["nodes"]["model.test.model"] assert model_data["description"] == "My model is just a copy of the seed" + assert model_data["doc_blocks"] == [["doc", "my_model_doc"]] assert { "name": "id", @@ -92,6 +95,7 @@ def test_valid_doc_ref(self, project): "quote": None, "tags": [], "granularity": None, + "doc_blocks": [["doc", "my_model_doc__id"]], } == model_data["columns"]["id"] assert { @@ -103,6 +107,7 @@ def test_valid_doc_ref(self, project): "quote": None, "tags": [], "granularity": None, + "doc_blocks": [], } == model_data["columns"]["first_name"] assert { @@ -114,9 +119,26 @@ def test_valid_doc_ref(self, project): "quote": None, "tags": [], "granularity": None, + "doc_blocks": [["doc", "test", "my_model_doc__last_name"]], } == model_data["columns"]["last_name"] - assert len(model_data["columns"]) == 3 + assert { + "name": "tricky", + "description": "The user ID number The user's first name The user's last name", + "data_type": None, + "constraints": [], + "meta": {}, + "quote": None, + "tags": [], + "granularity": None, + "doc_blocks": [ + ["doc", "my_model_doc__id"], + ["str", " The user's first name "], + ["doc", "test", "my_model_doc__last_name"], + ], + } == model_data["columns"]["tricky"] + + assert len(model_data["columns"]) == 4 class TestGoodDocsBlocksAltPath: @@ -146,6 +168,7 @@ def test_alternative_docs_path(self, project): model_data = manifest["nodes"]["model.test.model"] assert model_data["description"] == "Alt text about the model" + assert model_data["doc_blocks"] == [["doc", "my_model_doc"]] assert { "name": 
"id", @@ -156,6 +179,7 @@ def test_alternative_docs_path(self, project): "quote": None, "tags": [], "granularity": None, + "doc_blocks": [["doc", "my_model_doc__id"]], } == model_data["columns"]["id"] assert { @@ -167,6 +191,7 @@ def test_alternative_docs_path(self, project): "quote": None, "tags": [], "granularity": None, + "doc_blocks": [], } == model_data["columns"]["first_name"] assert { @@ -178,6 +203,23 @@ def test_alternative_docs_path(self, project): "quote": None, "tags": [], "granularity": None, + "doc_blocks": [["doc", "test", "my_model_doc__last_name"]], } == model_data["columns"]["last_name"] - assert len(model_data["columns"]) == 3 + assert { + "name": "tricky", + "description": "The user ID number with alternative text The user's first name The user's last name in this other file", + "data_type": None, + "constraints": [], + "meta": {}, + "quote": None, + "tags": [], + "granularity": None, + "doc_blocks": [ + ["doc", "my_model_doc__id"], + ["str", " The user's first name "], + ["doc", "test", "my_model_doc__last_name"], + ], + } == model_data["columns"]["tricky"] + + assert len(model_data["columns"]) == 4