From b99157f566d1a355430b68a66079dbc2e3e884a2 Mon Sep 17 00:00:00 2001 From: Doug Guthrie Date: Fri, 20 Dec 2024 12:07:33 -0700 Subject: [PATCH 01/22] add dbt-cloud config --- dbt_project.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/dbt_project.yml b/dbt_project.yml index b2e61ec6..122ea8b6 100644 --- a/dbt_project.yml +++ b/dbt_project.yml @@ -58,3 +58,7 @@ seeds: +column_types: effective_date: DATE rate: NUMBER + +dbt-cloud: + defer-env-id: '218762' + project-id: '270542' From 9fa591eb02fe986d7ca96d9a1c57fe3b61cf524b Mon Sep 17 00:00:00 2001 From: Doug Guthrie Date: Fri, 20 Dec 2024 12:07:57 -0700 Subject: [PATCH 02/22] Add column aware script and action --- .github/workflows/column_aware_ci.yml | 38 ++++ scripts/column_aware_ci.py | 297 ++++++++++++++++++++++++++ 2 files changed, 335 insertions(+) create mode 100644 .github/workflows/column_aware_ci.yml create mode 100644 scripts/column_aware_ci.py diff --git a/.github/workflows/column_aware_ci.yml b/.github/workflows/column_aware_ci.yml new file mode 100644 index 00000000..4b0a4eeb --- /dev/null +++ b/.github/workflows/column_aware_ci.yml @@ -0,0 +1,38 @@ +name: Column Aware dbt Cloud CI + +on: + workflow_dispatch: + pull_request: + branches: + - main + types: + - opened + - reopened + - synchronize + - ready_for_review + +jobs: + trigger-dbt-ci-job: + if: github.event.pull_request.draft == false + runs-on: ubuntu-latest + env: + DBT_CLOUD_SERVICE_TOKEN: ${{ secrets.DBT_CLOUD_SERVICE_TOKEN }} + DBT_CLOUD_ACCOUNT_ID: 43786 + DBT_CLOUD_JOB_ID: 567183 + DBT_CLOUD_HOST: cloud.getdbt.com + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + + steps: + - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 + with: + python-version: "3.11" + - uses: yezz123/setup-uv@v4 + - name: Install packages + run: | + uv pip install dbtc sqlglot + env: + UV_SYSTEM_PYTHON: 1 + + - name: Trigger DBT Cloud Job + run: uv run scripts/column_aware_ci.py diff --git a/scripts/column_aware_ci.py b/scripts/column_aware_ci.py new file mode 100644 index 00000000..1de1f11d --- /dev/null +++ b/scripts/column_aware_ci.py @@ -0,0 +1,297 @@ +# stdlib +import enum +import json +import logging +import os +import re +import subprocess +import sys +from dataclasses import dataclass + +# third party +from dbtc import dbtCloudClient +from sqlglot import parse_one, diff +from sqlglot.expressions import Column + + +logging.basicConfig( + level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s" +) +logger = logging.getLogger(__name__) + + +@dataclass +class Node: + unique_id: str + target_code: str + source_code: str | None = None + + +class JobRunStatus(enum.IntEnum): + QUEUED = 1 + STARTING = 2 + RUNNING = 3 + SUCCESS = 10 + ERROR = 20 + CANCELLED = 30 + + +# Env Vars +DBT_CLOUD_SERVICE_TOKEN = os.environ["DBT_CLOUD_SERVICE_TOKEN"] +DBT_CLOUD_ACCOUNT_ID = os.environ["DBT_CLOUD_ACCOUNT_ID"] +DBT_CLOUD_JOB_ID = os.environ["DBT_CLOUD_JOB_ID"] +DBT_CLOUD_HOST = os.getenv("DBT_CLOUD_HOST", "cloud.getdbt.com") +GITHUB_TOKEN = os.environ["GITHUB_TOKEN"] +GITHUB_BRANCH = os.environ["GITHUB_HEAD_REF"] +GITHUB_REPO = os.environ["GITHUB_REPOSITORY"] +GITHUB_REF = os.environ["GITHUB_REF"] + + +dbtc_client = dbtCloudClient(host=DBT_CLOUD_HOST) + + +def get_dev_nodes() -> dict[str, Node]: + with open("target/run_results.json") as rr: + run_results_json = json.load(rr) + + run_results = {} + for result in run_results_json["results"]: + unique_id = result["unique_id"] + relation_name = result["relation_name"] + if relation_name is not None: + logger.info(f"Retrieved compiled code for {unique_id}") + run_results[unique_id] = Node( + unique_id=unique_id, + target_code=result["compiled_code"], + ) + + return run_results + + +def add_deferring_node_code( + nodes: dict[str, Node], environment_id: int +) -> list[Node]: + + query = """ + query Environment($environmentId: BigInt!, $filter: ModelAppliedFilter, $first: Int, $after: String) { + environment(id: $environmentId) { + applied { + models(filter: $filter, first: $first, after: $after) { + edges { + node { + compiledCode + uniqueId + } + } + pageInfo { + endCursor + hasNextPage + hasPreviousPage + startCursor + } + totalCount + } + } + } + } + """ + + variables = { + "first": 500, + "after": None, + "environmentId": environment_id, + "filter": {"uniqueIds": [node.unique_id for node in nodes.values()]} + } + + logger.info("Querying discovery API for compiled code...") + + deferring_env_nodes = dbtc_client.metadata.query( + query, variables, paginated_request_to_list=True + ) + + for deferring_env_node in deferring_env_nodes: + unique_id = deferring_env_node["node"]["uniqueId"] + if unique_id in nodes: + nodes[unique_id].source_code = deferring_env_node["node"]["compiledCode"] + + # Assumption: Anything net new (e.g. nothing in the deferred env) shouldn't have + # anything excluded, so we're not using it beyond this point. + return {k: v for k, v in nodes.items() if v.source_code} + + +def trigger_job(steps_override: list[str] = None) -> None: + + def extract_pr_number(s): + match = re.search(r"refs/pull/(\d+)/merge", s) + return int(match.group(1)) if match else None + + # Extract PR Number + pull_request_id = extract_pr_number(GITHUB_REF) + + # Create schema + schema_override = f"dbt_cloud_pr_{DBT_CLOUD_JOB_ID}_{pull_request_id}" + + # Create payload to pass to job + # https://docs.getdbt.com/docs/deploy/ci-jobs#trigger-a-ci-job-with-the-api + payload = { + "cause": "Column-aware CI", + "schema_override": schema_override, + "git_branch": GITHUB_BRANCH, + "github_pull_request_id": pull_request_id, + } + + if steps_override is not None: + payload["steps_override"] = steps_override + + run = dbtc_client.cloud.trigger_job( + DBT_CLOUD_ACCOUNT_ID, DBT_CLOUD_JOB_ID, payload, should_poll=True + ) + + run_status = run["status"] + if run_status in (JobRunStatus.ERROR, JobRunStatus.CANCELLED): + sys.exit(1) + + sys.exit(0) + + +class NodeDiff: + + COLUMN_QUERY = """ + query Column($environmentId: BigInt!, $nodeUniqueId: String!, $filters: ColumnLineageFilter) { + column(environmentId: $environmentId) { + lineage(nodeUniqueId: $nodeUniqueId, filters: $filters) { + nodeUniqueId + relationship + } + } + } + """ + + def __init__(self, node: Node, environment_id: int): + self.node = node + self.environment_id = environment_id + self.source = parse_one(node.source_code) + self.target = parse_one(node.target_code) + self.changes = diff(self.source, self.target, delta_only=True) + self.downstream_models = set() + + # Only returning column changes now + for change in self.changes: + if hasattr(change, "expression"): + expression = change.expression + while True: + column = expression.find(Column) + if column is not None: + self.downstream_models.update( + self._get_downstream_models_from_column(column.name) + ) + break + elif expression.depth < 1: + break + expression = expression.parent + + + def _get_downstream_models_from_column(self, column_name: str) -> list[str]: + variables = { + "environmentId": self.environment_id, + "nodeUniqueId": self.node.unique_id, + "filters": {"columnName": column_name.upper()} + } + results = dbtc_client.metadata.query(self.COLUMN_QUERY, variables) + try: + lineage = results["data"]["column"]["lineage"] + except Exception as e: + logger.error( + f"Error occurred retrieving column lineage for {column_name}" + f"in {self.node.unique_id}:\n{e}" + ) + return [] + + downstream_models = list() + for node in lineage: + if node["relationship"] == "child": + downstream_models.append(node["nodeUniqueId"]) + + return downstream_models + + +if __name__ == "__main__": + + logger.info("Compiling modified models...") + + cmd = ["dbt", "compile", "--select", "state:modified"] + result = subprocess.run(cmd, capture_output=True, text=True) + + logger.info("Retrieving compiled code...") + + nodes = get_dev_nodes() + + logger.info("Retrieving modified and downstream models...") + + # Understand all modified and anything downstream by using `dbt ls` + cmd = ["dbt", "ls", "--resource-type", "model", "--select", "state:modified+", "--output", "json"] + result = subprocess.run(cmd, capture_output=True, text=True) + lines = result.stdout.split("\n") + all_unique_ids = set() + for line in lines: + json_str = line[line.find('{'):line.rfind('}')+1] + try: + data = json.loads(json_str) + all_unique_ids.add(data["unique_id"]) + except ValueError: + continue + + # Remove the modified models from this set because they should not be excluded + for key in nodes.keys(): + all_unique_ids.discard(key) + + # If nothing exists in all_unique_ids, nothing is downstream, nothing to exclude + if not all_unique_ids: + + logger.info("Nothing downstream exists, triggering as normal...") + + trigger_job() + + logger.info("Retrieving CI Job, determining deferring environment...") + + # Retrieve the CI job so we can get the deferring environment_id + ci_job = dbtc_client.cloud.get_job(DBT_CLOUD_ACCOUNT_ID, DBT_CLOUD_JOB_ID) + + environment_id: int = None + if ( + "data" in ci_job + and isinstance(ci_job["data"], dict) + and ci_job["data"].get("deferring_environment_id", None) is not None + ): + environment_id = ci_job["data"]["deferring_environment_id"] + + if environment_id is None: + raise Exception( + "Unable to get the CI job's deferring environment ID. See response below:\n" + f"{ci_job}" + ) + + logger.info("Adding compiled code from deferred environment...") + nodes = add_deferring_node_code(nodes, environment_id) + + diffs = [] + for node in nodes.values(): + diffs.append(NodeDiff(node, environment_id)) + + all_downstream_models = set().union(*[node_diff.downstream_models for node_diff in diffs]) + excluded_models = all_unique_ids - all_downstream_models + if not excluded_models: + + logger.info("No models downstream to exclude, triggering as normal...") + + trigger_job() + + excluded_models_str = " ".join([e.split(".")[-1] for e in excluded_models]) + logger.info("Downstream models are not impacted by column changes...") + logger.info(f"Excluding the following: {excluded_models_str}") + + steps_override = [ + f"dbt build -s state:modified+ --exclude {excluded_models_str}" + ] + + run = trigger_job(steps_override) From ca219241d2b837e39535ae32f01f82d3540223a1 Mon Sep 17 00:00:00 2001 From: Doug Guthrie Date: Fri, 20 Dec 2024 12:09:40 -0700 Subject: [PATCH 03/22] Stop logging action from triggering --- .github/workflows/example_ci_logging.yml | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/.github/workflows/example_ci_logging.yml b/.github/workflows/example_ci_logging.yml index 34495d30..070e40bf 100644 --- a/.github/workflows/example_ci_logging.yml +++ b/.github/workflows/example_ci_logging.yml @@ -2,14 +2,14 @@ name: Trigger DBT Cloud Job on: workflow_dispatch: - pull_request: - branches: - - main - types: - - opened - - reopened - - synchronize - - ready_for_review +# pull_request: +# branches: +# - main +# types: +# - opened +# - reopened +# - synchronize +# - ready_for_review jobs: trigger-dbt-ci-job: From cbc856dc3c37fb2dbfa0c471776e6ab4e1cb9f69 Mon Sep 17 00:00:00 2001 From: Doug Guthrie Date: Fri, 20 Dec 2024 12:12:57 -0700 Subject: [PATCH 04/22] install dbt derrr --- .github/workflows/column_aware_ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/column_aware_ci.yml b/.github/workflows/column_aware_ci.yml index 4b0a4eeb..60517b17 100644 --- a/.github/workflows/column_aware_ci.yml +++ b/.github/workflows/column_aware_ci.yml @@ -30,7 +30,7 @@ jobs: - uses: yezz123/setup-uv@v4 - name: Install packages run: | - uv pip install dbtc sqlglot + uv pip install dbt dbtc sqlglot env: UV_SYSTEM_PYTHON: 1 From 7c6b32f0a3d9e549fe711900014abf269d3662fe Mon Sep 17 00:00:00 2001 From: Doug Guthrie Date: Fri, 20 Dec 2024 12:15:26 -0700 Subject: [PATCH 05/22] Create dbt_cloud profile --- .github/workflows/column_aware_ci.yml | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/.github/workflows/column_aware_ci.yml b/.github/workflows/column_aware_ci.yml index 60517b17..a0d31394 100644 --- a/.github/workflows/column_aware_ci.yml +++ b/.github/workflows/column_aware_ci.yml @@ -17,7 +17,10 @@ jobs: runs-on: ubuntu-latest env: DBT_CLOUD_SERVICE_TOKEN: ${{ secrets.DBT_CLOUD_SERVICE_TOKEN }} - DBT_CLOUD_ACCOUNT_ID: 43786 + DBT_CLOUD_API_KEY: ${{ secrets.DBT_CLOUD_API_KEY }} + DBT_CLOUD_ACCOUNT_ID: ${{ secrets.DBT_CLOUD_ACCOUNT_ID }} + DBT_CLOUD_PROJECT_ID: ${{ secrets.DBT_CLOUD_PROJECT_ID }} + DBT_CLOUD_PROJECT_NAME: "Main" DBT_CLOUD_JOB_ID: 567183 DBT_CLOUD_HOST: cloud.getdbt.com GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} @@ -35,4 +38,7 @@ jobs: UV_SYSTEM_PYTHON: 1 - name: Trigger DBT Cloud Job - run: uv run scripts/column_aware_ci.py + run: | + mkdir ~/.dbt/ + uv run scripts/create_profile.py > ~/.dbt/dbt_cloud.yml + uv run scripts/column_aware_ci.py From 47e46fdfcdb0408ebe451173a4051125803e4c8b Mon Sep 17 00:00:00 2001 From: Doug Guthrie Date: Fri, 20 Dec 2024 12:16:56 -0700 Subject: [PATCH 06/22] Install yaml --- .github/workflows/column_aware_ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/column_aware_ci.yml b/.github/workflows/column_aware_ci.yml index a0d31394..d5a3c13a 100644 --- a/.github/workflows/column_aware_ci.yml +++ b/.github/workflows/column_aware_ci.yml @@ -33,7 +33,7 @@ jobs: - uses: yezz123/setup-uv@v4 - name: Install packages run: | - uv pip install dbt dbtc sqlglot + uv pip install dbt dbtc pyyaml sqlglot env: UV_SYSTEM_PYTHON: 1 From 6d66ad7e0fac8de2832cc25c2ecb89c7e1268bf9 Mon Sep 17 00:00:00 2001 From: Doug Guthrie Date: Fri, 20 Dec 2024 12:18:03 -0700 Subject: [PATCH 07/22] add another env var --- .github/workflows/column_aware_ci.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/column_aware_ci.yml b/.github/workflows/column_aware_ci.yml index d5a3c13a..17c7904d 100644 --- a/.github/workflows/column_aware_ci.yml +++ b/.github/workflows/column_aware_ci.yml @@ -1,4 +1,4 @@ -name: Column Aware dbt Cloud CI +name: Column Aware CI on: workflow_dispatch: @@ -21,6 +21,7 @@ jobs: DBT_CLOUD_ACCOUNT_ID: ${{ secrets.DBT_CLOUD_ACCOUNT_ID }} DBT_CLOUD_PROJECT_ID: ${{ secrets.DBT_CLOUD_PROJECT_ID }} DBT_CLOUD_PROJECT_NAME: "Main" + DBT_CLOUD_ACCOUNT_NAME: "Doug Sandbox" DBT_CLOUD_JOB_ID: 567183 DBT_CLOUD_HOST: cloud.getdbt.com GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} From b7abbae6a8ef3db427a37ac2f8613c47d22b51b5 Mon Sep 17 00:00:00 2001 From: Doug Guthrie Date: Fri, 20 Dec 2024 12:20:04 -0700 Subject: [PATCH 08/22] Try different subprocess cmd --- scripts/column_aware_ci.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/column_aware_ci.py b/scripts/column_aware_ci.py index 1de1f11d..6480a44b 100644 --- a/scripts/column_aware_ci.py +++ b/scripts/column_aware_ci.py @@ -220,7 +220,7 @@ def _get_downstream_models_from_column(self, column_name: str) -> list[str]: logger.info("Compiling modified models...") cmd = ["dbt", "compile", "--select", "state:modified"] - result = subprocess.run(cmd, capture_output=True, text=True) + result = subprocess.Popen(cmd, capture_output=True, text=True) logger.info("Retrieving compiled code...") @@ -230,7 +230,7 @@ def _get_downstream_models_from_column(self, column_name: str) -> list[str]: # Understand all modified and anything downstream by using `dbt ls` cmd = ["dbt", "ls", "--resource-type", "model", "--select", "state:modified+", "--output", "json"] - result = subprocess.run(cmd, capture_output=True, text=True) + result = subprocess.Popen(cmd, capture_output=True, text=True) lines = result.stdout.split("\n") all_unique_ids = set() for line in lines: From b2c54b0521def1f4d4da5c7e3f155202f9e2b090 Mon Sep 17 00:00:00 2001 From: Doug Guthrie Date: Fri, 20 Dec 2024 12:26:25 -0700 Subject: [PATCH 09/22] Try adding an env argument to subprocess.run --- scripts/column_aware_ci.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/scripts/column_aware_ci.py b/scripts/column_aware_ci.py index 6480a44b..d67ea27c 100644 --- a/scripts/column_aware_ci.py +++ b/scripts/column_aware_ci.py @@ -217,10 +217,12 @@ def _get_downstream_models_from_column(self, column_name: str) -> list[str]: if __name__ == "__main__": + current_env = os.environ.copy() + logger.info("Compiling modified models...") cmd = ["dbt", "compile", "--select", "state:modified"] - result = subprocess.Popen(cmd, capture_output=True, text=True) + result = subprocess.run(cmd, capture_output=True, text=True, env=current_env) logger.info("Retrieving compiled code...") @@ -230,7 +232,7 @@ def _get_downstream_models_from_column(self, column_name: str) -> list[str]: # Understand all modified and anything downstream by using `dbt ls` cmd = ["dbt", "ls", "--resource-type", "model", "--select", "state:modified+", "--output", "json"] - result = subprocess.Popen(cmd, capture_output=True, text=True) + result = subprocess.run(cmd, capture_output=True, text=True, env=current_env) lines = result.stdout.split("\n") all_unique_ids = set() for line in lines: From 9dbaf41fb911b809b32e2d620131dc494e3a701f Mon Sep 17 00:00:00 2001 From: Doug Guthrie Date: Fri, 20 Dec 2024 12:29:57 -0700 Subject: [PATCH 10/22] der --- .github/workflows/column_aware_ci.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/column_aware_ci.yml b/.github/workflows/column_aware_ci.yml index 17c7904d..f7082f85 100644 --- a/.github/workflows/column_aware_ci.yml +++ b/.github/workflows/column_aware_ci.yml @@ -40,6 +40,8 @@ jobs: - name: Trigger DBT Cloud Job run: | + which dbt + bash -c "echo $PATH" mkdir ~/.dbt/ uv run scripts/create_profile.py > ~/.dbt/dbt_cloud.yml uv run scripts/column_aware_ci.py From b552f0c567ed533665ff63494557b55de1da4ca9 Mon Sep 17 00:00:00 2001 From: Doug Guthrie Date: Fri, 20 Dec 2024 12:33:24 -0700 Subject: [PATCH 11/22] appears to be a uv issue --- .github/workflows/column_aware_ci.yml | 12 ++++++------ scripts/column_aware_ci.py | 6 ++---- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/.github/workflows/column_aware_ci.yml b/.github/workflows/column_aware_ci.yml index f7082f85..7bcadcf8 100644 --- a/.github/workflows/column_aware_ci.yml +++ b/.github/workflows/column_aware_ci.yml @@ -31,17 +31,17 @@ jobs: - uses: actions/setup-python@v4 with: python-version: "3.11" - - uses: yezz123/setup-uv@v4 +# - uses: yezz123/setup-uv@v4 - name: Install packages run: | - uv pip install dbt dbtc pyyaml sqlglot - env: - UV_SYSTEM_PYTHON: 1 + pip install dbt dbtc pyyaml sqlglot +# env: +# UV_SYSTEM_PYTHON: 1 - name: Trigger DBT Cloud Job run: | which dbt bash -c "echo $PATH" mkdir ~/.dbt/ - uv run scripts/create_profile.py > ~/.dbt/dbt_cloud.yml - uv run scripts/column_aware_ci.py + scripts/create_profile.py > ~/.dbt/dbt_cloud.yml + scripts/column_aware_ci.py diff --git a/scripts/column_aware_ci.py b/scripts/column_aware_ci.py index d67ea27c..1de1f11d 100644 --- a/scripts/column_aware_ci.py +++ b/scripts/column_aware_ci.py @@ -217,12 +217,10 @@ def _get_downstream_models_from_column(self, column_name: str) -> list[str]: if __name__ == "__main__": - current_env = os.environ.copy() - logger.info("Compiling modified models...") cmd = ["dbt", "compile", "--select", "state:modified"] - result = subprocess.run(cmd, capture_output=True, text=True, env=current_env) + result = subprocess.run(cmd, capture_output=True, text=True) logger.info("Retrieving compiled code...") @@ -232,7 +230,7 @@ def _get_downstream_models_from_column(self, column_name: str) -> list[str]: # Understand all modified and anything downstream by using `dbt ls` cmd = ["dbt", "ls", "--resource-type", "model", "--select", "state:modified+", "--output", "json"] - result = subprocess.run(cmd, capture_output=True, text=True, env=current_env) + result = subprocess.run(cmd, capture_output=True, text=True) lines = result.stdout.split("\n") all_unique_ids = set() for line in lines: From cbb356cfecc6f16cbfb5f2fc4619b149b67f13aa Mon Sep 17 00:00:00 2001 From: Doug Guthrie Date: Fri, 20 Dec 2024 12:37:36 -0700 Subject: [PATCH 12/22] permissions --- scripts/column_aware_ci.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/column_aware_ci.py b/scripts/column_aware_ci.py index 1de1f11d..975b8f51 100644 --- a/scripts/column_aware_ci.py +++ b/scripts/column_aware_ci.py @@ -132,7 +132,7 @@ def extract_pr_number(s): schema_override = f"dbt_cloud_pr_{DBT_CLOUD_JOB_ID}_{pull_request_id}" # Create payload to pass to job - # https://docs.getdbt.com/docs/deploy/ci-jobs#trigger-a-ci-job-with-the-api + # https://docs.getdbt.com/docs/deploy/ci-jobs#trigger-a-ci-job-with-the-api payload = { "cause": "Column-aware CI", "schema_override": schema_override, From 2c32694b65fa049f26eeab4537ddb14c055d3b53 Mon Sep 17 00:00:00 2001 From: Doug Guthrie Date: Fri, 20 Dec 2024 12:40:08 -0700 Subject: [PATCH 13/22] permissions again --- scripts/column_aware_ci.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100644 => 100755 scripts/column_aware_ci.py diff --git a/scripts/column_aware_ci.py b/scripts/column_aware_ci.py old mode 100644 new mode 100755 From a0d670a02ae133cc852e6b5de719e7235638b52e Mon Sep 17 00:00:00 2001 From: Doug Guthrie Date: Fri, 20 Dec 2024 12:40:53 -0700 Subject: [PATCH 14/22] i dunno --- .github/workflows/column_aware_ci.yml | 4 ++-- scripts/column_aware_ci.py | 0 2 files changed, 2 insertions(+), 2 deletions(-) mode change 100755 => 100644 scripts/column_aware_ci.py diff --git a/.github/workflows/column_aware_ci.yml b/.github/workflows/column_aware_ci.yml index 7bcadcf8..8f90a1f2 100644 --- a/.github/workflows/column_aware_ci.yml +++ b/.github/workflows/column_aware_ci.yml @@ -43,5 +43,5 @@ jobs: which dbt bash -c "echo $PATH" mkdir ~/.dbt/ - scripts/create_profile.py > ~/.dbt/dbt_cloud.yml - scripts/column_aware_ci.py + ./scripts/create_profile.py > ~/.dbt/dbt_cloud.yml + ./scripts/column_aware_ci.py diff --git a/scripts/column_aware_ci.py b/scripts/column_aware_ci.py old mode 100755 new mode 100644 From eda436db152ddb2df5529510e35e84a0d5ec29af Mon Sep 17 00:00:00 2001 From: Doug Guthrie Date: Fri, 20 Dec 2024 12:45:29 -0700 Subject: [PATCH 15/22] so stupid --- .github/workflows/column_aware_ci.yml | 2 -- scripts/column_aware_ci.py | 0 2 files changed, 2 deletions(-) mode change 100644 => 100755 scripts/column_aware_ci.py diff --git a/.github/workflows/column_aware_ci.yml b/.github/workflows/column_aware_ci.yml index 8f90a1f2..923ef21c 100644 --- a/.github/workflows/column_aware_ci.yml +++ b/.github/workflows/column_aware_ci.yml @@ -40,8 +40,6 @@ jobs: - name: Trigger DBT Cloud Job run: | - which dbt - bash -c "echo $PATH" mkdir ~/.dbt/ ./scripts/create_profile.py > ~/.dbt/dbt_cloud.yml ./scripts/column_aware_ci.py diff --git a/scripts/column_aware_ci.py b/scripts/column_aware_ci.py old mode 100644 new mode 100755 From ed46f93ea0ad51724cbbc7e421d6585b8857cdbc Mon Sep 17 00:00:00 2001 From: Doug Guthrie Date: Fri, 20 Dec 2024 12:46:10 -0700 Subject: [PATCH 16/22] who knows --- scripts/column_aware_ci.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100755 => 100644 scripts/column_aware_ci.py diff --git a/scripts/column_aware_ci.py b/scripts/column_aware_ci.py old mode 100755 new mode 100644 From d2768b46150fd0c4f45de0ec6c3f4d280963f8bd Mon Sep 17 00:00:00 2001 From: Doug Guthrie Date: Fri, 20 Dec 2024 12:48:10 -0700 Subject: [PATCH 17/22] so stupid --- .github/workflows/column_aware_ci.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/column_aware_ci.yml b/.github/workflows/column_aware_ci.yml index 923ef21c..ee01716b 100644 --- a/.github/workflows/column_aware_ci.yml +++ b/.github/workflows/column_aware_ci.yml @@ -41,5 +41,5 @@ jobs: - name: Trigger DBT Cloud Job run: | mkdir ~/.dbt/ - ./scripts/create_profile.py > ~/.dbt/dbt_cloud.yml - ./scripts/column_aware_ci.py + scripts/create_profile.py > ~/.dbt/dbt_cloud.yml + scripts/column_aware_ci.py From 3cb18f14df30f36b6c1614ab7bd491e0039dfe07 Mon Sep 17 00:00:00 2001 From: Doug Guthrie Date: Fri, 20 Dec 2024 12:50:35 -0700 Subject: [PATCH 18/22] x --- .github/workflows/column_aware_ci.yml | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/.github/workflows/column_aware_ci.yml b/.github/workflows/column_aware_ci.yml index ee01716b..176d2b53 100644 --- a/.github/workflows/column_aware_ci.yml +++ b/.github/workflows/column_aware_ci.yml @@ -31,15 +31,17 @@ jobs: - uses: actions/setup-python@v4 with: python-version: "3.11" -# - uses: yezz123/setup-uv@v4 + - uses: yezz123/setup-uv@v4 - name: Install packages run: | pip install dbt dbtc pyyaml sqlglot -# env: -# UV_SYSTEM_PYTHON: 1 + env: + UV_SYSTEM_PYTHON: 1 - name: Trigger DBT Cloud Job run: | mkdir ~/.dbt/ - scripts/create_profile.py > ~/.dbt/dbt_cloud.yml - scripts/column_aware_ci.py + uv run scripts/create_profile.py > ~/.dbt/dbt_cloud.yml + which uv + which dbt + uv run scripts/column_aware_ci.py From 3674fd2beab868d45c43fa6ab5afd97f6d5bc65a Mon Sep 17 00:00:00 2001 From: Doug Guthrie Date: Fri, 20 Dec 2024 12:52:09 -0700 Subject: [PATCH 19/22] yolo --- .github/workflows/column_aware_ci.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/column_aware_ci.yml b/.github/workflows/column_aware_ci.yml index 176d2b53..cae89f40 100644 --- a/.github/workflows/column_aware_ci.yml +++ b/.github/workflows/column_aware_ci.yml @@ -42,6 +42,4 @@ jobs: run: | mkdir ~/.dbt/ uv run scripts/create_profile.py > ~/.dbt/dbt_cloud.yml - which uv - which dbt uv run scripts/column_aware_ci.py From a125c8e6dead339be92793d37d7f907ff13e68de Mon Sep 17 00:00:00 2001 From: Doug Guthrie Date: Fri, 20 Dec 2024 12:55:38 -0700 Subject: [PATCH 20/22] Why is there a model here? --- scripts/column_aware_ci.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/column_aware_ci.py b/scripts/column_aware_ci.py index 975b8f51..3cb1c248 100644 --- a/scripts/column_aware_ci.py +++ b/scripts/column_aware_ci.py @@ -53,7 +53,8 @@ class JobRunStatus(enum.IntEnum): def get_dev_nodes() -> dict[str, Node]: with open("target/run_results.json") as rr: run_results_json = json.load(rr) - + + logger.info(f"Compiled code:\n{run_results_json}") run_results = {} for result in run_results_json["results"]: unique_id = result["unique_id"] From 3e89e2388a328247e7ce69d8d927e20149b03b32 Mon Sep 17 00:00:00 2001 From: Doug Guthrie Date: Fri, 20 Dec 2024 12:57:28 -0700 Subject: [PATCH 21/22] Who knows --- scripts/column_aware_ci.py | 1 - 1 file changed, 1 deletion(-) diff --git a/scripts/column_aware_ci.py b/scripts/column_aware_ci.py index 3cb1c248..d80c4fb5 100644 --- a/scripts/column_aware_ci.py +++ b/scripts/column_aware_ci.py @@ -54,7 +54,6 @@ def get_dev_nodes() -> dict[str, Node]: with open("target/run_results.json") as rr: run_results_json = json.load(rr) - logger.info(f"Compiled code:\n{run_results_json}") run_results = {} for result in run_results_json["results"]: unique_id = result["unique_id"] From 8d4a98b4b258ad48fa42b0e040a97d684f12d736 Mon Sep 17 00:00:00 2001 From: Doug Guthrie Date: Fri, 20 Dec 2024 12:59:25 -0700 Subject: [PATCH 22/22] add a test model --- models/marts/aggregates/agg_parts_price.sql | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 models/marts/aggregates/agg_parts_price.sql diff --git a/models/marts/aggregates/agg_parts_price.sql b/models/marts/aggregates/agg_parts_price.sql new file mode 100644 index 00000000..a1d4ef85 --- /dev/null +++ b/models/marts/aggregates/agg_parts_price.sql @@ -0,0 +1,7 @@ +{{ config(materialized='table') }} + +select + manufacturer, + sum(retail_price) as total_retail_price +from {{ ref('dim_parts') }} +group by 1 \ No newline at end of file