diff --git a/.bumpversion.cfg b/.bumpversion.cfg index e74c22bc8..24e904ac0 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 1.8.0a1 +current_version = 1.9.0a1 parse = (?P[\d]+) # major version number \.(?P[\d]+) # minor version number \.(?P[\d]+) # patch version number @@ -32,6 +32,4 @@ first_value = 1 [bumpversion:part:nightly] -[bumpversion:file:setup.py] - [bumpversion:file:dbt/adapters/bigquery/__version__.py] diff --git a/.changes/unreleased/Dependencies-20231002-164012.yaml b/.changes/unreleased/Dependencies-20231002-164012.yaml deleted file mode 100644 index 344aeb148..000000000 --- a/.changes/unreleased/Dependencies-20231002-164012.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: "Dependencies" -body: "Update ddtrace requirement from ~=1.19 to ~=1.20" -time: 2023-10-02T16:40:12.00000Z -custom: - Author: dependabot[bot] - PR: 948 diff --git a/.changes/unreleased/Dependencies-20231009-005842.yaml b/.changes/unreleased/Dependencies-20231009-005842.yaml deleted file mode 100644 index acedd8d52..000000000 --- a/.changes/unreleased/Dependencies-20231009-005842.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: "Dependencies" -body: "Update pre-commit-hooks requirement from ~=4.4 to ~=4.5" -time: 2023-10-09T00:58:42.00000Z -custom: - Author: dependabot[bot] - PR: 960 diff --git a/.changes/unreleased/Dependencies-20231011-002031.yaml b/.changes/unreleased/Dependencies-20231011-002031.yaml deleted file mode 100644 index 5cc3c36a1..000000000 --- a/.changes/unreleased/Dependencies-20231011-002031.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: "Dependencies" -body: "Bump mypy from 1.5.1 to 1.6.0" -time: 2023-10-11T00:20:31.00000Z -custom: - Author: dependabot[bot] - PR: 963 diff --git a/.changes/unreleased/Dependencies-20231016-002928.yaml b/.changes/unreleased/Dependencies-20231016-002928.yaml deleted file mode 100644 index ac9470de7..000000000 --- a/.changes/unreleased/Dependencies-20231016-002928.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: "Dependencies" -body: "Update pre-commit requirement from ~=3.4 to ~=3.5" -time: 2023-10-16T00:29:28.00000Z -custom: - Author: dependabot[bot] - PR: 969 diff --git a/.changes/unreleased/Dependencies-20231018-010429.yaml b/.changes/unreleased/Dependencies-20231018-010429.yaml deleted file mode 100644 index fec345104..000000000 --- a/.changes/unreleased/Dependencies-20231018-010429.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: "Dependencies" -body: "Update black requirement from ~=23.9 to ~=23.10" -time: 2023-10-18T01:04:29.00000Z -custom: - Author: dependabot[bot] - PR: 973 diff --git a/.changes/unreleased/Dependencies-20231027-132742.yaml b/.changes/unreleased/Dependencies-20231027-132742.yaml deleted file mode 100644 index d72ac124b..000000000 --- a/.changes/unreleased/Dependencies-20231027-132742.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: "Dependencies" -body: "Bump mypy from 1.6.0 to 1.6.1" -time: 2023-10-27T13:27:42.00000Z -custom: - Author: dependabot[bot] - PR: 985 diff --git a/.changes/unreleased/Dependencies-20231027-201709.yaml b/.changes/unreleased/Dependencies-20231027-201709.yaml deleted file mode 100644 index e9d2a1adb..000000000 --- a/.changes/unreleased/Dependencies-20231027-201709.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: "Dependencies" -body: "Update ddtrace requirement from ~=1.20 to ~=2.1" -time: 2023-10-27T20:17:09.00000Z -custom: - Author: dependabot[bot] - PR: 989 diff --git a/.changes/unreleased/Dependencies-20231109-005623.yaml b/.changes/unreleased/Dependencies-20231109-005623.yaml deleted file mode 
100644 index bc4ca2342..000000000 --- a/.changes/unreleased/Dependencies-20231109-005623.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: "Dependencies" -body: "Update black requirement from ~=23.10 to ~=23.11" -time: 2023-11-09T00:56:23.00000Z -custom: - Author: dependabot[bot] - PR: 1013 diff --git a/.changes/unreleased/Dependencies-20231113-002529.yaml b/.changes/unreleased/Dependencies-20231113-002529.yaml deleted file mode 100644 index ad7272882..000000000 --- a/.changes/unreleased/Dependencies-20231113-002529.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: "Dependencies" -body: "Update pytest-xdist requirement from ~=3.3 to ~=3.4" -time: 2023-11-13T00:25:29.00000Z -custom: - Author: dependabot[bot] - PR: 1022 diff --git a/.changes/unreleased/Dependencies-20231113-002621.yaml b/.changes/unreleased/Dependencies-20231113-002621.yaml deleted file mode 100644 index d889da51e..000000000 --- a/.changes/unreleased/Dependencies-20231113-002621.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: "Dependencies" -body: "Bump mypy from 1.6.1 to 1.7.0" -time: 2023-11-13T00:26:21.00000Z -custom: - Author: dependabot[bot] - PR: 1023 diff --git a/.changes/unreleased/Dependencies-20231116-001342.yaml b/.changes/unreleased/Dependencies-20231116-001342.yaml deleted file mode 100644 index 6af15169f..000000000 --- a/.changes/unreleased/Dependencies-20231116-001342.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: "Dependencies" -body: "Update ddtrace requirement from ~=2.1 to ~=2.2" -time: 2023-11-16T00:13:42.00000Z -custom: - Author: dependabot[bot] - PR: 1028 diff --git a/.changes/unreleased/Dependencies-20231127-004827.yaml b/.changes/unreleased/Dependencies-20231127-004827.yaml deleted file mode 100644 index a78708328..000000000 --- a/.changes/unreleased/Dependencies-20231127-004827.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: "Dependencies" -body: "Update wheel requirement from ~=0.41 to ~=0.42" -time: 2023-11-27T00:48:27.00000Z -custom: - Author: dependabot[bot] - PR: 1033 diff --git a/.changes/unreleased/Dependencies-20231128-005012.yaml b/.changes/unreleased/Dependencies-20231128-005012.yaml deleted file mode 100644 index 236ca1e52..000000000 --- a/.changes/unreleased/Dependencies-20231128-005012.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: "Dependencies" -body: "Bump mypy from 1.7.0 to 1.7.1" -time: 2023-11-28T00:50:12.00000Z -custom: - Author: dependabot[bot] - PR: 1034 diff --git a/.changes/unreleased/Dependencies-20231128-005103.yaml b/.changes/unreleased/Dependencies-20231128-005103.yaml deleted file mode 100644 index 205ecd1d8..000000000 --- a/.changes/unreleased/Dependencies-20231128-005103.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: "Dependencies" -body: "Update ddtrace requirement from ~=2.2 to ~=2.3" -time: 2023-11-28T00:51:03.00000Z -custom: - Author: dependabot[bot] - PR: 1035 diff --git a/.changes/unreleased/Dependencies-20231129-001523.yaml b/.changes/unreleased/Dependencies-20231129-001523.yaml deleted file mode 100644 index e1c145ced..000000000 --- a/.changes/unreleased/Dependencies-20231129-001523.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: "Dependencies" -body: "Update pytest-xdist requirement from ~=3.4 to ~=3.5" -time: 2023-11-29T00:15:23.00000Z -custom: - Author: dependabot[bot] - PR: 1037 diff --git a/.changes/unreleased/Dependencies-20231204-003807.yaml b/.changes/unreleased/Dependencies-20231204-003807.yaml deleted file mode 100644 index 18b9bb618..000000000 --- a/.changes/unreleased/Dependencies-20231204-003807.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: "Dependencies" -body: "Update freezegun requirement from 
~=1.2 to ~=1.3" -time: 2023-12-04T00:38:07.00000Z -custom: - Author: dependabot[bot] - PR: 1040 diff --git a/.changes/unreleased/Dependencies-20231213-003845.yaml b/.changes/unreleased/Dependencies-20231213-003845.yaml deleted file mode 100644 index 93618b5e0..000000000 --- a/.changes/unreleased/Dependencies-20231213-003845.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: "Dependencies" -body: "Update black requirement from ~=23.11 to ~=23.12" -time: 2023-12-13T00:38:45.00000Z -custom: - Author: dependabot[bot] - PR: 1056 diff --git a/.changes/unreleased/Dependencies-20240124-120321.yaml b/.changes/unreleased/Dependencies-20240124-120321.yaml deleted file mode 100644 index ef725de67..000000000 --- a/.changes/unreleased/Dependencies-20240124-120321.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: Dependencies -body: get dbt-tests-adapters from dbt-adapters repo -time: 2024-01-24T12:03:21.523295-08:00 -custom: - Author: colin-rogers-dbt - PR: "1077" diff --git a/.changes/unreleased/Dependencies-20240520-230208.yaml b/.changes/unreleased/Dependencies-20240520-230208.yaml new file mode 100644 index 000000000..f89057233 --- /dev/null +++ b/.changes/unreleased/Dependencies-20240520-230208.yaml @@ -0,0 +1,6 @@ +kind: "Dependencies" +body: "Bump ubuntu from 22.04 to 24.04 in /docker" +time: 2024-05-20T23:02:08.00000Z +custom: + Author: dependabot[bot] + PR: 1247 diff --git a/.changes/unreleased/Features-20240102-152030.yaml b/.changes/unreleased/Features-20240102-152030.yaml deleted file mode 100644 index 81c683de0..000000000 --- a/.changes/unreleased/Features-20240102-152030.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: Features -body: Update base adapter references as part of decoupling migration -time: 2024-01-02T15:20:30.038221-08:00 -custom: - Author: colin-rogers-dbt - Issue: "1067" diff --git a/.changes/unreleased/Features-20240205-174614.yaml b/.changes/unreleased/Features-20240205-174614.yaml deleted file mode 100644 index 192273d3d..000000000 --- a/.changes/unreleased/Features-20240205-174614.yaml +++ /dev/null @@ -1,7 +0,0 @@ -kind: Features -body: Support all types for unit testing in dbt-bigquery, expand coverage of - safe_cast macro -time: 2024-02-05T17:46:14.505597-05:00 -custom: - Author: michelleark - Issue: "1090" diff --git a/.changes/unreleased/Features-20240426-105319.yaml b/.changes/unreleased/Features-20240426-105319.yaml new file mode 100644 index 000000000..0af2f9aa8 --- /dev/null +++ b/.changes/unreleased/Features-20240426-105319.yaml @@ -0,0 +1,7 @@ +kind: Features +body: Add configuration options `enable_list_inference` and `intermediate_format` for python + models +time: 2024-04-26T10:53:19.874239-04:00 +custom: + Author: mikealfare + Issue: 1047 1114 diff --git a/.changes/unreleased/Features-20240430-185650.yaml b/.changes/unreleased/Features-20240430-185650.yaml new file mode 100644 index 000000000..0c0eef567 --- /dev/null +++ b/.changes/unreleased/Features-20240430-185650.yaml @@ -0,0 +1,6 @@ +kind: Features +body: Add tests for cross-database `cast` macro +time: 2024-04-30T18:56:50.238027-06:00 +custom: + Author: dbeatty10 + Issue: "1214" diff --git a/.changes/unreleased/Features-20240501-151902.yaml b/.changes/unreleased/Features-20240501-151902.yaml new file mode 100644 index 000000000..1522e9775 --- /dev/null +++ b/.changes/unreleased/Features-20240501-151902.yaml @@ -0,0 +1,6 @@ +kind: Features +body: Cross-database `date` macro +time: 2024-05-01T15:19:02.929676-06:00 +custom: + Author: dbeatty10 + Issue: 1221 diff --git a/.changes/unreleased/Fixes-20231023-082312.yaml 
b/.changes/unreleased/Fixes-20231023-082312.yaml deleted file mode 100644 index 368c58e95..000000000 --- a/.changes/unreleased/Fixes-20231023-082312.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: Fixes -body: Patch for json inline --show -time: 2023-10-23T08:23:12.245223-06:00 -custom: - Author: matt-winkler - Issue: "972" diff --git a/.changes/unreleased/Fixes-20231025-131907.yaml b/.changes/unreleased/Fixes-20231025-131907.yaml deleted file mode 100644 index 9a3b8d8a8..000000000 --- a/.changes/unreleased/Fixes-20231025-131907.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: Fixes -body: Lower bound of `2.11.0` for `google-api-core` -time: 2023-10-25T13:19:07.580826-06:00 -custom: - Author: gmyrianthous dbeatty10 - Issue: "979" diff --git a/.changes/unreleased/Fixes-20231025-223003.yaml b/.changes/unreleased/Fixes-20231025-223003.yaml deleted file mode 100644 index ebec94a30..000000000 --- a/.changes/unreleased/Fixes-20231025-223003.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: Fixes -body: Fix refresh syntax, config comparison with empty labels -time: 2023-10-25T22:30:03.0034-04:00 -custom: - Author: mikealfare - Issue: "983" diff --git a/.changes/unreleased/Fixes-20231030-222134.yaml b/.changes/unreleased/Fixes-20231030-222134.yaml deleted file mode 100644 index 62bfc5f27..000000000 --- a/.changes/unreleased/Fixes-20231030-222134.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: Fixes -body: Assign the correct relation type to materialized views in catalog queries -time: 2023-10-30T22:21:34.401675-04:00 -custom: - Author: mikealfare - Issue: "995" diff --git a/.changes/unreleased/Fixes-20231105-125740.yaml b/.changes/unreleased/Fixes-20231105-125740.yaml deleted file mode 100644 index 928fbb302..000000000 --- a/.changes/unreleased/Fixes-20231105-125740.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: Fixes -body: Fix inline comments (--) on the last line of an incremental model -time: 2023-11-05T12:57:40.289399+09:00 -custom: - Author: tnk-ysk - Issue: "896" diff --git a/.changes/unreleased/Fixes-20231107-100905.yaml b/.changes/unreleased/Fixes-20231107-100905.yaml deleted file mode 100644 index 942298ed9..000000000 --- a/.changes/unreleased/Fixes-20231107-100905.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: Fixes -body: Support agate Integer type, test with empty seed -time: 2023-11-07T10:09:05.723451-05:00 -custom: - Author: gshank - Issue: "1003" diff --git a/.changes/unreleased/Fixes-20231107-174352.yaml b/.changes/unreleased/Fixes-20231107-174352.yaml deleted file mode 100644 index 80592758d..000000000 --- a/.changes/unreleased/Fixes-20231107-174352.yaml +++ /dev/null @@ -1,7 +0,0 @@ -kind: Fixes -body: Fixed issue where materialized views were failing on re-run with minimal config - parameters -time: 2023-11-07T17:43:52.972135-05:00 -custom: - Author: "mikealfare" - Issue: "1007" diff --git a/.changes/unreleased/Fixes-20231108-171128.yaml b/.changes/unreleased/Fixes-20231108-171128.yaml deleted file mode 100644 index 116ff00d2..000000000 --- a/.changes/unreleased/Fixes-20231108-171128.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: Fixes -body: Fix broken partition config granularity and batch_id being set to None -time: 2023-11-08T17:11:28.819877-08:00 -custom: - Author: colin-rogers-dbt - Issue: "1006" diff --git a/.changes/unreleased/Fixes-20231111-150959.yaml b/.changes/unreleased/Fixes-20231111-150959.yaml deleted file mode 100644 index 3d9f245a6..000000000 --- a/.changes/unreleased/Fixes-20231111-150959.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: Fixes -body: replace deterministic batch_id with uuid -time: 
2023-11-11T15:09:59.243797-08:00 -custom: - Author: colin-rogers-dbt - Issue: "1006" diff --git a/.changes/unreleased/Fixes-20240226-233024.yaml b/.changes/unreleased/Fixes-20240226-233024.yaml new file mode 100644 index 000000000..efb1b077c --- /dev/null +++ b/.changes/unreleased/Fixes-20240226-233024.yaml @@ -0,0 +1,6 @@ +kind: Fixes +body: make seed delimiter configurable via `field_delimeter` in model config +time: 2024-02-26T23:30:24.141213+01:00 +custom: + Author: salimmoulouel + Issue: "1119" diff --git a/.changes/unreleased/Fixes-20240426-105224.yaml b/.changes/unreleased/Fixes-20240426-105224.yaml new file mode 100644 index 000000000..624006ba5 --- /dev/null +++ b/.changes/unreleased/Fixes-20240426-105224.yaml @@ -0,0 +1,7 @@ +kind: Fixes +body: Default `enableListInference` to `True` for python models to support nested + lists +time: 2024-04-26T10:52:24.827314-04:00 +custom: + Author: mikealfare + Issue: 1047 1114 diff --git a/.changes/unreleased/Under the Hood-20231109-095012.yaml b/.changes/unreleased/Under the Hood-20231109-095012.yaml deleted file mode 100644 index a93215e8f..000000000 --- a/.changes/unreleased/Under the Hood-20231109-095012.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: Under the Hood -body: Upgrade spark-bigquery Java deps for serverless to 2.13-0.34.0 -time: 2023-11-09T09:50:12.252774-08:00 -custom: - Author: colin-rogers-dbt - Issue: "1006" diff --git a/.changes/unreleased/Under the Hood-20231116-062142.yaml b/.changes/unreleased/Under the Hood-20231116-062142.yaml deleted file mode 100644 index c28270898..000000000 --- a/.changes/unreleased/Under the Hood-20231116-062142.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: Under the Hood -body: Primary and foreign key constraints are not enforced in BigQuery -time: 2023-11-16T06:21:42.935367-08:00 -custom: - Author: dbeatty10 - Issue: "1018" diff --git a/.changes/unreleased/Under the Hood-20231117-121214.yaml b/.changes/unreleased/Under the Hood-20231117-121214.yaml deleted file mode 100644 index 61b0617ad..000000000 --- a/.changes/unreleased/Under the Hood-20231117-121214.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: Under the Hood -body: Add tests for --empty flag -time: 2023-11-17T12:12:14.643365-05:00 -custom: - Author: michelleark - Issue: "1029" diff --git a/.changes/unreleased/Under the Hood-20240116-154305.yaml b/.changes/unreleased/Under the Hood-20240116-154305.yaml deleted file mode 100644 index bb115abd6..000000000 --- a/.changes/unreleased/Under the Hood-20240116-154305.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: Under the Hood -body: Migrate to dbt-common and dbt-adapters package -time: 2024-01-16T15:43:05.046735-08:00 -custom: - Author: colin-rogers-dbt - Issue: "1071" diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index f6283d123..02ed72d45 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -1,3 +1,3 @@ # This codeowners file is used to ensure all PRs require reviews from the adapters team -* @dbt-labs/core-adapters +* @dbt-labs/adapters diff --git a/.github/dependabot.yml b/.github/dependabot.yml index 2a6f34492..ae2be43aa 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -1,8 +1,17 @@ version: 2 updates: - # python dependencies - package-ecosystem: "pip" directory: "/" schedule: interval: "daily" rebase-strategy: "disabled" + - package-ecosystem: "github-actions" + directory: "/" + schedule: + interval: "weekly" + rebase-strategy: "disabled" + - package-ecosystem: "docker" + directory: "/docker" + schedule: + interval: "weekly" + rebase-strategy: "disabled" diff --git 
a/.github/scripts/integration-test-matrix.js b/.github/scripts/integration-test-matrix.js index bf7fd2ef7..1a3136cf6 100644 --- a/.github/scripts/integration-test-matrix.js +++ b/.github/scripts/integration-test-matrix.js @@ -44,7 +44,7 @@ module.exports = ({ context }) => { if (labels.includes("test macos") || testAllLabel) { include.push({ - os: "macos-latest", + os: "macos-12", adapter, "python-version": pythonVersion, }); @@ -78,7 +78,7 @@ module.exports = ({ context }) => { // additionally include runs for all adapters, on macos and windows, // but only for the default python version for (const adapter of supportedAdapters) { - for (const operatingSystem of ["windows-latest", "macos-latest"]) { + for (const operatingSystem of ["windows-latest", "macos-12"]) { include.push({ os: operatingSystem, adapter: adapter, diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index 7df6973a8..2ae9684d9 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -69,13 +69,13 @@ jobs: steps: - name: Check out the repository (non-PR) if: github.event_name != 'pull_request_target' - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: persist-credentials: false - name: Check out the repository (PR) if: github.event_name == 'pull_request_target' - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: persist-credentials: false ref: ${{ github.event.pull_request.head.sha }} @@ -88,7 +88,7 @@ jobs: # 'false' - if none of changed files matches any of filter rules # also, returns: # `changes` - JSON array with names of all filters matching any of the changed files - uses: dorny/paths-filter@v2 + uses: dorny/paths-filter@v3 id: get-changes with: token: ${{ secrets.GITHUB_TOKEN }} @@ -100,7 +100,7 @@ jobs: - name: Generate integration test matrix id: generate-matrix - uses: actions/github-script@v6 + uses: actions/github-script@v7 env: CHANGES: ${{ steps.get-changes.outputs.changes }} with: @@ -143,7 +143,7 @@ jobs: steps: - name: Check out the repository if: github.event_name != 'pull_request_target' - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: persist-credentials: false @@ -151,13 +151,13 @@ jobs: # this is necessary for the `pull_request_target` event - name: Check out the repository (PR) if: github.event_name == 'pull_request_target' - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: persist-credentials: false ref: ${{ github.event.pull_request.head.sha }} - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} @@ -223,7 +223,7 @@ jobs: steps: - name: Check out the repository if: github.event_name != 'pull_request_target' - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: persist-credentials: false @@ -231,13 +231,13 @@ jobs: # this is necessary for the `pull_request_target` event - name: Check out the repository (PR) if: github.event_name == 'pull_request_target' - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: persist-credentials: false ref: ${{ github.event.pull_request.head.sha }} - name: Set up Python 3.8 - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: "3.8" diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index b13c53e9f..fdbd36dba 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -43,12 +43,12 @@ jobs: steps: - name: Check out the repository - uses: actions/checkout@v3 + uses: 
actions/checkout@v4 with: persist-credentials: false - name: Set up Python - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: '3.8' @@ -79,12 +79,12 @@ jobs: steps: - name: Check out the repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: persist-credentials: false - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} @@ -120,12 +120,12 @@ jobs: steps: - name: Check out the repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: persist-credentials: false - name: Set up Python - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: '3.8' @@ -173,12 +173,12 @@ jobs: strategy: fail-fast: false matrix: - os: [ubuntu-latest, macos-latest, windows-latest] + os: [ubuntu-latest, macos-12, windows-latest] python-version: ['3.8', '3.9', '3.10', '3.11'] steps: - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - name: Install python dependencies @@ -199,10 +199,10 @@ jobs: find ./dist/*.whl -maxdepth 1 -type f | xargs python -m pip install --force-reinstall --find-links=dist/ - name: Check wheel distributions run: | - dbt --version + python -c "import dbt.adapters.bigquery" - name: Install source distributions run: | find ./dist/*.gz -maxdepth 1 -type f | xargs python -m pip install --force-reinstall --find-links=dist/ - name: Check source distributions run: | - dbt --version + python -c "import dbt.adapters.bigquery" diff --git a/.github/workflows/nightly-release.yml b/.github/workflows/nightly-release.yml index ef210dacd..1dc396154 100644 --- a/.github/workflows/nightly-release.yml +++ b/.github/workflows/nightly-release.yml @@ -20,6 +20,7 @@ on: permissions: contents: write # this is the permission that allows creating a new release + packages: write # allows creating a Docker release as a GitHub package on GHCR defaults: run: @@ -39,7 +40,7 @@ jobs: steps: - name: "Checkout ${{ github.repository }} Branch ${{ env.RELEASE_BRANCH }}" - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: ref: ${{ env.RELEASE_BRANCH }} @@ -57,7 +58,7 @@ jobs: - name: "Audit Version And Parse Into Parts" id: semver - uses: dbt-labs/actions/parse-semver@v1.1.0 + uses: dbt-labs/actions/parse-semver@v1.1.1 with: version: ${{ steps.version-number-sources.outputs.current_version }} @@ -79,7 +80,7 @@ jobs: echo "number=$number" >> $GITHUB_OUTPUT - name: "Audit Nightly Release Version And Parse Into Parts" - uses: dbt-labs/actions/parse-semver@v1.1.0 + uses: dbt-labs/actions/parse-semver@v1.1.1 with: version: ${{ steps.nightly-release-version.outputs.number }} diff --git a/.github/workflows/release-internal.yml b/.github/workflows/release-internal.yml new file mode 100644 index 000000000..f29de2fb2 --- /dev/null +++ b/.github/workflows/release-internal.yml @@ -0,0 +1,49 @@ +# What? +# +# Tag and release an arbitrary ref. Uploads to an internal archive for further processing. +# +# How? +# +# After checking out and testing the provided ref, the image is built and uploaded. +# +# When? +# +# Manual trigger. + +name: "Release internal patch" + +on: + workflow_dispatch: + inputs: + version_number: + description: "The release version number (i.e. 
1.0.0b1)" + type: string + required: true + ref: + description: "The ref (sha or branch name) to use" + type: string + default: "main" + required: true + package_test_command: + description: "Package test command" + type: string + default: "python -c \"import dbt.adapters.bigquery\"" + required: true + +defaults: + run: + shell: bash + +jobs: + invoke-reusable-workflow: + name: "Build and Release Internally" + + uses: "dbt-labs/dbt-release/.github/workflows/internal-archive-release.yml@main" + + with: + version_number: "${{ inputs.version_number }}" + package_test_command: "${{ inputs.package_test_command }}" + dbms_name: "bigquery" + ref: "${{ inputs.ref }}" + + secrets: "inherit" diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 88942e251..ad7cf76b4 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -13,7 +13,8 @@ # # **when?** # This workflow can be run manually on demand or can be called by other workflows -name: Release to GitHub and PyPI +name: "Release to GitHub, PyPI, and Docker" +run-name: "Release ${{ inputs.version_number }} to GitHub, PyPI, and Docker" on: workflow_dispatch: @@ -60,6 +61,11 @@ on: type: boolean default: false required: false + only_docker: + description: "Only release Docker image, skip GitHub & PyPI" + type: boolean + default: false + required: false workflow_call: inputs: sha: @@ -128,12 +134,11 @@ jobs: echo Package test command: ${{ inputs.package_test_command }} echo Test run: ${{ inputs.test_run }} echo Nightly release: ${{ inputs.nightly_release }} + echo Only Docker: ${{ inputs.only_docker }} bump-version-generate-changelog: name: Bump package version, Generate changelog - uses: dbt-labs/dbt-release/.github/workflows/release-prep.yml@main - with: sha: ${{ inputs.sha }} version_number: ${{ inputs.version_number }} @@ -141,17 +146,13 @@ jobs: env_setup_script_path: ${{ inputs.env_setup_script_path }} test_run: ${{ inputs.test_run }} nightly_release: ${{ inputs.nightly_release }} - secrets: inherit log-outputs-bump-version-generate-changelog: name: "[Log output] Bump package version, Generate changelog" - if: ${{ !failure() && !cancelled() }} - + if: ${{ !failure() && !cancelled() && !inputs.only_docker }} needs: [bump-version-generate-changelog] - runs-on: ubuntu-latest - steps: - name: Print variables run: | @@ -160,11 +161,9 @@ jobs: build-test-package: name: Build, Test, Package - if: ${{ !failure() && !cancelled() }} + if: ${{ !failure() && !cancelled() && !inputs.only_docker }} needs: [bump-version-generate-changelog] - uses: dbt-labs/dbt-release/.github/workflows/build.yml@main - with: sha: ${{ needs.bump-version-generate-changelog.outputs.final_sha }} version_number: ${{ inputs.version_number }} @@ -174,19 +173,15 @@ jobs: package_test_command: ${{ inputs.package_test_command }} test_run: ${{ inputs.test_run }} nightly_release: ${{ inputs.nightly_release }} - secrets: AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} github-release: name: GitHub Release - if: ${{ !failure() && !cancelled() }} - + if: ${{ !failure() && !cancelled() && !inputs.only_docker }} needs: [bump-version-generate-changelog, build-test-package] - uses: dbt-labs/dbt-release/.github/workflows/github-release.yml@main - with: sha: ${{ needs.bump-version-generate-changelog.outputs.final_sha }} version_number: ${{ inputs.version_number }} @@ -195,34 +190,41 @@ jobs: pypi-release: name: PyPI Release - - needs: [github-release] - + if: ${{ !failure() && 
!cancelled() && !inputs.only_docker }} + needs: [bump-version-generate-changelog, build-test-package] uses: dbt-labs/dbt-release/.github/workflows/pypi-release.yml@main - with: version_number: ${{ inputs.version_number }} test_run: ${{ inputs.test_run }} - secrets: PYPI_API_TOKEN: ${{ secrets.PYPI_API_TOKEN }} TEST_PYPI_API_TOKEN: ${{ secrets.TEST_PYPI_API_TOKEN }} + docker-release: + name: "Docker Release" + # We cannot release to docker on a test run because it uses the tag in GitHub as + # what we need to release but draft releases don't actually tag the commit so it + # finds nothing to release + if: ${{ !failure() && !cancelled() && (!inputs.test_run || inputs.only_docker) }} + needs: [bump-version-generate-changelog, build-test-package, github-release] + permissions: + packages: write + uses: dbt-labs/dbt-release/.github/workflows/release-docker.yml@main + with: + version_number: ${{ inputs.version_number }} + test_run: ${{ inputs.test_run }} + slack-notification: name: Slack Notification if: ${{ failure() && (!inputs.test_run || inputs.nightly_release) }} - needs: [ - bump-version-generate-changelog, - build-test-package, github-release, pypi-release, + docker-release, ] - uses: dbt-labs/dbt-release/.github/workflows/slack-post-notification.yml@main with: status: "failure" - secrets: SLACK_WEBHOOK_URL: ${{ secrets.SLACK_DEV_ADAPTER_ALERTS }} diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index a98ba0417..74dbdf99a 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -13,6 +13,10 @@ repos: - id: end-of-file-fixer - id: trailing-whitespace - id: check-case-conflict +- repo: https://github.com/dbt-labs/pre-commit-hooks + rev: v0.1.0a1 + hooks: + - id: dbt-core-in-adapters-check - repo: https://github.com/psf/black rev: 23.1.0 hooks: diff --git a/CHANGELOG.md b/CHANGELOG.md index ade60b8f6..4a408c580 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,8 @@ ## Previous Releases For information on prior major and minor releases, see their changelogs: +- [1.8](https://github.com/dbt-labs/dbt-bigquery/blob/1.8.latest/CHANGELOG.md) +- [1.7](https://github.com/dbt-labs/dbt-bigquery/blob/1.7.latest/CHANGELOG.md) - [1.6](https://github.com/dbt-labs/dbt-bigquery/blob/1.6.latest/CHANGELOG.md) - [1.5](https://github.com/dbt-labs/dbt-bigquery/blob/1.5.latest/CHANGELOG.md) - [1.4](https://github.com/dbt-labs/dbt-bigquery/blob/1.4.latest/CHANGELOG.md) diff --git a/Makefile b/Makefile index fc6536f98..bdacb538b 100644 --- a/Makefile +++ b/Makefile @@ -11,17 +11,11 @@ dev-uninstall: ## Uninstalls all packages while maintaining the virtual environm pip freeze | grep -v "^-e" | cut -d "@" -f1 | xargs pip uninstall -y pip uninstall -y dbt-bigquery -.PHONY: ubuntu-py311 -ubuntu-py311: ## Builds and runs an Ubuntu Python 3.11 development container - docker build -f docker_dev/ubuntu.Dockerfile -t dbt-bigquery-ubuntu-py311 . - docker run --rm -it --name dbt-bigquery-ubuntu-py311 -v $(shell pwd):/opt/code dbt-bigquery-ubuntu-py311 +.PHONY: docker-dev +docker-dev: + docker build -f docker/dev.Dockerfile -t dbt-bigquery-dev . + docker run --rm -it --name dbt-bigquery-dev -v $(shell pwd):/opt/code dbt-bigquery-dev -.PHONY: ubuntu-py39 -ubuntu-py39: ## Builds and runs an Ubuntu Python 3.9 development container - docker build -f docker_dev/ubuntu.Dockerfile -t dbt-bigquery-ubuntu-py39 . 
--build-arg version=3.9 - docker run --rm -it --name dbt-bigquery-ubuntu-py39 -v $(shell pwd):/opt/code dbt-bigquery-ubuntu-py39 - -.PHONY: ubuntu-py38 -ubuntu-py38: ## Builds and runs an Ubuntu Python 3.8 development container - docker build -f docker_dev/ubuntu.Dockerfile -t dbt-bigquery-ubuntu-py38 . --build-arg version=3.8 - docker run --rm -it --name dbt-bigquery-ubuntu-py38 -v $(shell pwd):/opt/code dbt-bigquery-ubuntu-py38 +.PHONY: docker-prod +docker-prod: + docker build -f docker/Dockerfile -t dbt-bigquery . diff --git a/dbt/adapters/bigquery/__version__.py b/dbt/adapters/bigquery/__version__.py index f15b401d1..6698ed64c 100644 --- a/dbt/adapters/bigquery/__version__.py +++ b/dbt/adapters/bigquery/__version__.py @@ -1 +1 @@ -version = "1.8.0a1" +version = "1.9.0a1" diff --git a/dbt/adapters/bigquery/connections.py b/dbt/adapters/bigquery/connections.py index c74effcdc..f96bc1381 100644 --- a/dbt/adapters/bigquery/connections.py +++ b/dbt/adapters/bigquery/connections.py @@ -1,5 +1,4 @@ -import asyncio -import functools +from concurrent.futures import TimeoutError import json import re from contextlib import contextmanager @@ -17,7 +16,7 @@ import google.auth import google.auth.exceptions -import google.cloud.bigquery as bigquery +import google.cloud.bigquery import google.cloud.exceptions from google.api_core import retry, client_info from google.auth import impersonated_credentials @@ -33,6 +32,7 @@ DbtRuntimeError, DbtConfigError, ) + from dbt_common.exceptions import DbtDatabaseError from dbt.adapters.exceptions.connection import FailedToConnectError from dbt.adapters.base import BaseConnectionManager @@ -63,16 +63,6 @@ ) -# Override broken json deserializer for dbt show --inline -# can remove once this is fixed: https://github.com/googleapis/python-bigquery/issues/1500 -def _json_from_json(value, _): - """NOOP string -> string coercion""" - return json.loads(value) - - -bigquery._helpers._CELLDATA_FROM_JSON["JSON"] = _json_from_json - - @lru_cache() def get_bigquery_defaults(scopes=None) -> Tuple[Any, Optional[str]]: """ @@ -206,9 +196,7 @@ def _connection_keys(self): "job_retries", "job_creation_timeout_seconds", "job_execution_timeout_seconds", - "keyfile", "timeout_seconds", - "refresh_token", "client_id", "token_uri", "dataproc_region", @@ -749,27 +737,12 @@ def _query_and_results( logger.debug( self._bq_job_link(query_job.location, query_job.project, query_job.job_id) ) - - # only use async logic if user specifies a timeout - if job_execution_timeout: - loop = asyncio.new_event_loop() - future_iterator = asyncio.wait_for( - loop.run_in_executor(None, functools.partial(query_job.result, max_results=limit)), - timeout=job_execution_timeout, - ) - - try: - iterator = loop.run_until_complete(future_iterator) - except asyncio.TimeoutError: - query_job.cancel() - raise DbtRuntimeError( - f"Query exceeded configured timeout of {job_execution_timeout}s" - ) - finally: - loop.close() - else: - iterator = query_job.result(max_results=limit) - return query_job, iterator + try: + iterator = query_job.result(max_results=limit, timeout=job_execution_timeout) + return query_job, iterator + except TimeoutError: + exc = f"Operation did not complete within the designated timeout of {job_execution_timeout} seconds." 
+ raise TimeoutError(exc) def _retry_and_handle(self, msg, conn, fn): """retry a function call within the context of exception_handler.""" diff --git a/dbt/adapters/bigquery/impl.py b/dbt/adapters/bigquery/impl.py index 747c667a2..ad8a6a13e 100644 --- a/dbt/adapters/bigquery/impl.py +++ b/dbt/adapters/bigquery/impl.py @@ -1,4 +1,5 @@ from dataclasses import dataclass +from datetime import datetime import json import threading from multiprocessing.context import SpawnContext @@ -20,9 +21,12 @@ SchemaSearchMap, available, ) +from dbt.adapters.base.impl import FreshnessResponse from dbt.adapters.cache import _make_ref_key_dict # type: ignore +from dbt.adapters.capability import Capability, CapabilityDict, CapabilitySupport, Support import dbt_common.clients.agate_helper from dbt.adapters.contracts.connection import AdapterResponse +from dbt.adapters.contracts.macros import MacroResolverProtocol from dbt_common.contracts.constraints import ColumnLevelConstraint, ConstraintType, ModelLevelConstraint # type: ignore from dbt_common.dataclass_schema import dbtClassMixin from dbt.adapters.events.logging import AdapterLogger @@ -36,6 +40,7 @@ import google.cloud.bigquery from google.cloud.bigquery import AccessEntry, SchemaField, Table as BigQueryTable import google.cloud.exceptions +import pytz from dbt.adapters.bigquery import BigQueryColumn, BigQueryConnectionManager from dbt.adapters.bigquery.column import get_nested_column_data_types @@ -94,6 +99,8 @@ class BigqueryConfig(AdapterConfig): enable_refresh: Optional[bool] = None refresh_interval_minutes: Optional[int] = None max_staleness: Optional[str] = None + enable_list_inference: Optional[bool] = None + intermediate_format: Optional[str] = None allow_non_incremental_definition: Optional[bool] = None @@ -119,6 +126,13 @@ class BigQueryAdapter(BaseAdapter): ConstraintType.foreign_key: ConstraintSupport.NOT_ENFORCED, } + _capabilities: CapabilityDict = CapabilityDict( + { + Capability.TableLastModifiedMetadata: CapabilitySupport(support=Support.Full), + Capability.SchemaMetadataByRelations: CapabilitySupport(support=Support.Full), + } + ) + def __init__(self, config, mp_context: SpawnContext) -> None: super().__init__(config, mp_context) self.connections: BigQueryConnectionManager = self.connections @@ -641,7 +655,9 @@ def alter_table_add_columns(self, relation, columns): client.update_table(new_table, ["schema"]) @available.parse_none - def load_dataframe(self, database, schema, table_name, agate_table, column_override): + def load_dataframe( + self, database, schema, table_name, agate_table, column_override, field_delimiter + ): bq_schema = self._agate_to_schema(agate_table, column_override) conn = self.connections.get_thread_connection() client = conn.handle @@ -651,7 +667,7 @@ def load_dataframe(self, database, schema, table_name, agate_table, column_overr load_config = google.cloud.bigquery.LoadJobConfig() load_config.skip_leading_rows = 1 load_config.schema = bq_schema - + load_config.field_delimiter = field_delimiter with open(agate_table.original_abspath, "rb") as f: job = client.load_table_from_file(f, table_ref, rewind=True, job_config=load_config) @@ -710,6 +726,26 @@ def _get_catalog_schemas(self, relation_config: Iterable[RelationConfig]) -> Sch ) return result + def calculate_freshness_from_metadata( + self, + source: BaseRelation, + macro_resolver: Optional[MacroResolverProtocol] = None, + ) -> Tuple[Optional[AdapterResponse], FreshnessResponse]: + conn = self.connections.get_thread_connection() + client: 
google.cloud.bigquery.Client = conn.handle + + table_ref = self.get_table_ref_from_relation(source) + table = client.get_table(table_ref) + snapshot = datetime.now(tz=pytz.UTC) + + freshness = FreshnessResponse( + max_loaded_at=table.modified, + snapshotted_at=snapshot, + age=(snapshot - table.modified).total_seconds(), + ) + + return None, freshness + @available.parse(lambda *a, **k: {}) def get_common_options( self, config: Dict[str, Any], node: Dict[str, Any], temporary: bool = False diff --git a/dbt/adapters/bigquery/relation.py b/dbt/adapters/bigquery/relation.py index 0449bd9d5..99aa2036c 100644 --- a/dbt/adapters/bigquery/relation.py +++ b/dbt/adapters/bigquery/relation.py @@ -1,4 +1,4 @@ -from dataclasses import dataclass +from dataclasses import dataclass, field from typing import FrozenSet, Optional, TypeVar from itertools import chain, islice @@ -23,9 +23,23 @@ class BigQueryRelation(BaseRelation): quote_character: str = "`" location: Optional[str] = None - renameable_relations: FrozenSet[RelationType] = frozenset({RelationType.Table}) - replaceable_relations: FrozenSet[RelationType] = frozenset( - {RelationType.Table, RelationType.View} + require_alias: bool = False + + renameable_relations: FrozenSet[RelationType] = field( + default_factory=lambda: frozenset( + { + RelationType.Table, + } + ) + ) + + replaceable_relations: FrozenSet[RelationType] = field( + default_factory=lambda: frozenset( + { + RelationType.View, + RelationType.Table, + } + ) ) def matches( diff --git a/dbt/adapters/bigquery/relation_configs/_materialized_view.py b/dbt/adapters/bigquery/relation_configs/_materialized_view.py index fd0c191c3..81ca6b3de 100644 --- a/dbt/adapters/bigquery/relation_configs/_materialized_view.py +++ b/dbt/adapters/bigquery/relation_configs/_materialized_view.py @@ -75,10 +75,10 @@ def parse_relation_config(cls, relation_config: RelationConfig) -> Dict[str, Any } # optional - if "partition_by" in relation_config.config: + if relation_config.config and "partition_by" in relation_config.config: config_dict.update({"partition": PartitionConfig.parse_model_node(relation_config)}) - if "cluster_by" in relation_config.config: + if relation_config.config and "cluster_by" in relation_config.config: config_dict.update( {"cluster": BigQueryClusterConfig.parse_relation_config(relation_config)} ) diff --git a/dbt/adapters/bigquery/relation_configs/_partition.py b/dbt/adapters/bigquery/relation_configs/_partition.py index 8fe8bf5d6..555aa3664 100644 --- a/dbt/adapters/bigquery/relation_configs/_partition.py +++ b/dbt/adapters/bigquery/relation_configs/_partition.py @@ -111,7 +111,7 @@ def parse_model_node(cls, relation_config: RelationConfig) -> Dict[str, Any]: This doesn't currently collect `time_ingestion_partitioning` and `copy_partitions` because this was built for materialized views, which do not support those settings. 
""" - config_dict = relation_config.config.extra.get("partition_by") # type: ignore + config_dict: Dict[str, Any] = relation_config.config.extra.get("partition_by") # type: ignore if "time_ingestion_partitioning" in config_dict: del config_dict["time_ingestion_partitioning"] if "copy_partitions" in config_dict: diff --git a/dbt/include/bigquery/macros/catalog.sql b/dbt/include/bigquery/macros/catalog.sql deleted file mode 100644 index 25166c7b4..000000000 --- a/dbt/include/bigquery/macros/catalog.sql +++ /dev/null @@ -1,231 +0,0 @@ - -{% macro bigquery__get_catalog(information_schema, schemas) -%} - - {%- if (schemas | length) == 0 -%} - {# Hopefully nothing cares about the columns we return when there are no rows #} - {%- set query = "select 1 as id limit 0" -%} - {%- else -%} - - {%- set query -%} - with materialized_views as ( - select - table_catalog as project_id, - table_schema as dataset_id, - table_name as table_id - from {{ information_schema.replace(information_schema_view='MATERIALIZED_VIEWS') }} - ), - tables as ( - select - tables.project_id as table_database, - tables.dataset_id as table_schema, - tables.table_id as original_table_name, - - concat(tables.project_id, '.', tables.dataset_id, '.', tables.table_id) as relation_id, - - tables.row_count, - tables.size_bytes as size_bytes, - case - when materialized_views.table_id is not null then 'materialized view' - when tables.type = 1 then 'table' - when tables.type = 2 then 'view' - else 'external' - end as table_type, - - REGEXP_CONTAINS(tables.table_id, '^.+[0-9]{8}$') and coalesce(type, 0) = 1 as is_date_shard, - REGEXP_EXTRACT(tables.table_id, '^(.+)[0-9]{8}$') as shard_base_name, - REGEXP_EXTRACT(tables.table_id, '^.+([0-9]{8})$') as shard_name - - from {{ information_schema.replace(information_schema_view='__TABLES__') }} tables - left join materialized_views - on materialized_views.project_id = tables.project_id - and materialized_views.dataset_id = tables.dataset_id - and materialized_views.table_id = tables.table_id - where ( - {%- for schema in schemas -%} - upper(tables.dataset_id) = upper('{{ schema }}'){%- if not loop.last %} or {% endif -%} - {%- endfor -%} - ) - ), - - table_options as ( - select - concat(table_catalog, '.', table_schema, '.', table_name) as relation_id, - JSON_VALUE(option_value) as table_comment - - from {{ information_schema.replace(information_schema_view='TABLE_OPTIONS') }} - where option_name = 'description' - ), - extracted as ( - - select *, - case - when is_date_shard then shard_base_name - else original_table_name - end as table_name - - from tables - - ), - - unsharded_tables as ( - - select - table_database, - table_schema, - table_name, - coalesce(table_type, 'external') as table_type, - is_date_shard, - - struct( - min(shard_name) as shard_min, - max(shard_name) as shard_max, - count(*) as shard_count - ) as table_shards, - - sum(size_bytes) as size_bytes, - sum(row_count) as row_count, - - max(relation_id) as relation_id - - from extracted - group by 1,2,3,4,5 - - ), - - info_schema_columns as ( - - select - concat(table_catalog, '.', table_schema, '.', table_name) as relation_id, - table_catalog as table_database, - table_schema, - table_name, - - -- use the "real" column name from the paths query below - column_name as base_column_name, - ordinal_position as column_index, - - is_partitioning_column, - clustering_ordinal_position - - from {{ information_schema.replace(information_schema_view='COLUMNS') }} - where ordinal_position is not null - - ), - - info_schema_column_paths as 
( - - select - concat(table_catalog, '.', table_schema, '.', table_name) as relation_id, - field_path as column_name, - data_type as column_type, - column_name as base_column_name, - description as column_comment - - from {{ information_schema.replace(information_schema_view='COLUMN_FIELD_PATHS') }} - - ), - - columns as ( - - select * except (base_column_name) - from info_schema_columns - join info_schema_column_paths using (relation_id, base_column_name) - - ), - - column_stats as ( - - select - table_database, - table_schema, - table_name, - max(relation_id) as relation_id, - max(case when is_partitioning_column = 'YES' then 1 else 0 end) = 1 as is_partitioned, - max(case when is_partitioning_column = 'YES' then column_name else null end) as partition_column, - max(case when clustering_ordinal_position is not null then 1 else 0 end) = 1 as is_clustered, - array_to_string( - array_agg( - case - when clustering_ordinal_position is not null then column_name - else null - end ignore nulls - order by clustering_ordinal_position - ), ', ' - ) as clustering_columns - - from columns - group by 1,2,3 - - ) - - select - unsharded_tables.table_database, - unsharded_tables.table_schema, - case - when is_date_shard then concat(unsharded_tables.table_name, '*') - else unsharded_tables.table_name - end as table_name, - unsharded_tables.table_type, - table_options.table_comment, - - -- coalesce name and type for External tables - these columns are not - -- present in the COLUMN_FIELD_PATHS resultset - coalesce(columns.column_name, '') as column_name, - -- invent a row number to account for nested fields -- BQ does - -- not treat these nested properties as independent fields - row_number() over ( - partition by relation_id - order by columns.column_index, columns.column_name - ) as column_index, - coalesce(columns.column_type, '') as column_type, - columns.column_comment, - - 'Shard count' as `stats__date_shards__label`, - table_shards.shard_count as `stats__date_shards__value`, - 'The number of date shards in this table' as `stats__date_shards__description`, - is_date_shard as `stats__date_shards__include`, - - 'Shard (min)' as `stats__date_shard_min__label`, - table_shards.shard_min as `stats__date_shard_min__value`, - 'The first date shard in this table' as `stats__date_shard_min__description`, - is_date_shard as `stats__date_shard_min__include`, - - 'Shard (max)' as `stats__date_shard_max__label`, - table_shards.shard_max as `stats__date_shard_max__value`, - 'The last date shard in this table' as `stats__date_shard_max__description`, - is_date_shard as `stats__date_shard_max__include`, - - '# Rows' as `stats__num_rows__label`, - row_count as `stats__num_rows__value`, - 'Approximate count of rows in this table' as `stats__num_rows__description`, - (unsharded_tables.table_type = 'table') as `stats__num_rows__include`, - - 'Approximate Size' as `stats__num_bytes__label`, - size_bytes as `stats__num_bytes__value`, - 'Approximate size of table as reported by BigQuery' as `stats__num_bytes__description`, - (unsharded_tables.table_type = 'table') as `stats__num_bytes__include`, - - 'Partitioned By' as `stats__partitioning_type__label`, - partition_column as `stats__partitioning_type__value`, - 'The partitioning column for this table' as `stats__partitioning_type__description`, - is_partitioned as `stats__partitioning_type__include`, - - 'Clustered By' as `stats__clustering_fields__label`, - clustering_columns as `stats__clustering_fields__value`, - 'The clustering columns for this table' as 
`stats__clustering_fields__description`, - is_clustered as `stats__clustering_fields__include` - - -- join using relation_id (an actual relation, not a shard prefix) to make - -- sure that column metadata is picked up through the join. This will only - -- return the column information for the "max" table in a date-sharded table set - from unsharded_tables - left join table_options using (relation_id) - left join columns using (relation_id) - left join column_stats using (relation_id) - {%- endset -%} - - {%- endif -%} - - {{ return(run_query(query)) }} - -{%- endmacro %} diff --git a/dbt/include/bigquery/macros/catalog/by_relation.sql b/dbt/include/bigquery/macros/catalog/by_relation.sql new file mode 100644 index 000000000..adaa740f6 --- /dev/null +++ b/dbt/include/bigquery/macros/catalog/by_relation.sql @@ -0,0 +1,36 @@ +{% macro bigquery__get_catalog_relations(information_schema, relations) -%} + + {%- if (relations | length) == 0 -%} + {# Hopefully nothing cares about the columns we return when there are no rows #} + {%- set query = "select 1 as id limit 0" -%} + + {%- else -%} + {%- set query -%} + with + table_shards_stage as ({{ _bigquery__get_table_shards_sql(information_schema) }}), + table_shards as ( + select * from table_shards_stage + where ( + {%- for relation in relations -%} + ( + upper(table_schema) = upper('{{ relation.schema }}') + and upper(table_name) = upper('{{ relation.identifier }}') + ) + {%- if not loop.last %} or {% endif -%} + {%- endfor -%} + ) + ), + tables as ({{ _bigquery__get_tables_sql() }}), + table_stats as ({{ _bigquery__get_table_stats_sql() }}), + + columns as ({{ _bigquery__get_columns_sql(information_schema) }}), + column_stats as ({{ _bigquery__get_column_stats_sql() }}) + + {{ _bigquery__get_extended_catalog_sql() }} + {%- endset -%} + + {%- endif -%} + + {{ return(run_query(query)) }} + +{%- endmacro %} diff --git a/dbt/include/bigquery/macros/catalog/by_schema.sql b/dbt/include/bigquery/macros/catalog/by_schema.sql new file mode 100644 index 000000000..0d36f2b84 --- /dev/null +++ b/dbt/include/bigquery/macros/catalog/by_schema.sql @@ -0,0 +1,32 @@ +{% macro bigquery__get_catalog(information_schema, schemas) -%} + + {%- if (schemas | length) == 0 -%} + {# Hopefully nothing cares about the columns we return when there are no rows #} + {%- set query = "select 1 as id limit 0" -%} + + {%- else -%} + {%- set query -%} + with + table_shards as ( + {{ _bigquery__get_table_shards_sql(information_schema) }} + where ( + {%- for schema in schemas -%} + upper(tables.dataset_id) = upper('{{ schema }}') + {%- if not loop.last %} or {% endif -%} + {%- endfor -%} + ) + ), + tables as ({{ _bigquery__get_tables_sql() }}), + table_stats as ({{ _bigquery__get_table_stats_sql() }}), + + columns as ({{ _bigquery__get_columns_sql(information_schema) }}), + column_stats as ({{ _bigquery__get_column_stats_sql() }}) + + {{ _bigquery__get_extended_catalog_sql() }} + {%- endset -%} + + {%- endif -%} + + {{ return(run_query(query)) }} + +{%- endmacro %} diff --git a/dbt/include/bigquery/macros/catalog/catalog.sql b/dbt/include/bigquery/macros/catalog/catalog.sql new file mode 100644 index 000000000..de16f82bf --- /dev/null +++ b/dbt/include/bigquery/macros/catalog/catalog.sql @@ -0,0 +1,177 @@ +{% macro _bigquery__get_table_shards_sql(information_schema) %} + select + tables.project_id as table_catalog, + tables.dataset_id as table_schema, + coalesce(REGEXP_EXTRACT(tables.table_id, '^(.+)[0-9]{8}$'), tables.table_id) as table_name, + tables.table_id as shard_name, + 
REGEXP_EXTRACT(tables.table_id, '^.+([0-9]{8})$') as shard_index, + REGEXP_CONTAINS(tables.table_id, '^.+[0-9]{8}$') and tables.type = 1 as is_date_shard, + case + when materialized_views.table_name is not null then 'materialized view' + when tables.type = 1 then 'table' + when tables.type = 2 then 'view' + else 'external' + end as table_type, + tables.type = 1 as is_table, + JSON_VALUE(table_description.option_value) as table_comment, + tables.size_bytes, + tables.row_count + from {{ information_schema.replace(information_schema_view='__TABLES__') }} tables + left join {{ information_schema.replace(information_schema_view='MATERIALIZED_VIEWS') }} materialized_views + on materialized_views.table_catalog = tables.project_id + and materialized_views.table_schema = tables.dataset_id + and materialized_views.table_name = tables.table_id + left join {{ information_schema.replace(information_schema_view='TABLE_OPTIONS') }} table_description + on table_description.table_catalog = tables.project_id + and table_description.table_schema = tables.dataset_id + and table_description.table_name = tables.table_id + and table_description.option_name = 'description' +{% endmacro %} + + +{% macro _bigquery__get_tables_sql() %} + select distinct + table_catalog, + table_schema, + table_name, + is_date_shard, + table_type, + is_table, + table_comment + from table_shards +{% endmacro %} + + +{% macro _bigquery__get_table_stats_sql() %} + select + table_catalog, + table_schema, + table_name, + max(shard_name) as latest_shard_name, + min(shard_index) as shard_min, + max(shard_index) as shard_max, + count(shard_index) as shard_count, + sum(size_bytes) as size_bytes, + sum(row_count) as row_count + from table_shards + group by 1, 2, 3 +{% endmacro %} + + +{% macro _bigquery__get_columns_sql(information_schema) %} + select + columns.table_catalog, + columns.table_schema, + columns.table_name as shard_name, + coalesce(paths.field_path, '') as column_name, + -- invent a row number to account for nested fields + -- BQ does not treat these nested properties as independent fields + row_number() over ( + partition by + columns.table_catalog, + columns.table_schema, + columns.table_name + order by + columns.ordinal_position, + paths.field_path + ) as column_index, + coalesce(paths.data_type, '') as column_type, + paths.description as column_comment, + case when columns.is_partitioning_column = 'YES' then 1 else 0 end as is_partitioning_column, + case when columns.is_partitioning_column = 'YES' then paths.field_path end as partition_column, + case when columns.clustering_ordinal_position is not null then 1 else 0 end as is_clustering_column, + case when columns.clustering_ordinal_position is not null then paths.field_path end as cluster_column, + columns.clustering_ordinal_position + from {{ information_schema.replace(information_schema_view='COLUMNS') }} columns + join {{ information_schema.replace(information_schema_view='COLUMN_FIELD_PATHS') }} paths + on paths.table_catalog = columns.table_catalog + and paths.table_schema = columns.table_schema + and paths.table_name = columns.table_name + and paths.column_name = columns.column_name + where columns.ordinal_position is not null +{% endmacro %} + + +{% macro _bigquery__get_column_stats_sql() %} + select + table_catalog, + table_schema, + shard_name, + max(is_partitioning_column) = 1 as is_partitioned, + max(partition_column) as partition_column, + max(is_clustering_column) = 1 as is_clustered, + array_to_string( + array_agg( + cluster_column ignore nulls + order by 
clustering_ordinal_position + ), ', ' + ) as clustering_columns + from columns + group by 1, 2, 3 +{% endmacro %} + + +{% macro _bigquery__get_extended_catalog_sql() %} + select + tables.table_catalog as table_database, + tables.table_schema, + case + when tables.is_date_shard then concat(tables.table_name, '*') + else tables.table_name + end as table_name, + tables.table_type, + tables.table_comment, + columns.column_name, + columns.column_index, + columns.column_type, + columns.column_comment, + + 'Shard count' as `stats__date_shards__label`, + table_stats.shard_count as `stats__date_shards__value`, + 'The number of date shards in this table' as `stats__date_shards__description`, + tables.is_date_shard as `stats__date_shards__include`, + + 'Shard (min)' as `stats__date_shard_min__label`, + table_stats.shard_min as `stats__date_shard_min__value`, + 'The first date shard in this table' as `stats__date_shard_min__description`, + tables.is_date_shard as `stats__date_shard_min__include`, + + 'Shard (max)' as `stats__date_shard_max__label`, + table_stats.shard_max as `stats__date_shard_max__value`, + 'The last date shard in this table' as `stats__date_shard_max__description`, + tables.is_date_shard as `stats__date_shard_max__include`, + + '# Rows' as `stats__num_rows__label`, + table_stats.row_count as `stats__num_rows__value`, + 'Approximate count of rows in this table' as `stats__num_rows__description`, + tables.is_table as `stats__num_rows__include`, + + 'Approximate Size' as `stats__num_bytes__label`, + table_stats.size_bytes as `stats__num_bytes__value`, + 'Approximate size of table as reported by BigQuery' as `stats__num_bytes__description`, + tables.is_table as `stats__num_bytes__include`, + + 'Partitioned By' as `stats__partitioning_type__label`, + column_stats.partition_column as `stats__partitioning_type__value`, + 'The partitioning column for this table' as `stats__partitioning_type__description`, + column_stats.is_partitioned as `stats__partitioning_type__include`, + + 'Clustered By' as `stats__clustering_fields__label`, + column_stats.clustering_columns as `stats__clustering_fields__value`, + 'The clustering columns for this table' as `stats__clustering_fields__description`, + column_stats.is_clustered as `stats__clustering_fields__include` + + from tables + join table_stats + on table_stats.table_catalog = tables.table_catalog + and table_stats.table_schema = tables.table_schema + and table_stats.table_name = tables.table_name + left join column_stats + on column_stats.table_catalog = tables.table_catalog + and column_stats.table_schema = tables.table_schema + and column_stats.shard_name = table_stats.latest_shard_name + left join columns + on columns.table_catalog = tables.table_catalog + and columns.table_schema = tables.table_schema + and columns.shard_name = table_stats.latest_shard_name +{% endmacro %} diff --git a/dbt/include/bigquery/macros/materializations/incremental_strategy/common.sql b/dbt/include/bigquery/macros/materializations/incremental_strategy/common.sql index 9d71ba7c0..1c02f4912 100644 --- a/dbt/include/bigquery/macros/materializations/incremental_strategy/common.sql +++ b/dbt/include/bigquery/macros/materializations/incremental_strategy/common.sql @@ -11,3 +11,23 @@ {%- endif -%} {% endmacro %} + +{% macro predicate_for_avoid_require_partition_filter(target='DBT_INTERNAL_DEST') %} + + {%- set raw_partition_by = config.get('partition_by', none) -%} + {%- set partition_config = adapter.parse_partition_by(raw_partition_by) -%} + {%- set predicate = none -%} + + 
{% if partition_config and config.get('require_partition_filter') -%} + {%- set partition_field = partition_config.time_partitioning_field() if partition_config.time_ingestion_partitioning else partition_config.field -%} + {% set predicate %} + ( + `{{ target }}`.`{{ partition_field }}` is null + or `{{ target }}`.`{{ partition_field }}` is not null + ) + {% endset %} + {%- endif -%} + + {{ return(predicate) }} + +{% endmacro %} diff --git a/dbt/include/bigquery/macros/materializations/incremental_strategy/merge.sql b/dbt/include/bigquery/macros/materializations/incremental_strategy/merge.sql index 90af66f52..a204caed9 100644 --- a/dbt/include/bigquery/macros/materializations/incremental_strategy/merge.sql +++ b/dbt/include/bigquery/macros/materializations/incremental_strategy/merge.sql @@ -21,7 +21,13 @@ {%- endif -%} {%- endset -%} - {% set build_sql = get_merge_sql(target_relation, source_sql, unique_key, dest_columns, incremental_predicates) %} + {%- set predicates = [] if incremental_predicates is none else [] + incremental_predicates -%} + {%- set avoid_require_partition_filter = predicate_for_avoid_require_partition_filter() -%} + {%- if avoid_require_partition_filter is not none -%} + {% do predicates.append(avoid_require_partition_filter) %} + {%- endif -%} + + {% set build_sql = get_merge_sql(target_relation, source_sql, unique_key, dest_columns, predicates) %} {{ return(build_sql) }} diff --git a/dbt/include/bigquery/macros/materializations/seed.sql b/dbt/include/bigquery/macros/materializations/seed.sql index 6ac7337f3..c89d00598 100644 --- a/dbt/include/bigquery/macros/materializations/seed.sql +++ b/dbt/include/bigquery/macros/materializations/seed.sql @@ -11,7 +11,7 @@ {%- set column_override = model['config'].get('column_types', {}) -%} {{ adapter.load_dataframe(model['database'], model['schema'], model['alias'], - agate_table, column_override) }} + agate_table, column_override, model['config']['delimiter']) }} {% call statement() %} alter table {{ this.render() }} set {{ bigquery_table_options(config, model) }} diff --git a/dbt/include/bigquery/macros/materializations/table.sql b/dbt/include/bigquery/macros/materializations/table.sql index 68117b06a..e3c5b3598 100644 --- a/dbt/include/bigquery/macros/materializations/table.sql +++ b/dbt/include/bigquery/macros/materializations/table.sql @@ -49,12 +49,19 @@ from pyspark.sql import SparkSession {%- set raw_partition_by = config.get('partition_by', none) -%} {%- set raw_cluster_by = config.get('cluster_by', none) -%} +{%- set enable_list_inference = config.get('enable_list_inference', true) -%} +{%- set intermediate_format = config.get('intermediate_format', none) -%} + {%- set partition_config = adapter.parse_partition_by(raw_partition_by) %} spark = SparkSession.builder.appName('smallTest').getOrCreate() spark.conf.set("viewsEnabled","true") spark.conf.set("temporaryGcsBucket","{{target.gcs_bucket}}") +spark.conf.set("enableListInference", "{{ enable_list_inference }}") +{% if intermediate_format %} +spark.conf.set("intermediateFormat", "{{ intermediate_format }}") +{% endif %} {{ compiled_code }} dbt = dbtObj(spark.read.format("bigquery").load) diff --git a/dbt/include/bigquery/macros/utils/date.sql b/dbt/include/bigquery/macros/utils/date.sql new file mode 100644 index 000000000..0f3b85aca --- /dev/null +++ b/dbt/include/bigquery/macros/utils/date.sql @@ -0,0 +1,3 @@ +{% macro bigquery__date(year, month, day) -%} + date({{ year }}, {{ month }}, {{ day }}) +{%- endmacro %} diff --git 
a/dbt/include/bigquery/macros/utils/string_literal.sql b/dbt/include/bigquery/macros/utils/string_literal.sql new file mode 100644 index 000000000..07e67319a --- /dev/null +++ b/dbt/include/bigquery/macros/utils/string_literal.sql @@ -0,0 +1,3 @@ +{%- macro bigquery__string_literal(value) -%} + '''{{ value }}''' +{%- endmacro -%} diff --git a/dev-requirements.txt b/dev-requirements.txt index 0af563a7d..cf7b1b87c 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -6,10 +6,10 @@ git+https://github.com/dbt-labs/dbt-adapters.git git+https://github.com/dbt-labs/dbt-adapters.git#subdirectory=dbt-tests-adapter # if version 1.x or greater -> pin to major version # if version 0.x -> pin to minor -black~=23.12 +black>=24.3 bumpversion~=0.6.0 click~=8.1 -ddtrace~=2.3 +ddtrace==2.3.0 flake8~=6.1 flaky~=3.7 freezegun~=1.3 diff --git a/docker/Dockerfile b/docker/Dockerfile new file mode 100644 index 000000000..3b9431fd1 --- /dev/null +++ b/docker/Dockerfile @@ -0,0 +1,37 @@ +# this image gets published to GHCR for production use +ARG py_version=3.11.2 + +FROM python:$py_version-slim-bullseye as base + +RUN apt-get update \ + && apt-get dist-upgrade -y \ + && apt-get install -y --no-install-recommends \ + build-essential=12.9 \ + ca-certificates=20210119 \ + git=1:2.30.2-1+deb11u2 \ + libpq-dev=13.14-0+deb11u1 \ + make=4.3-4.1 \ + openssh-client=1:8.4p1-5+deb11u3 \ + software-properties-common=0.96.20.2-2.1 \ + && apt-get clean \ + && rm -rf \ + /var/lib/apt/lists/* \ + /tmp/* \ + /var/tmp/* + +ENV PYTHONIOENCODING=utf-8 +ENV LANG=C.UTF-8 + +RUN python -m pip install --upgrade "pip==24.0" "setuptools==69.2.0" "wheel==0.43.0" --no-cache-dir + + +FROM base as dbt-bigquery + +ARG commit_ref=main + +HEALTHCHECK CMD dbt --version || exit 1 + +WORKDIR /usr/app/dbt/ +ENTRYPOINT ["dbt"] + +RUN python -m pip install --no-cache-dir "dbt-bigquery @ git+https://github.com/dbt-labs/dbt-bigquery@${commit_ref}" diff --git a/docker/README.md b/docker/README.md new file mode 100644 index 000000000..8c60deaa3 --- /dev/null +++ b/docker/README.md @@ -0,0 +1,58 @@ +# Docker for dbt +This Dockerfile is suitable for building dbt Docker images locally or for use with CI/CD to automate populating a container registry. + + +## Building an image: +This Dockerfile can create images for the following target: `dbt-bigquery` + +In order to build a new image, run the following docker command. +```shell +docker build --tag <your_image_name> --target dbt-bigquery <path/to/dockerfile> +``` +--- +> **Note:** Docker must be configured to use [BuildKit](https://docs.docker.com/develop/develop-images/build_enhancements/) in order for images to build properly! + +--- + +By default the image will be populated with the latest version of `dbt-bigquery` on `main`. +If you need to use a different version you can specify it by git ref using the `--build-arg` flag: +```shell +docker build --tag <your_image_name> \ + --target dbt-bigquery \ + --build-arg commit_ref=<commit_ref> \ + <path/to/dockerfile> +``` + +### Examples: +To build an image named "my-dbt" that supports BigQuery using the latest releases: +```shell +cd dbt-bigquery/docker +docker build --tag my-dbt --target dbt-bigquery . +``` + +To build an image named "my-other-dbt" that supports BigQuery using the adapter version 1.0.0b1: +```shell +cd dbt-bigquery/docker +docker build \ + --tag my-other-dbt \ + --target dbt-bigquery \ + --build-arg commit_ref=v1.0.0b1 \ + .
+``` + +## Running an image in a container: +The `ENTRYPOINT` for this Dockerfile is the command `dbt` so you can bind-mount your project to `/usr/app` and use dbt as normal: +```shell +docker run \ + --network=host \ + --mount type=bind,source=path/to/project,target=/usr/app \ + --mount type=bind,source=path/to/profiles.yml,target=/root/.dbt/profiles.yml \ + my-dbt \ + ls +``` +--- +**Notes:** +* Bind-mount sources _must_ be an absolute path +* You may need to make adjustments to the docker networking setting depending on the specifics of your data warehouse/database host. + +--- diff --git a/docker/dev.Dockerfile b/docker/dev.Dockerfile new file mode 100644 index 000000000..2afad0a95 --- /dev/null +++ b/docker/dev.Dockerfile @@ -0,0 +1,50 @@ +# this image does not get published, it is intended for local development only, see `Makefile` for usage +FROM ubuntu:24.04 as base + +# prevent python installation from asking for time zone region +ARG DEBIAN_FRONTEND=noninteractive + +# add python repository +RUN apt-get update \ + && apt-get install -y software-properties-common=0.99.22.9 \ + && add-apt-repository -y ppa:deadsnakes/ppa \ + && apt-get clean \ + && rm -rf \ + /var/lib/apt/lists/* \ + /tmp/* \ + /var/tmp/* + +# install python +RUN apt-get update \ + && apt-get install -y --no-install-recommends \ + build-essential=12.9ubuntu3 \ + git-all=1:2.34.1-1ubuntu1.10 \ + python3.8=3.8.19-1+jammy1 \ + python3.8-dev=3.8.19-1+jammy1 \ + python3.8-distutils=3.8.19-1+jammy1 \ + python3.8-venv=3.8.19-1+jammy1 \ + python3-pip=22.0.2+dfsg-1ubuntu0.4 \ + python3-wheel=0.37.1-2ubuntu0.22.04.1 \ + && apt-get clean \ + && rm -rf \ + /var/lib/apt/lists/* \ + /tmp/* \ + /var/tmp/* + +# update the default system interpreter to the newly installed version +RUN update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.8 1 + + +FROM base as dbt-bigquery-dev + +HEALTHCHECK CMD python3 --version || exit 1 + +# send stdout/stderr to terminal +ENV PYTHONUNBUFFERED=1 + +# setup mount for local code +WORKDIR /opt/code +VOLUME /opt/code + +# create a virtual environment +RUN python3 -m venv /opt/venv diff --git a/docker_dev/README.md b/docker_dev/README.md deleted file mode 100644 index dd487fea7..000000000 --- a/docker_dev/README.md +++ /dev/null @@ -1,5 +0,0 @@ -# Docker Dev Images - -These images are solely for development purposes. They are -saved here for convenience. There should be no expectation -of stability or maintenance. diff --git a/docker_dev/ubuntu.Dockerfile b/docker_dev/ubuntu.Dockerfile deleted file mode 100644 index bac3f5993..000000000 --- a/docker_dev/ubuntu.Dockerfile +++ /dev/null @@ -1,50 +0,0 @@ -FROM ubuntu:latest - -# default to py3.11, this can be overridden at build, e.g. `docker build ... 
--build-arg version=3.10` -ARG version=3.11 - -# prevent python installation from asking for time zone region -ARG DEBIAN_FRONTEND=noninteractive - -# get add-apt-repository -RUN apt-get update && \ - apt-get install -y software-properties-common - -# add the python repository -RUN apt-get update && \ - add-apt-repository -y ppa:deadsnakes/ppa - -# install python and git (for installing dbt-core) -RUN apt-get update && \ - apt-get install -y --no-install-recommends \ - python$version \ - python$version-dev \ - python$version-distutils \ - python$version-venv \ - python3-pip \ - python3-wheel \ - build-essential \ - git-all - -# clean up -RUN apt-get clean && \ - rm -rf \ - /var/lib/apt/lists/* \ - /tmp/* \ - /var/tmp/* - -# update the default system interpreter to the newly installed version -RUN update-alternatives --install /usr/bin/python3 python3 /usr/bin/python$version 1 - -# setup mount for our code -WORKDIR /opt/code -VOLUME /opt/code - -# install tox in the system interpreter (it creates it's own virtual environments) -RUN pip install tox - -# explicitly create a virtual environment as well for interactive testing -RUN python3 -m venv /opt/venv - -# send stdout/stderr to terminal -ENV PYTHONUNBUFFERED=1 diff --git a/setup.py b/setup.py index 2e969e246..3f4bad228 100644 --- a/setup.py +++ b/setup.py @@ -36,7 +36,6 @@ def _dbt_bigquery_version() -> str: package_name = "dbt-bigquery" -package_version = "1.8.0a1" description = """The BigQuery adapter plugin for dbt""" setup( @@ -51,14 +50,17 @@ def _dbt_bigquery_version() -> str: packages=find_namespace_packages(include=["dbt", "dbt.*"]), include_package_data=True, install_requires=[ - "dbt-common<1.0", - "dbt-adapters~=0.1.0a1", - "google-cloud-bigquery~=3.0", + "dbt-common>=1.0.4,<2.0", + "dbt-adapters>=1.1.1,<2.0", + # 3.20 introduced pyarrow>=3.0 under the `pandas` extra + "google-cloud-bigquery[pandas]>=3.0,<4.0", "google-cloud-storage~=2.4", "google-cloud-dataproc~=5.0", # ---- # Expect compatibility with all new versions of these packages, so lower bounds only. 
"google-api-core>=2.11.0", + # add dbt-core to ensure backwards compatibility of installation, this is not a functional dependency + "dbt-core>=1.8.0", ], zip_safe=False, classifiers=[ diff --git a/tests/boundary/test_bigquery_sdk.py b/tests/boundary/test_bigquery_sdk.py new file mode 100644 index 000000000..b8e6c9995 --- /dev/null +++ b/tests/boundary/test_bigquery_sdk.py @@ -0,0 +1,18 @@ +import pytest + +from dbt.tests.util import get_connection +from google.cloud.bigquery import Client, DatasetReference, TableReference +from google.api_core.exceptions import NotFound + + +@pytest.mark.parametrize("table_name", ["this_table_does_not_exist"]) +def test_get_table_does_not_exist(project, table_name): + """ + TODO: replace dbt project methods with direct connection instantiation + """ + with get_connection(project.adapter) as conn: + client: Client = conn.handle + dataset_ref = DatasetReference(project.database, project.test_schema) + table_ref = TableReference(dataset_ref, table_name) + with pytest.raises(NotFound): + client.get_table(table_ref) diff --git a/tests/functional/adapter/dbt_show/test_dbt_show.py b/tests/functional/adapter/dbt_show/test_dbt_show.py index 203d7031b..acb54cc47 100644 --- a/tests/functional/adapter/dbt_show/test_dbt_show.py +++ b/tests/functional/adapter/dbt_show/test_dbt_show.py @@ -24,6 +24,10 @@ limit 5 """ +model_with_null_json_struct = """ + select (struct(null)) as null_struct +""" + class TestBigQueryShowLimit(BaseShowLimit): pass @@ -40,7 +44,11 @@ class TestBigQueryShowSqlWorksWithJSONStruct: def models(self): return { "json_struct_model.sql": model_with_json_struct, + "null_json_struct_model.sql": model_with_null_json_struct, } def test_sql_header(self, project): run_dbt(["show", "--select", "json_struct_model"]) + + def test_show_with_null_json_struct(self, project): + run_dbt(["show", "--select", "null_json_struct_model"]) diff --git a/tests/functional/adapter/empty/test_empty.py b/tests/functional/adapter/empty/test_empty.py index c224c51df..3bf47f35d 100644 --- a/tests/functional/adapter/empty/test_empty.py +++ b/tests/functional/adapter/empty/test_empty.py @@ -1,5 +1,9 @@ -from dbt.tests.adapter.empty.test_empty import BaseTestEmpty +from dbt.tests.adapter.empty.test_empty import BaseTestEmpty, BaseTestEmptyInlineSourceRef class TestBigQueryEmpty(BaseTestEmpty): pass + + +class TestBigQueryEmptyInlineSourceRef(BaseTestEmptyInlineSourceRef): + pass diff --git a/tests/functional/adapter/incremental/incremental_strategy_fixtures.py b/tests/functional/adapter/incremental/incremental_strategy_fixtures.py index a8f0004c5..17391b48d 100644 --- a/tests/functional/adapter/incremental/incremental_strategy_fixtures.py +++ b/tests/functional/adapter/incremental/incremental_strategy_fixtures.py @@ -90,6 +90,63 @@ {% endif %} """.lstrip() +merge_time_with_require_partition_sql = """ +{{ + config( + materialized="incremental", + unique_key="id", + cluster_by="id", + partition_by={ + "field": "date_time", + "data_type": "dateTime" + }, + post_hook=" + create or replace view `{{ schema }}.incremental_merge_time_with_require_partition_view` + as select * from {{ this }} where date_time is null or date_time is not null + ", + require_partition_filter=true + ) +}} + + + +with data as ( + + {% if not is_incremental() %} + + select 1 as id, cast('2020-01-01' as datetime) as date_time union all + select 2 as id, cast('2020-01-01' as datetime) as date_time union all + select 3 as id, cast('2020-01-01' as datetime) as date_time union all + select 4 as id, cast('2020-01-01' 
as datetime) as date_time + + {% else %} + + select 1 as id, cast('2020-01-01' as datetime) as date_time union all + select 2 as id, cast('2020-01-01' as datetime) as date_time union all + select 3 as id, cast('2020-01-01' as datetime) as date_time union all + select 4 as id, cast('2020-01-02' as datetime) as date_time union all + select 5 as id, cast('2020-01-02' as datetime) as date_time union all + select 6 as id, cast('2020-01-02' as datetime) as date_time + + {% endif %} + +) + +select * from data + +{% if is_incremental() %} +where date_time > ( + select max(date_time) + from {{ this }} + where ( + date_time is null + or date_time is not null + ) +) + +{% endif %} +""".lstrip() + overwrite_date_sql = """ {{ config( diff --git a/tests/functional/adapter/incremental/test_incremental_strategies.py b/tests/functional/adapter/incremental/test_incremental_strategies.py index b3a51ad09..1a339d601 100644 --- a/tests/functional/adapter/incremental/test_incremental_strategies.py +++ b/tests/functional/adapter/incremental/test_incremental_strategies.py @@ -17,6 +17,7 @@ from tests.functional.adapter.incremental.incremental_strategy_fixtures import ( merge_range_sql, merge_time_sql, + merge_time_with_require_partition_sql, overwrite_date_sql, overwrite_day_sql, overwrite_day_with_copy_partitions_sql, @@ -39,6 +40,7 @@ def models(self): return { "incremental_merge_range.sql": merge_range_sql, "incremental_merge_time.sql": merge_time_sql, + "incremental_merge_time_with_require_partition.sql": merge_time_with_require_partition_sql, "incremental_overwrite_date.sql": overwrite_date_sql, "incremental_overwrite_day.sql": overwrite_day_sql, "incremental_overwrite_day_with_copy_partitions.sql": overwrite_day_with_copy_partitions_sql, @@ -65,13 +67,14 @@ def seeds(self): def test__bigquery_assert_incremental_configurations_apply_the_right_strategy(self, project): run_dbt(["seed"]) results = run_dbt() - assert len(results) == 11 + assert len(results) == 12 results = run_dbt() - assert len(results) == 11 + assert len(results) == 12 incremental_strategies = [ ("incremental_merge_range", "merge_expected"), ("incremental_merge_time", "merge_expected"), + ("incremental_merge_time_with_require_partition_view", "merge_expected"), ("incremental_overwrite_time", "incremental_overwrite_time_expected"), ("incremental_overwrite_date", "incremental_overwrite_date_expected"), ("incremental_overwrite_partitions", "incremental_overwrite_date_expected"), diff --git a/tests/functional/adapter/sources_freshness_tests/files.py b/tests/functional/adapter/sources_freshness_tests/files.py new file mode 100644 index 000000000..eaca96648 --- /dev/null +++ b/tests/functional/adapter/sources_freshness_tests/files.py @@ -0,0 +1,23 @@ +SCHEMA_YML = """version: 2 +sources: + - name: test_source + freshness: + warn_after: {count: 10, period: hour} + error_after: {count: 1, period: day} + schema: "{{ env_var('DBT_GET_LAST_RELATION_TEST_SCHEMA') }}" + tables: + - name: test_source +""" + +SEED_TEST_SOURCE_CSV = """ +id,name +1,Martin +2,Jeter +3,Ruth +4,Gehrig +5,DiMaggio +6,Torre +7,Mantle +8,Berra +9,Maris +""".strip() diff --git a/tests/functional/adapter/sources_freshness_tests/test_get_relation_last_modified.py b/tests/functional/adapter/sources_freshness_tests/test_get_relation_last_modified.py new file mode 100644 index 000000000..08e263edb --- /dev/null +++ b/tests/functional/adapter/sources_freshness_tests/test_get_relation_last_modified.py @@ -0,0 +1,30 @@ +import os +import pytest + +from dbt.tests.util import run_dbt + +from 
tests.functional.adapter.sources_freshness_tests import files + + +class TestGetLastRelationModified: + @pytest.fixture(scope="class") + def seeds(self): + return {"test_source.csv": files.SEED_TEST_SOURCE_CSV} + + @pytest.fixture(scope="class") + def models(self): + return {"schema.yml": files.SCHEMA_YML} + + @pytest.fixture(scope="class", autouse=True) + def setup(self, project): + # we need the schema name for the sources section + os.environ["DBT_GET_LAST_RELATION_TEST_SCHEMA"] = project.test_schema + run_dbt(["seed"]) + yield + del os.environ["DBT_GET_LAST_RELATION_TEST_SCHEMA"] + + def test_get_last_relation_modified(self, project): + results = run_dbt(["source", "freshness"]) + assert len(results) == 1 + result = results[0] + assert result.status == "pass" diff --git a/tests/functional/adapter/test_simple_seed.py b/tests/functional/adapter/test_simple_seed.py index b01f99346..5ec19d420 100644 --- a/tests/functional/adapter/test_simple_seed.py +++ b/tests/functional/adapter/test_simple_seed.py @@ -5,7 +5,6 @@ from dbt.tests.adapter.simple_seed.test_seed import BaseTestEmptySeed from dbt.tests.adapter.utils.base_utils import run_dbt - _SEED_CONFIGS_CSV = """ seed_id,stuff 1,a @@ -156,3 +155,38 @@ def test__bigquery_seed_table_with_labels_config_bigquery(self, project): class TestBigQueryEmptySeed(BaseTestEmptySeed): pass + + +class TestBigQuerySeedWithUniqueDelimiter(TestSimpleSeedConfigs): + @pytest.fixture(scope="class") + def seeds(self): + return { + "seed_enabled.csv": seeds__enabled_in_config_csv.replace(",", "|"), + "seed_tricky.csv": seeds__tricky_csv.replace(",", "\t"), + "seed_configs.csv": _SEED_CONFIGS_CSV, + } + + @pytest.fixture(scope="class") + def project_config_update(self): + return { + "config-version": 2, + "seeds": { + "test": { + "enabled": False, + "quote_columns": True, + "seed_enabled": { + "enabled": True, + "+column_types": self.seed_enabled_types(), + "delimiter": "|", + }, + "seed_tricky": { + "enabled": True, + "+column_types": self.seed_tricky_types(), + "delimiter": "\t", + }, + "seed_configs": { + "enabled": True, + }, + }, + }, + } diff --git a/tests/functional/adapter/test_string_literal_macro.py b/tests/functional/adapter/test_string_literal_macro.py new file mode 100644 index 000000000..d67f4be71 --- /dev/null +++ b/tests/functional/adapter/test_string_literal_macro.py @@ -0,0 +1,17 @@ +import pytest +from dbt.tests.util import run_dbt + + +_MODEL_SQL = """ +select {{ dbt.string_literal('my multiline +string') }} as test +""" + + +class TestStringLiteralQuoting: + @pytest.fixture(scope="class") + def models(self): + return {"my_model.sql": _MODEL_SQL} + + def test_string_literal_quoting(self, project): + run_dbt() diff --git a/tests/functional/adapter/utils/test_utils.py b/tests/functional/adapter/utils/test_utils.py index 58cecdc7e..384b17108 100644 --- a/tests/functional/adapter/utils/test_utils.py +++ b/tests/functional/adapter/utils/test_utils.py @@ -8,9 +8,11 @@ from dbt.tests.adapter.utils.test_array_construct import BaseArrayConstruct from dbt.tests.adapter.utils.test_any_value import BaseAnyValue from dbt.tests.adapter.utils.test_bool_or import BaseBoolOr +from dbt.tests.adapter.utils.test_cast import BaseCast from dbt.tests.adapter.utils.test_cast_bool_to_text import BaseCastBoolToText from dbt.tests.adapter.utils.test_concat import BaseConcat from dbt.tests.adapter.utils.test_current_timestamp import BaseCurrentTimestampAware +from dbt.tests.adapter.utils.test_date import BaseDate from dbt.tests.adapter.utils.test_dateadd import 
BaseDateAdd from dbt.tests.adapter.utils.test_datediff import BaseDateDiff from dbt.tests.adapter.utils.test_date_spine import BaseDateSpine @@ -104,6 +106,10 @@ class TestBoolOr(BaseBoolOr): pass +class TestCast(BaseCast): + pass + + class TestCastBoolToText(BaseCastBoolToText): pass @@ -117,6 +123,10 @@ class TestCurrentTimestamp(BaseCurrentTimestampAware): pass +class TestDate(BaseDate): + pass + + class TestDateAdd(BaseDateAdd): pass diff --git a/tests/functional/python_model_tests/__init__.py b/tests/functional/python_model_tests/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/functional/python_model_tests/files.py b/tests/functional/python_model_tests/files.py new file mode 100644 index 000000000..1cb95602a --- /dev/null +++ b/tests/functional/python_model_tests/files.py @@ -0,0 +1,125 @@ +SINGLE_RECORD = """ +import pandas as pd + +def model(dbt, session): + + dbt.config( + submission_method="serverless", + materialized="table" + ) + + df = pd.DataFrame( + [ + {"column_name": {"name": "hello", "my_list": ["h", "e", "l", "l", "o"]}}, + ] + ) + + return df +""" + + +MULTI_RECORD = """ +import pandas as pd + +def model(dbt, session): + + dbt.config( + submission_method="serverless", + materialized="table", + ) + + df = pd.DataFrame( + [ + {"column_name": [{"name": "hello", "my_list": ["h", "e", "l", "l", "o"]}]}, + ] + ) + + return df +""" + + +ORC_FORMAT = """ +import pandas as pd + +def model(dbt, session): + + dbt.config( + submission_method="serverless", + materialized="table", + intermediate_format="orc", + ) + + df = pd.DataFrame( + [ + {"column_name": [{"name": "hello", "my_list": ["h", "e", "l", "l", "o"]}]}, + ] + ) + + return df +""" + + +ENABLE_LIST_INFERENCE = """ +import pandas as pd + +def model(dbt, session): + + dbt.config( + submission_method="serverless", + materialized="table", + enable_list_inference="true", + ) + + df = pd.DataFrame( + [ + {"column_name": [{"name": "hello", "my_list": ["h", "e", "l", "l", "o"]}]}, + ] + ) + + return df +""" + + +ENABLE_LIST_INFERENCE_PARQUET_FORMAT = """ +import pandas as pd + +def model(dbt, session): + + dbt.config( + submission_method="serverless", + materialized="table", + enable_list_inference="true", + intermediate_format="parquet", + ) + + df = pd.DataFrame( + [ + {"column_name": [{"name": "hello", "my_list": ["h", "e", "l", "l", "o"]}]}, + ] + ) + + return df +""" + + +DISABLE_LIST_INFERENCE_ORC_FORMAT = """ +import pandas as pd + +def model(dbt, session): + + dbt.config( + submission_method="serverless", + materialized="table", + enable_list_inference="false", + intermediate_format="orc", + ) + + df = pd.DataFrame( + [ + {"column_name": [{"name": "hello", "my_list": ["h", "e", "l", "l", "o"]}]}, + ] + ) + + return df + +""" diff --git a/tests/functional/python_model_tests/test_list_inference.py b/tests/functional/python_model_tests/test_list_inference.py new file mode 100644 index 000000000..143a61e88 --- /dev/null +++ b/tests/functional/python_model_tests/test_list_inference.py @@ -0,0 +1,36 @@ +""" +This test case addresses this regression: https://github.com/dbt-labs/dbt-bigquery/issues/1047 + +As the comments point out, the issue appears when the settings are: + - list inference: off + - intermediate format: parquet + +Adjusting either of these alleviates the issue. + +When the regression was first reported, `files.MULTI_RECORD` failed while the other models passed. 
+""" +from dbt.tests.util import run_dbt_and_capture +import pytest + +from tests.functional.python_model_tests import files + + +class TestPythonListInference: + @pytest.fixture(scope="class") + def models(self): + return { + # this is what worked prior to this issue + "single_record.py": files.SINGLE_RECORD, + # this is the model that initially failed for this issue + "multi_record.py": files.MULTI_RECORD, + # these are explicit versions of the default settings + "enable_list_inference.py": files.ENABLE_LIST_INFERENCE, + "enable_list_inference_parquet_format.py": files.ENABLE_LIST_INFERENCE_PARQUET_FORMAT, + # orc format also resolves the issue, regardless of list inference + "orc_format.py": files.ORC_FORMAT, + "disable_list_inference_orc_format.py": files.DISABLE_LIST_INFERENCE_ORC_FORMAT, + } + + def test_models_success(self, project, models): + result, output = run_dbt_and_capture(["run"]) + assert len(result) == len(models) diff --git a/tests/functional/test_job_timeout.py b/tests/functional/test_job_timeout.py index be559e816..57172e133 100644 --- a/tests/functional/test_job_timeout.py +++ b/tests/functional/test_job_timeout.py @@ -59,4 +59,5 @@ def profiles_config_update(self, dbt_profile_target): def test_job_timeout(self, project): result = run_dbt(["run"], expect_pass=False) # project setup will fail - assert f"Query exceeded configured timeout of {_SHORT_TIMEOUT}s" in result[0].message + expected_error = f"Operation did not complete within the designated timeout of {_SHORT_TIMEOUT} seconds." + assert expected_error in result[0].message diff --git a/tests/unit/test_bigquery_adapter.py b/tests/unit/test_bigquery_adapter.py index 34abd0caf..19d9dbd08 100644 --- a/tests/unit/test_bigquery_adapter.py +++ b/tests/unit/test_bigquery_adapter.py @@ -20,10 +20,9 @@ from dbt.adapters.bigquery.connections import _sanitize_label, _VALIDATE_LABEL_LENGTH_LIMIT from dbt_common.clients import agate_helper import dbt_common.exceptions -from dbt.context.manifest import generate_query_header_context +from dbt.context.query_header import generate_query_header_context from dbt.contracts.files import FileHash from dbt.contracts.graph.manifest import ManifestStateCheck -from dbt.logger import GLOBAL_LOGGER as logger # noqa from dbt.context.providers import RuntimeConfigObject, generate_runtime_macro_context from google.cloud.bigquery import AccessEntry diff --git a/tests/unit/test_bigquery_connection_manager.py b/tests/unit/test_bigquery_connection_manager.py index 6bb89ed36..9dc8fe219 100644 --- a/tests/unit/test_bigquery_connection_manager.py +++ b/tests/unit/test_bigquery_connection_manager.py @@ -1,19 +1,14 @@ -import time import json -import pytest import unittest from contextlib import contextmanager from requests.exceptions import ConnectionError from unittest.mock import patch, MagicMock, Mock, ANY import dbt.adapters -import dbt_common.dataclass_schema from dbt.adapters.bigquery import BigQueryCredentials from dbt.adapters.bigquery import BigQueryRelation from dbt.adapters.bigquery.connections import BigQueryConnectionManager -import dbt_common.exceptions -from dbt.logger import GLOBAL_LOGGER as logger # noqa class TestBigQueryConnectionManager(unittest.TestCase): @@ -123,26 +118,6 @@ def test_query_and_results(self, mock_bq): query="sql", job_config=mock_bq.QueryJobConfig(), timeout=15 ) - @patch("dbt.adapters.bigquery.impl.google.cloud.bigquery") - def test_query_and_results_timeout(self, mock_bq): - self.mock_client.query = Mock( - return_value=Mock(result=lambda *args, **kwargs: 
time.sleep(4)) - ) - with pytest.raises(dbt_common.exceptions.DbtRuntimeError) as exc: - self.connections._query_and_results( - self.mock_client, - "sql", - {"job_param_1": "blah"}, - job_creation_timeout=15, - job_execution_timeout=1, - ) - - mock_bq.QueryJobConfig.assert_called_once() - self.mock_client.query.assert_called_once_with( - query="sql", job_config=mock_bq.QueryJobConfig(), timeout=15 - ) - assert "Query exceeded configured timeout of 1s" in str(exc.value) - def test_copy_bq_table_appends(self): self._copy_table(write_disposition=dbt.adapters.bigquery.impl.WRITE_APPEND) args, kwargs = self.mock_client.copy_table.call_args diff --git a/tests/unit/test_renamed_relations.py b/tests/unit/test_renamed_relations.py new file mode 100644 index 000000000..8e787e6a3 --- /dev/null +++ b/tests/unit/test_renamed_relations.py @@ -0,0 +1,16 @@ +from dbt.adapters.bigquery.relation import BigQueryRelation +from dbt.adapters.contracts.relation import RelationType + + +def test_renameable_relation(): + relation = BigQueryRelation.create( + database="my_db", + schema="my_schema", + identifier="my_table", + type=RelationType.Table, + ) + assert relation.renameable_relations == frozenset( + { + RelationType.Table, + } + )
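
For reference while reviewing the new `predicate_for_avoid_require_partition_filter` macro and the `incremental_merge_time_with_require_partition` fixture above, here is a rough sketch of the kind of MERGE statement the merge strategy should produce once the tautological partition predicate is injected. The project, dataset, and source query below are illustrative placeholders, not output captured from dbt; only the shape of the ON clause is the point.

```sql
-- Illustrative sketch only: approximate shape of the merge generated for a model with
-- unique_key = 'id', partition_by on `date_time`, and require_partition_filter = true.
-- The (is null or is not null) clause comes from predicate_for_avoid_require_partition_filter;
-- it references the partition column so BigQuery's require_partition_filter check passes
-- without excluding any rows from the merge.
merge into `my-project`.`my_dataset`.`incremental_merge_time_with_require_partition` as DBT_INTERNAL_DEST
using (
    select 5 as id, cast('2020-01-02' as datetime) as date_time
) as DBT_INTERNAL_SOURCE
on (
    `DBT_INTERNAL_DEST`.`date_time` is null
    or `DBT_INTERNAL_DEST`.`date_time` is not null
) and DBT_INTERNAL_SOURCE.id = DBT_INTERNAL_DEST.id
when matched then update set
    id = DBT_INTERNAL_SOURCE.id,
    date_time = DBT_INTERNAL_SOURCE.date_time
when not matched then insert (id, date_time)
values (DBT_INTERNAL_SOURCE.id, DBT_INTERNAL_SOURCE.date_time)
```

Because the added predicate is always true, it changes nothing about which rows match; it only satisfies BigQuery's requirement that queries against the table filter on the partition column.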