Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Add AWS Athena support #383

Merged
merged 11 commits into from
Feb 1, 2024
9 changes: 7 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,10 +1,15 @@

# DBT artifacts
target/
logs/
dbt_modules/
dbt_packages/
.vscode
integration_tests/state/
site/
env/

# IDE
.vscode
.idea

# MacOS
.DS_Store
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ Currently, the following adapters are supported:
- Snowflake
- DuckDB
- Trino (tested with Iceberg connector)
- AWS Athena
svdimchenko marked this conversation as resolved.
Show resolved Hide resolved

## Using This Package

Expand Down
2 changes: 1 addition & 1 deletion dbt_project.yml
Original file line number Diff line number Diff line change
Expand Up @@ -82,5 +82,5 @@ vars:
chained_views_threshold: "{{ 5 if target.type != 'trino' else 4 }}"

# -- Execution variables --
insert_batch_size: "{{ 500 if target.type == 'bigquery' else 10000 }}"
insert_batch_size: "{{ 500 if target.type in ['athena', 'bigquery'] else 10000 }}"
max_depth_dag: "{{ 9 if target.type in ['bigquery', 'spark', 'databricks'] else 4 if target.type == 'trino' else -1 }}"
58 changes: 39 additions & 19 deletions macros/recursive_dag.sql
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,28 @@

{% macro default__recursive_dag() %}

with recursive direct_relationships as (
select
with recursive direct_relationships
{%- if target.type == 'athena' %}
svdimchenko marked this conversation as resolved.
Show resolved Hide resolved
(
unique_id,
resource_id,
resource_name,
resource_type,
file_path,
directory_path,
file_name,
model_type,
materialized,
is_public,
access,
source_name,
is_excluded,
direct_parent_id,
is_primary_test_relationship
)
{%- endif %}
as (
select
*
from {{ ref('int_direct_relationships') }}
where resource_type <> 'test'
Expand Down Expand Up @@ -44,7 +64,7 @@ all_relationships (
path,
is_dependent_on_chain_of_views
) as (
-- anchor
-- anchor
select distinct
resource_id as parent_id,
resource_name as parent,
Expand Down Expand Up @@ -76,11 +96,11 @@ all_relationships (

from direct_relationships
-- where direct_parent_id is null {# optional lever to change filtering of anchor clause to only include root resources #}

union all

-- recursive clause
select
select
all_relationships.parent_id as parent_id,
all_relationships.parent as parent,
all_relationships.parent_resource_type as parent_resource_type,
Expand All @@ -105,12 +125,12 @@ all_relationships (
direct_relationships.directory_path as child_directory_path,
direct_relationships.file_name as child_file_name,
direct_relationships.is_excluded as child_is_excluded,
all_relationships.distance+1 as distance,
all_relationships.distance+1 as distance,
{{ dbt.array_append('all_relationships.path', 'direct_relationships.resource_name') }} as path,
case
when
all_relationships.child_materialized in ('view', 'ephemeral')
and coalesce(all_relationships.is_dependent_on_chain_of_views, true)
case
when
all_relationships.child_materialized in ('view', 'ephemeral')
and coalesce(all_relationships.is_dependent_on_chain_of_views, true)
then true
else false
end as is_dependent_on_chain_of_views
Expand Down Expand Up @@ -145,7 +165,7 @@ all_relationships (
{% endif %}

with direct_relationships as (
select
select
*
from {{ ref('int_direct_relationships') }}
where resource_type <> 'test'
Expand All @@ -161,12 +181,12 @@ with direct_relationships as (
is_public as child_is_public,
access as child_access,
is_excluded as child_is_excluded

from direct_relationships
)

, cte_0 as (
select
select
parent_id,
child_id,
child_materialized,
Expand All @@ -182,19 +202,19 @@ with direct_relationships as (
{% for i in range(1,max_depth) %}
{% set prev_cte_path %}cte_{{ i - 1 }}.path{% endset %}
, cte_{{i}} as (
select
select
cte_{{i - 1}}.parent_id as parent_id,
direct_relationships.resource_id as child_id,
direct_relationships.materialized as child_materialized,
direct_relationships.is_public as child_is_public,
direct_relationships.access as child_access,
direct_relationships.is_excluded as child_is_excluded,
cte_{{i - 1}}.distance+1 as distance,
cte_{{i - 1}}.distance+1 as distance,
{{ dbt.array_append(prev_cte_path, 'direct_relationships.resource_name') }} as path,
case
when
cte_{{i - 1}}.child_materialized in ('view', 'ephemeral')
and coalesce(cte_{{i - 1}}.is_dependent_on_chain_of_views, true)
case
when
cte_{{i - 1}}.child_materialized in ('view', 'ephemeral')
and coalesce(cte_{{i - 1}}.is_dependent_on_chain_of_views, true)
then true
else false
end as is_dependent_on_chain_of_views
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ conversion as (

final as (
select
{{ 'current_timestamp' if target.type != 'trino' else 'current_timestamp(6)' }} as measured_at,
{{ dbt.current_timestamp() if target.type != 'trino' else 'current_timestamp(6)' }} as measured_at,
count(*) as total_models,
sum(is_described_model) as documented_models,
round(sum(is_described_model) * 100.00 / count(*), 2) as documentation_coverage_pct,
Expand Down
4 changes: 2 additions & 2 deletions models/marts/tests/fct_test_coverage.sql
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ conversion as (

final as (
select
{{ 'current_timestamp' if target.type != 'trino' else 'current_timestamp(6)' }} as measured_at,
{{ dbt.current_timestamp() if target.type != 'trino' else 'current_timestamp(6)' }} as measured_at,
count(*) as total_models,
sum(number_of_tests_on_model) as total_tests,
sum(is_tested_model) as tested_models,
Expand All @@ -39,4 +39,4 @@ final as (
on test_counts.resource_name = conversion.resource_name
)

select * from final
select * from final