{#
-- deduplicate.sql
-- Forked from dbt-labs/dbt-utils.
#}
{%- macro deduplicate(relation, group_by, order_by=none, relation_alias=none) -%}
    {#-
        Public entry point for deduplication.

        Looks up the adapter-specific `deduplicate` implementation in the
        `dbt_utils` namespace via `adapter.dispatch` and returns whatever that
        implementation produces.

        Arguments (forwarded unchanged to the implementation):
          relation       -- relation whose rows are deduplicated
          group_by       -- expression(s) identifying a group of duplicates
          order_by       -- optional ordering used to choose the surviving row
          relation_alias -- optional name to select from instead of `relation`
    -#}
    {%- set dedupe_impl = adapter.dispatch('deduplicate', 'dbt_utils') -%}
    {{ return(dedupe_impl(relation, group_by, order_by=order_by, relation_alias=relation_alias)) }}
{% endmacro %}
{%- macro default__deduplicate(relation, group_by, order_by=none, relation_alias=none) -%}
    {#-
        Default implementation: keep one row per `group_by` partition.

        Rows are numbered with `row_number()` inside each partition and only
        the row numbered 1 is kept. The outer select lists the columns of
        `relation` (via `dbt_utils.star`), so the helper column `rn` is not
        part of the result.

        Arguments:
          relation       -- relation whose columns are selected; also the
                            FROM target unless `relation_alias` is given
          group_by       -- expression(s) used in `partition by`
          order_by       -- optional expression(s) deciding which row in a
                            partition is numbered 1
          relation_alias -- optional name to select from instead of `relation`
    -#}
    {#-
        Some warehouses require an ORDER BY inside the row_number() window.
        When the caller gives no ordering, order by the partition keys: every
        row in a partition ties on them, so the surviving row is still
        arbitrary, but the rendered SQL is valid on those warehouses too.
    -#}
    {%- set window_order = group_by if order_by is none else order_by %}

    select
        {{ dbt_utils.star(relation, relation_alias='deduped') | indent }}
    from (
        select
            _inner.*,
            row_number() over (
                partition by {{ group_by }}
                order by {{ window_order }}
            ) as rn
        from {{ relation if relation_alias is none else relation_alias }} as _inner
    ) as deduped
    where deduped.rn = 1

{%- endmacro -%}
{#
-- It is more performant to deduplicate using `array_agg` with a limit
-- clause in BigQuery:
-- https://github.com/dbt-labs/dbt-utils/issues/335#issuecomment-788157572
#}
{%- macro bigquery__deduplicate(relation, group_by, order_by=none, relation_alias=none) -%}
    {#-
        BigQuery implementation: keep one row per `group_by` group.

        Each group is collapsed with `array_agg(... limit 1)`, optionally
        ordered by `order_by`, and the single array element is exposed as the
        struct `deduped`. The outer select then lists the columns of
        `relation` (via `dbt_utils.star`) as fields of that struct.

        Arguments:
          relation       -- relation whose columns are selected; also the
                            FROM target unless `relation_alias` is given
          group_by       -- expression(s) used in `group by`
          order_by       -- optional expression(s) deciding which row of a
                            group is kept
          relation_alias -- optional name to select from instead of `relation`
    -#}
    {%- set source_name = relation if relation_alias is none else relation_alias -%}
    {%- set deduped_columns = dbt_utils.star(relation, relation_alias='deduped') | indent %}

    select
        {{ deduped_columns }}
    from (
        select
            array_agg (
                original
                {%- if order_by is not none %}
                order by {{ order_by }}
                {%- endif %}
                limit 1
            )[offset(0)] as deduped
        from {{ source_name }} as original
        group by {{ group_by }}
    )

{%- endmacro -%}