diff --git a/macros/macros.yml b/macros/macros.yml new file mode 100644 index 0000000..e576eaf --- /dev/null +++ b/macros/macros.yml @@ -0,0 +1,7 @@ +macros: + - name: safe_hash + description: This macro allows concatenation and hashing of fields that may contain `NULL` elements. For example, fields in an address. + arguments: + - name: columns + type: list[str] + description: A list of column names \ No newline at end of file diff --git a/macros/safe_hash.sql b/macros/safe_hash.sql new file mode 100644 index 0000000..0c59184 --- /dev/null +++ b/macros/safe_hash.sql @@ -0,0 +1,9 @@ +{%- macro safe_hash(columns) -%} +{% set coalesced_columns = [] %} +{%- for column in columns -%} + {% do coalesced_columns.append("COALESCE(" ~ column.lower() ~ ", '')") %} +{%- endfor -%} + MD5( + {{ dbt.concat(coalesced_columns) }} + ) +{%- endmacro -%} diff --git a/models/intermediate/int__locations.sql b/models/intermediate/int__locations.sql new file mode 100644 index 0000000..ae52150 --- /dev/null +++ b/models/intermediate/int__locations.sql @@ -0,0 +1,54 @@ +{% set address_columns = [ + "address_1", + "address_2", + "city", + "state", + "zip", + "county" +] %} + +WITH unioned_location_sources AS ( + SELECT DISTINCT + p.patient_address AS address_1 + , {{ dbt.cast("null", api.Column.translate_type("varchar")) }} AS address_2 + , p.patient_city AS city + , s.state_abbreviation AS state + , p.patient_zip AS zip + , p.patient_county AS county + , {{ dbt.cast("null", api.Column.translate_type("integer")) }} AS country_concept_id + , {{ dbt.cast("null", api.Column.translate_type("varchar")) }} AS country_source_value + , p.patient_latitude AS latitude + , p.patient_longitude AS longitude + FROM {{ ref("stg_synthea__patients") }} AS p + LEFT JOIN {{ ref('stg_map__states') }} AS s ON p.patient_state = s.state_name + + UNION + + SELECT DISTINCT + organization_address AS address_1 + , {{ dbt.cast("null", api.Column.translate_type("varchar")) }} AS address_2 + , organization_city AS city + , organization_state AS state + , organization_zip AS zip + , {{ dbt.cast("null", api.Column.translate_type("varchar")) }} AS county + , {{ dbt.cast("null", api.Column.translate_type("integer")) }} AS country_concept_id + , {{ dbt.cast("null", api.Column.translate_type("varchar")) }} AS country_source_value + , organization_latitude AS latitude + , organization_longitude AS longitude + FROM + {{ ref("stg_synthea__organizations") }} +) + +SELECT + address_1 + , address_2 + , city + , state + , zip + , county + , {{ safe_hash(address_columns) }} AS location_source_value + , country_concept_id + , country_source_value + , latitude + , longitude +FROM unioned_location_sources diff --git a/models/intermediate/int__person.sql b/models/intermediate/int__person.sql index 8d78421..ec97731 100644 --- a/models/intermediate/int__person.sql +++ b/models/intermediate/int__person.sql @@ -1,3 +1,11 @@ +{% set address_columns = [ + "p.patient_address", + "p.patient_city", + "s.state_abbreviation", + "p.patient_zip", + "p.patient_county" +] %} + SELECT row_number() OVER (ORDER BY p.patient_id) AS person_id , CASE upper(p.patient_gender) @@ -19,7 +27,7 @@ SELECT WHEN upper(p.ethnicity) = 'NONHISPANIC' THEN 38003564 ELSE 0 END AS ethnicity_concept_id - , {{ dbt.cast("NULL", api.Column.translate_type("integer")) }} AS location_id + , loc.location_id , {{ dbt.cast("NULL", api.Column.translate_type("integer")) }} AS provider_id , {{ dbt.cast("NULL", api.Column.translate_type("integer")) }} AS care_site_id , p.patient_id AS person_source_value @@ -30,4 +38,7 @@ SELECT , p.ethnicity AS ethnicity_source_value , 0 AS ethnicity_source_concept_id FROM {{ ref('stg_synthea__patients') }} AS p +LEFT JOIN {{ ref('stg_map__states') }} AS s ON p.patient_state = s.state_name +LEFT JOIN {{ ref('location') }} AS loc + ON loc.location_source_value = {{ safe_hash(address_columns) }} WHERE p.patient_gender IS NOT NULL diff --git a/models/omop/care_site.sql b/models/omop/care_site.sql index b1cadab..2393ead 100644 --- a/models/omop/care_site.sql +++ b/models/omop/care_site.sql @@ -1,8 +1,18 @@ +{% set address_columns = [ + "organization_address", + "organization_city", + "organization_state", + "organization_zip", +] %} + SELECT ROW_NUMBER() OVER (ORDER BY organization_id) AS care_site_id , organization_name AS care_site_name , 0 AS place_of_service_concept_id - , {{ dbt.cast("null", api.Column.translate_type("integer")) }} AS location_id + , loc.location_id , organization_id AS care_site_source_value , {{ dbt.cast("null", api.Column.translate_type("varchar")) }} AS place_of_service_source_value FROM {{ ref('stg_synthea__organizations') }} +LEFT JOIN {{ ref('location') }} AS loc + ON + loc.location_source_value = {{ safe_hash(address_columns) }} diff --git a/models/omop/location.sql b/models/omop/location.sql index 9a0b959..4fba0f9 100644 --- a/models/omop/location.sql +++ b/models/omop/location.sql @@ -1,15 +1,14 @@ SELECT - ROW_NUMBER() OVER () AS location_id - , {{ dbt.cast("null", api.Column.translate_type("varchar")) }} AS address_1 - , {{ dbt.cast("null", api.Column.translate_type("varchar")) }} AS address_2 - , p.patient_city AS city - , s.state_abbreviation AS state - , {{ dbt.cast("null", api.Column.translate_type("varchar")) }} AS county - , p.patient_zip AS zip - , p.patient_zip AS location_source_value - , {{ dbt.cast("null", api.Column.translate_type("integer")) }} AS country_concept_id - , {{ dbt.cast("null", api.Column.translate_type("varchar")) }} AS country_source_value - , {{ dbt.cast("null", api.Column.translate_type("decimal")) }} AS latitude - , {{ dbt.cast("null", api.Column.translate_type("decimal")) }} AS longitude -FROM {{ ref('stg_synthea__patients') }} AS p -LEFT JOIN {{ ref('stg_map__states') }} AS s ON p.patient_state = s.state_name + row_number() OVER (ORDER BY state, city, address_1, location_source_value) AS location_id + , address_1 + , address_2 + , city + , state + , zip + , county + , location_source_value + , country_concept_id + , country_source_value + , latitude + , longitude +FROM {{ ref('int__locations') }}