Skip to content

Commit

Permalink
Merge pull request #83 from OHDSI/ladams-location-fix-attempt
Browse files Browse the repository at this point in the history
refactor: build location model properly using patient and organization as a source
  • Loading branch information
lawrenceadams authored Oct 24, 2024
2 parents 4c34141 + acb3522 commit 8ff8911
Show file tree
Hide file tree
Showing 6 changed files with 106 additions and 16 deletions.
7 changes: 7 additions & 0 deletions macros/macros.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
macros:
- name: safe_hash
description: This macro allows concatenation and hashing of fields that may contain `NULL` elements. For example, fields in an address.
arguments:
- name: columns
type: list[str]
description: A list of column names
9 changes: 9 additions & 0 deletions macros/safe_hash.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
{%- macro safe_hash(columns) -%}
{% set coalesced_columns = [] %}
{%- for column in columns -%}
{% do coalesced_columns.append("COALESCE(" ~ column.lower() ~ ", '')") %}
{%- endfor -%}
MD5(
{{ dbt.concat(coalesced_columns) }}
)
{%- endmacro -%}
54 changes: 54 additions & 0 deletions models/intermediate/int__locations.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
{% set address_columns = [
"address_1",
"address_2",
"city",
"state",
"zip",
"county"
] %}

WITH unioned_location_sources AS (
SELECT DISTINCT
p.patient_address AS address_1
, {{ dbt.cast("null", api.Column.translate_type("varchar")) }} AS address_2
, p.patient_city AS city
, s.state_abbreviation AS state
, p.patient_zip AS zip
, p.patient_county AS county
, {{ dbt.cast("null", api.Column.translate_type("integer")) }} AS country_concept_id
, {{ dbt.cast("null", api.Column.translate_type("varchar")) }} AS country_source_value
, p.patient_latitude AS latitude
, p.patient_longitude AS longitude
FROM {{ ref("stg_synthea__patients") }} AS p
LEFT JOIN {{ ref('stg_map__states') }} AS s ON p.patient_state = s.state_name

UNION

SELECT DISTINCT
organization_address AS address_1
, {{ dbt.cast("null", api.Column.translate_type("varchar")) }} AS address_2
, organization_city AS city
, organization_state AS state
, organization_zip AS zip
, {{ dbt.cast("null", api.Column.translate_type("varchar")) }} AS county
, {{ dbt.cast("null", api.Column.translate_type("integer")) }} AS country_concept_id
, {{ dbt.cast("null", api.Column.translate_type("varchar")) }} AS country_source_value
, organization_latitude AS latitude
, organization_longitude AS longitude
FROM
{{ ref("stg_synthea__organizations") }}
)

SELECT
address_1
, address_2
, city
, state
, zip
, county
, {{ safe_hash(address_columns) }} AS location_source_value
, country_concept_id
, country_source_value
, latitude
, longitude
FROM unioned_location_sources
13 changes: 12 additions & 1 deletion models/intermediate/int__person.sql
Original file line number Diff line number Diff line change
@@ -1,3 +1,11 @@
{% set address_columns = [
"p.patient_address",
"p.patient_city",
"s.state_abbreviation",
"p.patient_zip",
"p.patient_county"
] %}

SELECT
row_number() OVER (ORDER BY p.patient_id) AS person_id
, CASE upper(p.patient_gender)
Expand All @@ -19,7 +27,7 @@ SELECT
WHEN upper(p.ethnicity) = 'NONHISPANIC' THEN 38003564
ELSE 0
END AS ethnicity_concept_id
, {{ dbt.cast("NULL", api.Column.translate_type("integer")) }} AS location_id
, loc.location_id
, {{ dbt.cast("NULL", api.Column.translate_type("integer")) }} AS provider_id
, {{ dbt.cast("NULL", api.Column.translate_type("integer")) }} AS care_site_id
, p.patient_id AS person_source_value
Expand All @@ -30,4 +38,7 @@ SELECT
, p.ethnicity AS ethnicity_source_value
, 0 AS ethnicity_source_concept_id
FROM {{ ref('stg_synthea__patients') }} AS p
LEFT JOIN {{ ref('stg_map__states') }} AS s ON p.patient_state = s.state_name
LEFT JOIN {{ ref('location') }} AS loc
ON loc.location_source_value = {{ safe_hash(address_columns) }}
WHERE p.patient_gender IS NOT NULL
12 changes: 11 additions & 1 deletion models/omop/care_site.sql
Original file line number Diff line number Diff line change
@@ -1,8 +1,18 @@
{% set address_columns = [
"organization_address",
"organization_city",
"organization_state",
"organization_zip",
] %}

SELECT
ROW_NUMBER() OVER (ORDER BY organization_id) AS care_site_id
, organization_name AS care_site_name
, 0 AS place_of_service_concept_id
, {{ dbt.cast("null", api.Column.translate_type("integer")) }} AS location_id
, loc.location_id
, organization_id AS care_site_source_value
, {{ dbt.cast("null", api.Column.translate_type("varchar")) }} AS place_of_service_source_value
FROM {{ ref('stg_synthea__organizations') }}
LEFT JOIN {{ ref('location') }} AS loc
ON
loc.location_source_value = {{ safe_hash(address_columns) }}
27 changes: 13 additions & 14 deletions models/omop/location.sql
Original file line number Diff line number Diff line change
@@ -1,15 +1,14 @@
SELECT
ROW_NUMBER() OVER () AS location_id
, {{ dbt.cast("null", api.Column.translate_type("varchar")) }} AS address_1
, {{ dbt.cast("null", api.Column.translate_type("varchar")) }} AS address_2
, p.patient_city AS city
, s.state_abbreviation AS state
, {{ dbt.cast("null", api.Column.translate_type("varchar")) }} AS county
, p.patient_zip AS zip
, p.patient_zip AS location_source_value
, {{ dbt.cast("null", api.Column.translate_type("integer")) }} AS country_concept_id
, {{ dbt.cast("null", api.Column.translate_type("varchar")) }} AS country_source_value
, {{ dbt.cast("null", api.Column.translate_type("decimal")) }} AS latitude
, {{ dbt.cast("null", api.Column.translate_type("decimal")) }} AS longitude
FROM {{ ref('stg_synthea__patients') }} AS p
LEFT JOIN {{ ref('stg_map__states') }} AS s ON p.patient_state = s.state_name
row_number() OVER (ORDER BY state, city, address_1, location_source_value) AS location_id
, address_1
, address_2
, city
, state
, zip
, county
, location_source_value
, country_concept_id
, country_source_value
, latitude
, longitude
FROM {{ ref('int__locations') }}

0 comments on commit 8ff8911

Please sign in to comment.