-
Notifications
You must be signed in to change notification settings - Fork 58
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Signed-off-by: Danny Chiao <[email protected]>
- Loading branch information
Showing
46 changed files
with
604 additions
and
9 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
import os | ||
from airflow.decorators import task | ||
from airflow.models.dag import DAG | ||
from airflow.operators.bash import BashOperator | ||
from feast import RepoConfig, FeatureStore | ||
from feast.infra.offline_stores.contrib.spark_offline_store.spark import ( | ||
SparkOfflineStoreConfig, | ||
) | ||
from feast.repo_config import RegistryConfig | ||
from feast.infra.online_stores.dynamodb import DynamoDBOnlineStoreConfig | ||
import pendulum | ||
|
||
# Daily feature pipeline: validate + rebuild the dbt incremental model, then
# have Feast materialize the fresh feature values into the online store.
with DAG(
    dag_id="feature_dag",
    start_date=pendulum.datetime(2021, 1, 1, tz="UTC"),
    description="A dbt + Feast DAG",
    schedule="@daily",
    catchup=False,  # don't backfill runs for intervals before deployment
    tags=["feast"],
) as dag:
    # Run dbt tests first so bad source data fails the run before any rebuild.
    dbt_test = BashOperator(
        task_id="dbt_test",
        bash_command="""
            cd ${AIRFLOW_HOME}; dbt test --models "aggregate_transaction_features"
            """,
        dag=dag,
    )

    # Generate new transformed feature values
    dbt_run = BashOperator(
        task_id="dbt_run",
        bash_command="""
            cd ${AIRFLOW_HOME}; dbt run --models "aggregate_transaction_features"
            """,
        dag=dag,
    )

    # Use Feast to make these feature values available in a low latency store
    @task()
    def materialize(data_interval_start=None, data_interval_end=None):
        """Materialize this run's interval of feature values into DynamoDB.

        ``data_interval_start`` / ``data_interval_end`` are injected by
        Airflow's TaskFlow API as pendulum datetimes for the current DAG run.
        """
        # Feature-store config is built in code (rather than read from a
        # feature_store.yaml) so the worker needs no repo checkout.
        repo_config = RepoConfig(
            registry=RegistryConfig(
                registry_type="sql",
                path="postgresql://postgres:mysecretpassword@[YOUR-RDS-ENDPOINT:PORT]/feast",
            ),
            project="feast_demo",
            provider="local",
            offline_store=SparkOfflineStoreConfig(
                spark_conf={
                    "spark.ui.enabled": "false",
                    "spark.eventLog.enabled": "false",
                    "spark.sql.catalogImplementation": "hive",
                    "spark.sql.parser.quotedRegexColumnNames": "true",
                    "spark.sql.session.timeZone": "UTC",
                }
            ),
            online_store=DynamoDBOnlineStoreConfig(region="us-west-1"),
            entity_key_serialization_version=2,
        )
        # Needed for Mac OS users because of a seg fault in requests for standalone Airflow (not needed in prod)
        os.environ["NO_PROXY"] = "*"
        os.environ["SPARK_LOCAL_IP"] = "127.0.0.1"
        store = FeatureStore(config=repo_config)
        # Add 1 hr overlap to account for late data
        # Note: normally, you'll probably have different feature views with different freshness requirements, instead
        # of materializing all feature views every day.
        store.materialize(data_interval_start.subtract(hours=1), data_interval_end)

    # Setup DAG: test -> rebuild model -> materialize to the online store.
    dbt_test >> dbt_run >> materialize()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
# Bootstraps a local Airflow "standalone" instance that runs the Feast + dbt
# feature DAG. Demo use only: installs into the current Python environment
# and wipes any previous local Airflow state.

# Airflow needs a home. `~/airflow` is the default, but you can put it
# somewhere else if you prefer (optional)
export AIRFLOW_HOME="$(pwd)/airflow_home"
export AIRFLOW__CORE__LOAD_EXAMPLES=False
# TODO: UPDATE THIS
export AIRFLOW_CONN_DATABRICKS_DEFAULT='databricks://@host-url?token=yourtoken'

# Cleanup previous state, if it exists.
# All expansions are quoted so paths containing spaces don't word-split
# (an unquoted `rm -rf $VAR` is especially dangerous).
rm -rf "$AIRFLOW_HOME"

# Install Airflow using the constraints file (pins transitive dependencies
# to a combination tested by the Airflow project).
AIRFLOW_VERSION=2.4.0
PYTHON_VERSION="$(python --version | cut -d " " -f 2 | cut -d "." -f 1-2)"
# For example: 3.7
CONSTRAINT_URL="https://raw.githubusercontent.com/apache/airflow/constraints-${AIRFLOW_VERSION}/constraints-${PYTHON_VERSION}.txt"
# For example: https://raw.githubusercontent.com/apache/airflow/constraints-2.4.0/constraints-3.7.txt
pip install "apache-airflow==${AIRFLOW_VERSION}" --constraint "${CONSTRAINT_URL}"

# Setup Feast dags
mkdir -p "$AIRFLOW_HOME/dags"
cp dag.py "$AIRFLOW_HOME/dags"

# Setup dbt dags. Abort if the cd fails so we never copy from (or into)
# the wrong directory.
cd ../dbt/feast_demo || exit 1
cp -R * "$AIRFLOW_HOME"
cd "$AIRFLOW_HOME" || exit 1

# The Standalone command will initialise the database, make a user,
# and start all components for you.
airflow standalone

# Visit localhost:8080 in the browser and use the admin account details
# shown on the terminal to login.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
|
||
# dbt project configuration for the Feast demo.

# Name your project! Project names should contain only lowercase characters
# and underscores. A good package name should reflect your organization's
# name or the intended use of these models
name: 'feast_demo'
version: '1.0.0'
config-version: 2

# This setting configures which "profile" dbt uses for this project.
profile: 'feast_demo'

# These configurations specify where dbt should look for different types of files.
# The `model-paths` config, for example, states that models in this project can be
# found in the "models/" directory. You probably won't need to change these!
model-paths: ["models"]
macro-paths: ["macros"]

target-path: "target"  # directory which will store compiled SQL files
clean-targets:         # directories to be removed by `dbt clean`
  - "target"
  - "dbt_packages"

# Defaults for models under models/example/: build as Delta-format views
# rooted at the configured S3 location unless overridden per-model.
models:
  feast_demo:
    example:
      materialized: view
      location_root: '[YOUR S3 BUCKET/FOLDER]'
      file_format: 'delta'
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
{# Date bounds used for incremental partition pruning. Both query the newest
   `timestamp` already loaded into the current model's table ({{ this }}),
   so they are only meaningful inside an is_incremental() branch. #}
{% macro prev_day_partition() %}
(SELECT MAX(timestamp)::date FROM {{ this }})
{% endmacro %}

{# The day after the newest loaded date — upper bound of the scan window. #}
{% macro next_day_partition() %}
(SELECT date_add(MAX(timestamp)::date, 1) FROM {{ this }})
{% endmacro %}
42 changes: 42 additions & 0 deletions
42
module_3_db/dbt/feast_demo/models/example/aggregate_transaction_features.sql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
{{
    config(
        materialized='incremental',
        file_format='parquet',
        incremental_strategy='append'
    )
}}

-- 1-day rolling SUM and AVG of transaction amounts per user, computed over
-- the raw transactions table. Incremental runs only scan the year/month/day
-- source partitions between the last loaded day and the following day.
SELECT *
FROM
    (
        SELECT
            user_id,
            to_timestamp(timestamp) AS timestamp,
            SUM(amt) OVER (
                PARTITION BY user_id
                ORDER BY to_timestamp(timestamp)
                RANGE BETWEEN INTERVAL 1 day PRECEDING AND CURRENT ROW
            ) AS amt_sum_1d_10m,
            AVG(amt) OVER (
                PARTITION BY user_id
                ORDER BY to_timestamp(timestamp)
                RANGE BETWEEN INTERVAL 1 day PRECEDING AND CURRENT ROW
            ) AS amt_mean_1d_10m
        FROM demo_fraud_v2.transactions
        {% if is_incremental() %}
        -- Prune to partitions covering [last loaded day, next day] using the
        -- prev_day_partition / next_day_partition macros.
        WHERE
            partition_0 BETWEEN date_format({{ prev_day_partition() }}, "yyyy") AND date_format({{ next_day_partition() }}, "yyyy") AND
            partition_1 BETWEEN date_format({{ prev_day_partition() }}, "MM") AND date_format({{ next_day_partition() }}, "MM") AND
            partition_2 BETWEEN date_format({{ prev_day_partition() }}, "dd") AND date_format({{ next_day_partition() }}, "dd")
        {% else %}
        -- Hack to simulate we started on 2022-04-20: the first (full-refresh)
        -- run loads only that single day's partition.
        WHERE
            partition_0 = "2022" AND
            partition_1 = "04" AND
            partition_2 = "20"
        {% endif %}
    )
{% if is_incremental() %}
-- Need to only produce values in this window: the partition filter above is
-- day-granular, so drop rows at or before the newest already-loaded timestamp.
WHERE timestamp > (SELECT MAX(timestamp) FROM {{ this }})
{% endif %}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
|
||
# dbt schema tests for the incremental aggregate-features model.
version: 2

models:
  - name: aggregate_transaction_features
    description: ""
    columns:
      - name: "user_id"
        description: "The primary key for this table"
        tests:
          # Every aggregated row must carry the user entity key.
          - not_null
File renamed without changes
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
from feast import PushSource
from feast.infra.offline_stores.contrib.spark_offline_store.spark_source import (
    SparkSource,
)

# Historical log of transactions stream
transactions_source = SparkSource(
    name="transactions_source",
    table="demo_fraud_v2.transactions",
    timestamp_field="timestamp",
)

# Precomputed aggregate transaction feature values (batch / stream).
# The PushSource lets a stream pipeline push fresh values directly; the
# nested SparkSource — the dbt model's output table — is the batch source
# used for scheduled materialization and historical retrieval.
aggregate_transactions_source = PushSource(
    name="transactions_1d",
    batch_source=SparkSource(
        name="transactions_1d_batch",
        table="demo_fraud_v2.aggregate_transaction_features",
        timestamp_field="timestamp",
        # Cross-reference to the dbt model that produces this table.
        tags={"dbtModel": "models/example/aggregate_transaction_features.sql"},
    ),
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
from feast import (
    Entity,
)

# The entity (join key) shared by the feature views in this repo: feature
# values are keyed and looked up per user via the `user_id` column.
user = Entity(
    name="user",
    join_keys=["user_id"],
    description="user id",
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
from feast import FeatureService

# Import the feature view explicitly rather than via `from features import *`:
# the wildcard hides this module's actual dependency and defeats linters/IDEs.
from features import user_transaction_amount_metrics

# Model version 1: serves the 1-day user transaction aggregate features.
feature_service_1 = FeatureService(
    name="model_v1",
    features=[user_transaction_amount_metrics],
    owner="[email protected]",
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
# Feast repo configuration: SQL (Postgres) registry, Spark offline store,
# DynamoDB online store.
project: feast_demo
provider: aws
registry: # where this repo's metadata is stored
  registry_type: sql
  path: postgresql://postgres:mysecretpassword@[your-rds-instance]:5432/feast
online_store: # low latency online storage
  type: dynamodb
  region: us-west-1
offline_store:
  type: spark
  spark_conf: # Note: pip install -U "databricks-connect"
    spark.ui.enabled: "false"
    spark.eventLog.enabled: "false"
    spark.sql.catalogImplementation: "hive"
    spark.sql.parser.quotedRegexColumnNames: "true"
    spark.sql.session.timeZone: "UTC"
entity_key_serialization_version: 2
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
from datetime import timedelta

from feast import (
    FeatureView,
    Field,
)
from feast.types import String, Float64

# Explicit imports (instead of `from ... import *` wildcards) so this view's
# dependencies on the source and entity definitions are visible to readers
# and linters.
from data_sources import aggregate_transactions_source
from entities import user

# 1-day rolling transaction aggregates per user, produced by the dbt model
# `aggregate_transaction_features` and served via the PushSource above.
user_transaction_amount_metrics = FeatureView(
    name="user_transaction_amount_metrics",
    description="User transaction features",
    entities=[user],
    # Very long TTL (~100,000 days): rows effectively never expire.
    ttl=timedelta(seconds=8640000000),
    schema=[
        Field(name="user_id", dtype=String),
        Field(name="amt_sum_1d_10m", dtype=Float64),
        Field(name="amt_mean_1d_10m", dtype=Float64),
    ],
    online=True,
    source=aggregate_transactions_source,
    tags={"production": "True"},
    owner="[email protected]",
)
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
File renamed without changes.
File renamed without changes.
File renamed without changes
File renamed without changes.
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
|
||
# dbt build artifacts and logs — generated locally, never committed.
target/
dbt_packages/
logs/
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.