Skip to content

Commit

Permalink
[HWORKS-850] Add event_time to all tutorials with 2 or more fgs (#218)
Browse files: browse the repository at this point in the history
Add event_time to all tutorials with 2 or more feature groups (fgs)
  • Loading branch information
Maxxx-zh authored Nov 29, 2023
1 parent f9989b0 commit d39d676
Show file tree
Hide file tree
Showing 5 changed files with 33 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -413,6 +413,7 @@
" version=1,\n",
" primary_key=['sk_id_curr'],\n",
" online_enabled=False,\n",
" event_time='datetime',\n",
")\n",
"applications_fg.insert(\n",
" applications_df,\n",
Expand Down Expand Up @@ -467,6 +468,7 @@
" version=1,\n",
" primary_key=['sk_id_curr','sk_id_bureau'],\n",
" online_enabled=False,\n",
" event_time='datetime',\n",
")\n",
"bureaus_fg.insert(\n",
" bureaus_df,\n",
Expand Down Expand Up @@ -494,6 +496,7 @@
" version=1,\n",
" primary_key=['sk_id_prev','sk_id_curr'],\n",
" online_enabled=False,\n",
" event_time='datetime',\n",
")\n",
"previous_applications_fg.insert(\n",
" previous_applications_df,\n",
Expand Down Expand Up @@ -548,6 +551,7 @@
" version=1,\n",
" primary_key=['sk_id_prev','sk_id_curr'],\n",
" online_enabled=False,\n",
" event_time='datetime',\n",
")\n",
"installment_payments_fg.insert(\n",
" installment_payments_df,\n",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
"outputs": [],
"source": [
"from functions import generate_data\n",
"from datetime import datetime\n",
"\n",
"#ignore warnings\n",
"import warnings\n",
Expand Down Expand Up @@ -267,6 +268,18 @@
"## <span style=\"color:#ff5f27;\"> 🧙🏼‍♂️ Data Generation </span>"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "cd420aa0",
"metadata": {},
"outputs": [],
"source": [
"# Get today's date and time\n",
"today_date_time = datetime.now()\n",
"today_date_time"
]
},
{
"cell_type": "code",
"execution_count": null,
Expand All @@ -275,6 +288,7 @@
"outputs": [],
"source": [
"applications_df_generated = generate_data(applications_df)\n",
"applications_df_generated['datetime'] = today_date_time\n",
"applications_df_generated.head(3)"
]
},
Expand All @@ -297,6 +311,7 @@
"outputs": [],
"source": [
"bureaus_df_generated = generate_data(bureaus_df)\n",
"bureaus_df_generated['datetime'] = today_date_time\n",
"bureaus_df_generated.head(3)"
]
},
Expand All @@ -319,6 +334,7 @@
"outputs": [],
"source": [
"installment_payments_df_generated = generate_data(installment_payments_df)\n",
"installment_payments_df_generated['datetime'] = today_date_time\n",
"installment_payments_df_generated.head(3)"
]
},
Expand All @@ -341,6 +357,7 @@
"outputs": [],
"source": [
"previous_applications_df_generated = generate_data(previous_applications_df)\n",
"previous_applications_df_generated['datetime'] = today_date_time\n",
"previous_applications_df_generated.head(3)"
]
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -215,8 +215,9 @@
"outputs": [],
"source": [
"# Build a query object \n",
"query = bureaus_fg.select_except(['sk_id_curr','sk_id_bureau'])\\\n",
"query = bureaus_fg.select_except(['sk_id_curr','sk_id_bureau','datetime'])\\\n",
" .join(applications_fg.select_except(['sk_id_curr',\n",
" 'datetime',\n",
" 'flag_mobil',\n",
" *[f'flag_document_{num}'\n",
" for num\n",
Expand All @@ -225,11 +226,11 @@
" 'amt_credit', 'weekday_appr_process_start',\n",
" 'hour_appr_process_start']))\\\n",
" .join(bureau_balances_fg.select_except(['sk_id_bureau','months_balance']))\\\n",
" .join(previous_applications_fg.select_except(['sk_id_prev', 'sk_id_curr',\n",
" .join(previous_applications_fg.select_except(['sk_id_prev', 'sk_id_curr','datetime',\n",
" 'name_contract_type', 'name_contract_status']))\\\n",
" .join(pos_cash_balances_fg.select_except(['sk_id_prev','sk_id_curr', 'months_balance',\n",
" 'name_contract_status', 'sk_dpd', 'sk_dpd_def']))\\\n",
" .join(installment_payments_fg.select_except(['sk_id_prev', 'sk_id_curr']))\\\n",
" .join(installment_payments_fg.select_except(['sk_id_prev', 'sk_id_curr', 'datetime']))\\\n",
" .join(credit_card_balances_fg.select_except(['sk_id_prev', 'sk_id_curr']))\\\n",
" .join(previous_loan_counts_fg.select_except('sk_id_curr'))\n",
"\n",
Expand Down
8 changes: 6 additions & 2 deletions churn/1_churn_feature_pipeline.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,7 @@
" version=1,\n",
" description=\"Customer info for churn prediction.\",\n",
" primary_key=['customerID'],\n",
" event_time=\"datetime\",\n",
")"
]
},
Expand Down Expand Up @@ -244,7 +245,8 @@
" {\"name\": \"paperlessbilling\", \"description\": \"Whether customer has paperless billing or not\"}, \n",
" {\"name\": \"monthlycharges\", \"description\": \"Monthly charges\"}, \n",
" {\"name\": \"totalcharges\", \"description\": \"Total charges\"},\n",
" {\"name\": \"churn\", \"description\": \"Whether customer has left within the last month or not\"}, \n",
" {\"name\": \"churn\", \"description\": \"Whether customer has left within the last month or not\"},\n",
" {\"name\": \"datetime\", \"description\": \"Date when the customer information was recorded\"},\n",
"]\n",
"\n",
"for desc in feature_descriptions: \n",
Expand Down Expand Up @@ -302,6 +304,7 @@
" version=1,\n",
" description=\"Customer subscription info for churn prediction.\",\n",
" primary_key=['customerID'],\n",
" event_time=\"datetime\",\n",
")\n",
"# Insert data into feature group\n",
"subscriptions_fg.insert(\n",
Expand All @@ -328,7 +331,8 @@
" {\"name\": \"phoneservice\", \"description\": \"Whether customer has signed up for phone service\"}, \n",
" {\"name\": \"techsupport\", \"description\": \"Whether customer has signed up for tech support service\"}, \n",
" {\"name\": \"streamingmovies\", \"description\": \"Whether customer has signed up for streaming movies service\"}, \n",
" {\"name\": \"streamingtv\", \"description\": \"Whether customer has signed up for streaming TV service\"}, \n",
" {\"name\": \"streamingtv\", \"description\": \"Whether customer has signed up for streaming TV service\"},\n",
" {\"name\": \"datetime\", \"description\": \"Date when the customer information was recorded\"},\n",
"]\n",
"\n",
"for desc in feature_descriptions: \n",
Expand Down
4 changes: 2 additions & 2 deletions churn/2_churn_training_pipeline.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -123,9 +123,9 @@
"outputs": [],
"source": [
"# Select features for training data\n",
"query = customer_info_fg.select_except([\"customerid\"]) \\\n",
"query = customer_info_fg.select_except([\"customerid\", \"datetime\"]) \\\n",
" .join(demography_fg.select_except([\"customerid\"])) \\\n",
" .join(subscriptions_fg.select_all())\n",
" .join(subscriptions_fg.select_except([\"datetime\"]))\n",
"\n",
"# uncomment this if you would like to view query result\n",
"# query.show(5)"
Expand Down

0 comments on commit d39d676

Please sign in to comment.