[FSTORE-947] Tutorials Streamlit Fix (#179)

Streamlit Fix
logicalclocks · Jul 12, 2023 · 5704cff
1 parent 5a92fe9
commit 5704cff
Show file tree

Hide file tree

Showing 12 changed files with 55 additions and 63 deletions.
diff --git a/advanced_tutorials/air_quality/streamlit_app.py b/advanced_tutorials/air_quality/streamlit_app.py
@@ -12,8 +12,7 @@
 from streamlit_folium import st_folium
 
 from functions import *
-import features.air_quality
-
+from features import air_quality
 
 
 def print_fancy_header(text, font_size=22, color="#ff5f27"):
@@ -232,7 +231,7 @@ def plot_pm2_5(df):
 
             df_aq_temp['date'] = pd.to_datetime(df_aq_temp['date'])
 
-            df_aq_temp = feature_engineer_aq(df_aq_temp)
+            df_aq_temp = air_quality.feature_engineer_aq(df_aq_temp)
 
             # we need only the last row (one city, one day)
             df_aq_temp = df_aq_temp[df_aq_temp['city_name'] == city_name].tail(1)

diff --git a/advanced_tutorials/bitcoin/1_backfill_feature_groups.ipynb b/advanced_tutorials/bitcoin/1_backfill_feature_groups.ipynb
@@ -74,14 +74,14 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Fill in, if you are running on Colab, otherwise ignore this cell \n",
-    "os.environ['TWITTER_API_KEY'] = '{YOUR_API_KEY}'\n",
-    "os.environ['TWITTER_API_SECRET'] = '{YOUR_API_KEY}'\n",
-    "os.environ['TWITTER_ACCESS_TOKEN'] = '{YOUR_API_KEY}'\n",
-    "os.environ['TWITTER_ACCESS_TOKEN_SECRET'] = '{YOUR_API_KEY}'\n",
+    "# Uncomment and fill in if you are running on Colab\n",
+    "# os.environ['TWITTER_API_KEY'] = '{YOUR_API_KEY}'\n",
+    "# os.environ['TWITTER_API_SECRET'] = '{YOUR_API_KEY}'\n",
+    "# os.environ['TWITTER_ACCESS_TOKEN'] = '{YOUR_API_KEY}'\n",
+    "# os.environ['TWITTER_ACCESS_TOKEN_SECRET'] = '{YOUR_API_KEY}'\n",
     "\n",
-    "os.environ['BINANCE_API_KEY'] = '{YOUR_API_KEY}'\n",
-    "os.environ['BINANCE_API_SECRET'] = '{YOUR_API_KEY}'"
+    "# os.environ['BINANCE_API_KEY'] = '{YOUR_API_KEY}'\n",
+    "# os.environ['BINANCE_API_SECRET'] = '{YOUR_API_KEY}'"
    ]
   },
   {

diff --git a/advanced_tutorials/bitcoin/2_feature_pipeline.ipynb b/advanced_tutorials/bitcoin/2_feature_pipeline.ipynb
@@ -64,14 +64,14 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Fill in, if you are running on Colab, otherwise ignore this cell \n",
-    "os.environ['TWITTER_API_KEY'] = '{YOUR_API_KEY}'\n",
-    "os.environ['TWITTER_API_SECRET'] = '{YOUR_API_KEY}'\n",
-    "os.environ['TWITTER_ACCESS_TOKEN'] = '{YOUR_API_KEY}'\n",
-    "os.environ['TWITTER_ACCESS_TOKEN_SECRET'] = '{YOUR_API_KEY}'\n",
+    "# Uncomment and fill in if you are running on Colab\n",
+    "# os.environ['TWITTER_API_KEY'] = '{YOUR_API_KEY}'\n",
+    "# os.environ['TWITTER_API_SECRET'] = '{YOUR_API_KEY}'\n",
+    "# os.environ['TWITTER_ACCESS_TOKEN'] = '{YOUR_API_KEY}'\n",
+    "# os.environ['TWITTER_ACCESS_TOKEN_SECRET'] = '{YOUR_API_KEY}'\n",
     "\n",
-    "os.environ['BINANCE_API_KEY'] = '{YOUR_API_KEY}'\n",
-    "os.environ['BINANCE_API_SECRET'] = '{YOUR_API_KEY}'"
+    "# os.environ['BINANCE_API_KEY'] = '{YOUR_API_KEY}'\n",
+    "# os.environ['BINANCE_API_SECRET'] = '{YOUR_API_KEY}'"
    ]
   },
   {

diff --git a/advanced_tutorials/bitcoin/3_training_dataset_and_modeling.ipynb b/advanced_tutorials/bitcoin/3_training_dataset_and_modeling.ipynb
@@ -127,7 +127,6 @@
     "    name='bitcoin_price',\n",
     "    version=1\n",
     ")\n",
-    "\n",
     "# btc_price_fg.show(3)"
    ]
   },
@@ -155,7 +154,6 @@
     "    name='bitcoin_tweets_vader',\n",
     "    version=1\n",
     ")\n",
-    "\n",
     "# tweets_vader_fg.show(3)"
    ]
   },

diff --git a/advanced_tutorials/citibike/1_backfill_feature_groups.ipynb b/advanced_tutorials/citibike/1_backfill_feature_groups.ipynb
@@ -72,8 +72,8 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Fill in, if you are running on Colab, otherwise ignore this cell \n",
-    "os.environ['WEATHER_API_KEY'] = '{YOUR_API_KEY}'"
+    "# Uncomment and fill in if you are running on Colab\n",
+    "#os.environ['WEATHER_API_KEY'] = '{YOUR_API_KEY}'"
    ]
   },
   {

diff --git a/advanced_tutorials/citibike/2_feature_pipeline.ipynb b/advanced_tutorials/citibike/2_feature_pipeline.ipynb
@@ -58,8 +58,8 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Fill in, if you are running on Colab, otherwise ignore this cell \n",
-    "os.environ['WEATHER_API_KEY'] = '{YOUR_API_KEY}'"
+    "# Uncomment and fill in if you are running on Colab\n",
+    "#os.environ['WEATHER_API_KEY'] = '{YOUR_API_KEY}'"
    ]
   },
   {

diff --git a/advanced_tutorials/citibike/3_training_dataset_and_modeling.ipynb b/advanced_tutorials/citibike/3_training_dataset_and_modeling.ipynb
@@ -623,7 +623,6 @@
     "    start_time='2022-04-30',\n",
     "    end_time='2022-05-30'\n",
     ")\n",
-    "\n",
     "batch_data.head()"
    ]
   },

diff --git a/advanced_tutorials/electricity/3_training_dataset_and_modeling.ipynb b/advanced_tutorials/electricity/3_training_dataset_and_modeling.ipynb
@@ -135,15 +135,18 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Select features for training data.\n",
     "fg_query = electricity_prices_fg.select_all()\\\n",
     "    .join(\n",
     "    meteorological_measurements_fg\\\n",
-    "        .select_except([\"timestamp\",\"precipitaton_type_se1\",\"precipitaton_type_se2\",\"precipitaton_type_se3\",\"precipitaton_type_se4\"])\n",
+    "        .select_except([\"timestamp\"])\n",
     "    )\\\n",
     "    .join(\n",
     "        swedish_holidays_fg.select_all()\n",
-    "    )"
+    "    )\\\n",
+    ".filter(meteorological_measurements_fg.precipitaton_type_se1.isin(['missing','Regn']))\\\n",
+    ".filter(meteorological_measurements_fg.precipitaton_type_se2.isin(['missing','Regn']))\\\n",
+    ".filter(meteorological_measurements_fg.precipitaton_type_se3.isin(['missing','Regn']))\\\n",
+    ".filter(meteorological_measurements_fg.precipitaton_type_se4.isin(['missing','Regn']))"
    ]
   },
   {
@@ -187,7 +190,8 @@
     "    mapping_transformers[f\"precipitaton_amount_{area}\"] = fs.get_transformation_function(name=\"min_max_scaler\")\n",
     "    mapping_transformers[f\"total_sunshine_time_{area}\"] = fs.get_transformation_function(name=\"min_max_scaler\")\n",
     "    mapping_transformers[f\"mean_cloud_perc_{area}\"] = fs.get_transformation_function(name=\"min_max_scaler\")    \n",
-    "\n",
+    "    mapping_transformers[f\"precipitaton_type_{area}\"] = fs.get_transformation_function(name='label_encoder')\n",
+    "    \n",
     "mapping_transformers[\"type_of_day\"] = fs.get_transformation_function(name='label_encoder')"
    ]
   },
@@ -362,7 +366,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "X_test"
+    "X_test.head(3)"
    ]
   },
   {

diff --git a/advanced_tutorials/nyc_taxi_fares/streamlit_app.py b/advanced_tutorials/nyc_taxi_fares/streamlit_app.py
@@ -64,7 +64,7 @@ def process_input_vector(pickup_latitude, pickup_longitude, dropoff_latitude, dr
 
 
 
-st.title('🚖NYC Taxi Fares Project🚖')
+st.title('🚖 NYC Taxi Fares Project 🚖')
 st.write(36 * "-")
 print_fancy_header('\n📡 Connecting to the Hopsworks Feature Store...')
 
@@ -177,7 +177,7 @@ def process_input_vector(pickup_latitude, pickup_longitude, dropoff_latitude, dr
 
     prediction = model.predict(X)[0]
 
-    st.subheader(f"Prediction: {str(prediction)} $")
+    st.subheader(f"Prediction: {str(round(prediction, 2))} $")
 
     st.write(36 * "-")
 

diff --git a/churn/streamlit_app.py b/churn/streamlit_app.py
@@ -20,26 +20,19 @@ def header(text):
 project = hopsworks.login()
 fs = project.get_feature_store()
 
-st.write(fs)
 header('🪄 Retrieving Feature View...')
 
+feature_view = fs.get_feature_view(
+    name="churn_feature_view",
+    version=1
+    )
 
-@st.cache(allow_output_mutation=True, suppress_st_warning=True)
-def retrive_feature_view(fs=fs):
-    feature_view = fs.get_feature_view(
-        name="churn_feature_view",
-        version=1
-        )
-    return feature_view
-
-
-feature_view = retrive_feature_view()
 st.text('Done ✅')
 header('⚙️ Reading DataFrames from Feature View...')
 
-
-@st.cache(allow_output_mutation=True, suppress_st_warning=True)
+@st.cache_data()
 def retrive_data(feature_view=feature_view):
+    feature_view.init_batch_scoring(1)
     batch_data = feature_view.get_batch_data()
     batch_data.drop('customerid', axis=1, inplace=True)
     df_all = feature_view.query.read()
@@ -54,7 +47,7 @@ def retrive_data(feature_view=feature_view):
 header('🔮 Model Retrieving...')
 
 
-@st.cache(allow_output_mutation=True, suppress_st_warning=True)
+@st.cache_data()
 def get_model(project=project):
     mr = project.get_model_registry()
     model = mr.get_model("churnmodel", version=1)
@@ -103,7 +96,7 @@ def transform_preds(predictions):
 feature_names = batch_data.columns
 
 feature_importance = pd.DataFrame(feature_names, columns=["feature"])
-feature_importance["importance"] = pow(math.e, model.coef_[0])
+feature_importance["importance"] = model.feature_importances_
 feature_importance = feature_importance.sort_values(by=["importance"], ascending=False)
 
 fig_importance = px.bar(

diff --git a/fraud_online/2_training_dataset_and_modeling.ipynb b/fraud_online/2_training_dataset_and_modeling.ipynb
@@ -480,7 +480,7 @@
     "    name=\"xgboost_fraud_online_model\", \n",
     "    metrics=metrics,\n",
     "    model_schema=model_schema,\n",
-    "    input_example=[4700702588013561], # for testing deployments\n",
+    "    input_example=[4467360740682089], # for testing deployments\n",
     "    description=\"Fraud Online Predictor\")\n",
     "\n",
     "fraud_model.save(model_dir)"
@@ -556,8 +556,8 @@
     "    def predict(self, inputs):\n",
     "        \"\"\" Serves a prediction request usign a trained model\"\"\"\n",
     "        feature_vector = self.fv.get_feature_vector({\"cc_num\": inputs[0][0]})\n",
-    "        inexes_to_remove = [0,1,2]\n",
-    "        feature_vector = [i for j, i in enumerate(feature_vector) if j not in inexes_to_remove]\n",
+    "        indexes_to_remove = [0,1,2]\n",
+    "        feature_vector = [i for j, i in enumerate(feature_vector) if j not in indexes_to_remove]\n",
     "        \n",
     "        return self.model.predict(np.asarray(feature_vector).reshape(1, -1)).tolist() # Numpy Arrays are not JSON serializable"
    ]
@@ -712,7 +712,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "deployment.stop()"
+    "deployment.stop(await_stopped=180)"
    ]
   },
   {

diff --git a/fraud_online/streamlit_app.py b/fraud_online/streamlit_app.py
@@ -29,27 +29,28 @@ def print_fancy_header(text, font_size=22, color="#ff5f27"):
 progress_bar.progress(35)
 
 
-@st.cache(allow_output_mutation=True, suppress_st_warning=True)
+@st.cache_resource()
 def retrive_dataset():
     st.write(36 * "-")
+    trans_fg = fs.get_feature_group('transactions_fraud_online_fg', version=1)
+    cc_nums = trans_fg.show(5).cc_num.tolist()
     print_fancy_header('\n💾 Dataset Retrieving...')
     feature_view = fs.get_feature_view("transactions_fraud_online_fv", 1)
     X_train, X_test, y_train, y_test = feature_view.get_train_test_split(1)
 
-    return feature_view, X_test, y_test
+    return feature_view, X_train, X_test, y_train, y_test, cc_nums
 
 
-feature_view, X_test, y_test = retrive_dataset()
+feature_view, X_train, X_test, y_train, y_test, cc_nums = retrive_dataset()
 # show concatenated training dataset (label is a 'fraud_label' feature)
 st.dataframe(pd.concat([X_test.head(),(y_test.head())], axis=1))
 progress_bar.progress(55)
 
-
 def explore_data():
     st.write(36 * "-")
     print_fancy_header('\n👁 Data Exploration...')
     labels = ["Normal", "Fraudulent"]
-    unique, counts = np.unique(y_test.fraud_label.values, return_counts=True)
+    unique, counts = np.unique(y_train.fraud_label.values, return_counts=True)
     values = counts.tolist()
 
     def plot_pie(values, labels):
@@ -66,35 +67,33 @@ def plot_pie(values, labels):
 
 st.write(36 * "-")
 print_fancy_header('\n🤖 Connecting to Model Registry on Hopsworks...')
-@st.cache(suppress_st_warning=True)
+@st.cache()
 def get_deployment(project):
     ms = project.get_model_serving()
     deployment = ms.get_deployment("fraudonlinemodeldeployment")
-    deployment.start()
+    deployment.start(await_running=180)
     return deployment
 
 deployment = get_deployment(project)
 
 progress_bar.progress(85)
 
-
 st.write(36 * "-")
 print_fancy_header('\n🧠 Interactive predictions...')
 with st.form(key="Selecting cc_num"):
     option = st.selectbox(
          'Select a credit card to get a fraud analysis.',
-         (X_test.cc_num.sample(5).values)
+         cc_nums,
          )
     submit_button = st.form_submit_button(label='Submit')
 if submit_button:
     st.write('You selected:', option)
-    data = {"inputs": [str(option)]}
-    res = deployment.predict(data)
+    res = deployment.predict(inputs=[int(option)])
     negative = "**👌 Not a suspicious**"
     positive = "**🆘 Fraudulent**"
     res = negative if res["predictions"][0] == -1 else positive
     st.write(res, "transaction.")
-    deployment.stop()
     progress_bar.progress(100)
     st.write(36 * "-")
-    print_fancy_header('\n🎉 📈 🤝 App Finished Successfully 🤝 📈 🎉')
+
+    print_fancy_header('\n🎉 📈 🤝 App Finished Successfully 🤝 📈 🎉')