diff --git a/advanced_tutorials/air_quality/streamlit_app.py b/advanced_tutorials/air_quality/streamlit_app.py
index e7c640f2..d4c9b774 100644
--- a/advanced_tutorials/air_quality/streamlit_app.py
+++ b/advanced_tutorials/air_quality/streamlit_app.py
@@ -12,8 +12,7 @@ from streamlit_folium import st_folium
 from functions import *
 
-import features.air_quality
-
+from features import air_quality
 
 
 def print_fancy_header(text, font_size=22, color="#ff5f27"):
@@ -232,7 +231,7 @@ def plot_pm2_5(df):
 
     df_aq_temp['date'] = pd.to_datetime(df_aq_temp['date'])
 
-    df_aq_temp = feature_engineer_aq(df_aq_temp)
+    df_aq_temp = air_quality.feature_engineer_aq(df_aq_temp)
 
     # we need only the last row (one city, one day)
     df_aq_temp = df_aq_temp[df_aq_temp['city_name'] == city_name].tail(1)
diff --git a/advanced_tutorials/bitcoin/1_backfill_feature_groups.ipynb b/advanced_tutorials/bitcoin/1_backfill_feature_groups.ipynb
index 5fbd911d..1052abcf 100644
--- a/advanced_tutorials/bitcoin/1_backfill_feature_groups.ipynb
+++ b/advanced_tutorials/bitcoin/1_backfill_feature_groups.ipynb
@@ -74,14 +74,14 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Fill in, if you are running on Colab, otherwise ignore this cell \n",
-    "os.environ['TWITTER_API_KEY'] = '{YOUR_API_KEY}'\n",
-    "os.environ['TWITTER_API_SECRET'] = '{YOUR_API_KEY}'\n",
-    "os.environ['TWITTER_ACCESS_TOKEN'] = '{YOUR_API_KEY}'\n",
-    "os.environ['TWITTER_ACCESS_TOKEN_SECRET'] = '{YOUR_API_KEY}'\n",
+    "# Uncomment and fill in if you are running on Colab\n",
+    "# os.environ['TWITTER_API_KEY'] = '{YOUR_API_KEY}'\n",
+    "# os.environ['TWITTER_API_SECRET'] = '{YOUR_API_KEY}'\n",
+    "# os.environ['TWITTER_ACCESS_TOKEN'] = '{YOUR_API_KEY}'\n",
+    "# os.environ['TWITTER_ACCESS_TOKEN_SECRET'] = '{YOUR_API_KEY}'\n",
     "\n",
-    "os.environ['BINANCE_API_KEY'] = '{YOUR_API_KEY}'\n",
-    "os.environ['BINANCE_API_SECRET'] = '{YOUR_API_KEY}'"
+    "# os.environ['BINANCE_API_KEY'] = '{YOUR_API_KEY}'\n",
+    "# os.environ['BINANCE_API_SECRET'] = '{YOUR_API_KEY}'"
    ]
   },
   {
diff --git a/advanced_tutorials/bitcoin/2_feature_pipeline.ipynb b/advanced_tutorials/bitcoin/2_feature_pipeline.ipynb
index 61ea232c..b6bc5c59 100644
--- a/advanced_tutorials/bitcoin/2_feature_pipeline.ipynb
+++ b/advanced_tutorials/bitcoin/2_feature_pipeline.ipynb
@@ -64,14 +64,14 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Fill in, if you are running on Colab, otherwise ignore this cell \n",
-    "os.environ['TWITTER_API_KEY'] = '{YOUR_API_KEY}'\n",
-    "os.environ['TWITTER_API_SECRET'] = '{YOUR_API_KEY}'\n",
-    "os.environ['TWITTER_ACCESS_TOKEN'] = '{YOUR_API_KEY}'\n",
-    "os.environ['TWITTER_ACCESS_TOKEN_SECRET'] = '{YOUR_API_KEY}'\n",
+    "# Uncomment and fill in if you are running on Colab\n",
+    "# os.environ['TWITTER_API_KEY'] = '{YOUR_API_KEY}'\n",
+    "# os.environ['TWITTER_API_SECRET'] = '{YOUR_API_KEY}'\n",
+    "# os.environ['TWITTER_ACCESS_TOKEN'] = '{YOUR_API_KEY}'\n",
+    "# os.environ['TWITTER_ACCESS_TOKEN_SECRET'] = '{YOUR_API_KEY}'\n",
     "\n",
-    "os.environ['BINANCE_API_KEY'] = '{YOUR_API_KEY}'\n",
-    "os.environ['BINANCE_API_SECRET'] = '{YOUR_API_KEY}'"
+    "# os.environ['BINANCE_API_KEY'] = '{YOUR_API_KEY}'\n",
+    "# os.environ['BINANCE_API_SECRET'] = '{YOUR_API_KEY}'"
    ]
   },
   {
diff --git a/advanced_tutorials/bitcoin/3_training_dataset_and_modeling.ipynb b/advanced_tutorials/bitcoin/3_training_dataset_and_modeling.ipynb
index 65e0d7d6..e6435cdc 100644
--- a/advanced_tutorials/bitcoin/3_training_dataset_and_modeling.ipynb
+++ b/advanced_tutorials/bitcoin/3_training_dataset_and_modeling.ipynb
@@ -127,7 +127,6 @@
     "    name='bitcoin_price',\n",
     "    version=1\n",
     ")\n",
-    "\n",
     "# btc_price_fg.show(3)"
    ]
   },
@@ -155,7 +154,6 @@
     "    name='bitcoin_tweets_vader',\n",
     "    version=1\n",
     ")\n",
-    "\n",
     "# tweets_vader_fg.show(3)"
    ]
   },
diff --git a/advanced_tutorials/citibike/1_backfill_feature_groups.ipynb b/advanced_tutorials/citibike/1_backfill_feature_groups.ipynb
index f4c33bc5..8dc59321 100644
--- a/advanced_tutorials/citibike/1_backfill_feature_groups.ipynb
+++ b/advanced_tutorials/citibike/1_backfill_feature_groups.ipynb
@@ -72,8 +72,8 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Fill in, if you are running on Colab, otherwise ignore this cell \n",
-    "os.environ['WEATHER_API_KEY'] = '{YOUR_API_KEY}'"
+    "# Uncomment and fill in if you are running on Colab\n",
+    "#os.environ['WEATHER_API_KEY'] = '{YOUR_API_KEY}'"
    ]
   },
   {
diff --git a/advanced_tutorials/citibike/2_feature_pipeline.ipynb b/advanced_tutorials/citibike/2_feature_pipeline.ipynb
index 4172b83f..5f9f4bc7 100644
--- a/advanced_tutorials/citibike/2_feature_pipeline.ipynb
+++ b/advanced_tutorials/citibike/2_feature_pipeline.ipynb
@@ -58,8 +58,8 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Fill in, if you are running on Colab, otherwise ignore this cell \n",
-    "os.environ['WEATHER_API_KEY'] = '{YOUR_API_KEY}'"
+    "# Uncomment and fill in if you are running on Colab\n",
+    "#os.environ['WEATHER_API_KEY'] = '{YOUR_API_KEY}'"
    ]
   },
   {
diff --git a/advanced_tutorials/citibike/3_training_dataset_and_modeling.ipynb b/advanced_tutorials/citibike/3_training_dataset_and_modeling.ipynb
index 732c3bc0..e269dd91 100644
--- a/advanced_tutorials/citibike/3_training_dataset_and_modeling.ipynb
+++ b/advanced_tutorials/citibike/3_training_dataset_and_modeling.ipynb
@@ -623,7 +623,6 @@
     "    start_time='2022-04-30',\n",
     "    end_time='2022-05-30'\n",
     ")\n",
-    "\n",
     "batch_data.head()"
    ]
   },
diff --git a/advanced_tutorials/electricity/3_training_dataset_and_modeling.ipynb b/advanced_tutorials/electricity/3_training_dataset_and_modeling.ipynb
index c404aba8..b70ee6a1 100644
--- a/advanced_tutorials/electricity/3_training_dataset_and_modeling.ipynb
+++ b/advanced_tutorials/electricity/3_training_dataset_and_modeling.ipynb
@@ -135,15 +135,18 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Select features for training data.\n",
     "fg_query = electricity_prices_fg.select_all()\\\n",
     "    .join(\n",
     "        meteorological_measurements_fg\\\n",
-    "            .select_except([\"timestamp\",\"precipitaton_type_se1\",\"precipitaton_type_se2\",\"precipitaton_type_se3\",\"precipitaton_type_se4\"])\n",
+    "            .select_except([\"timestamp\"])\n",
     "    )\\\n",
     "    .join(\n",
     "        swedish_holidays_fg.select_all()\n",
-    "    )"
+    "    )\\\n",
+    ".filter(meteorological_measurements_fg.precipitaton_type_se1.isin(['missing','Regn']))\\\n",
+    ".filter(meteorological_measurements_fg.precipitaton_type_se2.isin(['missing','Regn']))\\\n",
+    ".filter(meteorological_measurements_fg.precipitaton_type_se3.isin(['missing','Regn']))\\\n",
+    ".filter(meteorological_measurements_fg.precipitaton_type_se4.isin(['missing','Regn']))"
    ]
   },
   {
@@ -187,7 +190,8 @@
     "    mapping_transformers[f\"precipitaton_amount_{area}\"] = fs.get_transformation_function(name=\"min_max_scaler\")\n",
     "    mapping_transformers[f\"total_sunshine_time_{area}\"] = fs.get_transformation_function(name=\"min_max_scaler\")\n",
     "    mapping_transformers[f\"mean_cloud_perc_{area}\"] = fs.get_transformation_function(name=\"min_max_scaler\") \n",
-    "\n",
+    "    mapping_transformers[f\"precipitaton_type_{area}\"] = fs.get_transformation_function(name='label_encoder')\n",
+    "    \n",
     "mapping_transformers[\"type_of_day\"] = fs.get_transformation_function(name='label_encoder')"
    ]
   },
@@ -362,7 +366,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "X_test"
+    "X_test.head(3)"
    ]
   },
   {
diff --git a/advanced_tutorials/nyc_taxi_fares/streamlit_app.py b/advanced_tutorials/nyc_taxi_fares/streamlit_app.py
index 41cce73a..c9b6226a 100644
--- a/advanced_tutorials/nyc_taxi_fares/streamlit_app.py
+++ b/advanced_tutorials/nyc_taxi_fares/streamlit_app.py
@@ -64,7 +64,7 @@ def process_input_vector(pickup_latitude, pickup_longitude, dropoff_latitude, dr
 
 
 
-st.title('🚖NYC Taxi Fares Project🚖')
+st.title('🚖 NYC Taxi Fares Project 🚖')
 
 st.write(36 * "-")
 print_fancy_header('\n📡 Connecting to the Hopsworks Feature Store...')
@@ -177,7 +177,7 @@ def process_input_vector(pickup_latitude, pickup_longitude, dropoff_latitude, dr
 
     prediction = model.predict(X)[0]
 
-    st.subheader(f"Prediction: {str(prediction)} $")
+    st.subheader(f"Prediction: {str(round(prediction, 2))} $")
 
     st.write(36 * "-")
 
diff --git a/churn/streamlit_app.py b/churn/streamlit_app.py
index 22506a1a..97bd3e8b 100644
--- a/churn/streamlit_app.py
+++ b/churn/streamlit_app.py
@@ -20,26 +20,19 @@ def header(text):
 project = hopsworks.login()
 fs = project.get_feature_store()
 
-st.write(fs)
 header('🪄 Retrieving Feature View...')
 
+feature_view = fs.get_feature_view(
+    name="churn_feature_view",
+    version=1
+    )
-@st.cache(allow_output_mutation=True, suppress_st_warning=True)
-def retrive_feature_view(fs=fs):
-    feature_view = fs.get_feature_view(
-        name="churn_feature_view",
-        version=1
-    )
-    return feature_view
-
-
-feature_view = retrive_feature_view()
 st.text('Done ✅')
 
 header('⚙️ Reading DataFrames from Feature View...')
 
-
-@st.cache(allow_output_mutation=True, suppress_st_warning=True)
+@st.cache_data()
 def retrive_data(feature_view=feature_view):
+    feature_view.init_batch_scoring(1)
     batch_data = feature_view.get_batch_data()
     batch_data.drop('customerid', axis=1, inplace=True)
     df_all = feature_view.query.read()
@@ -54,7 +47,7 @@ def retrive_data(feature_view=feature_view):
 
 header('🔮 Model Retrieving...')
 
-@st.cache(allow_output_mutation=True, suppress_st_warning=True)
+@st.cache_data()
 def get_model(project=project):
     mr = project.get_model_registry()
     model = mr.get_model("churnmodel", version=1)
@@ -103,7 +96,7 @@ def transform_preds(predictions):
 
 feature_names = batch_data.columns
 feature_importance = pd.DataFrame(feature_names, columns=["feature"])
-feature_importance["importance"] = pow(math.e, model.coef_[0])
+feature_importance["importance"] = model.feature_importances_
 feature_importance = feature_importance.sort_values(by=["importance"], ascending=False)
 
 fig_importance = px.bar(
diff --git a/fraud_online/2_training_dataset_and_modeling.ipynb b/fraud_online/2_training_dataset_and_modeling.ipynb
index 310836c3..6482a89c 100644
--- a/fraud_online/2_training_dataset_and_modeling.ipynb
+++ b/fraud_online/2_training_dataset_and_modeling.ipynb
@@ -480,7 +480,7 @@
     "    name=\"xgboost_fraud_online_model\", \n",
     "    metrics=metrics,\n",
     "    model_schema=model_schema,\n",
-    "    input_example=[4700702588013561], # for testing deployments\n",
+    "    input_example=[4467360740682089], # for testing deployments\n",
     "    description=\"Fraud Online Predictor\")\n",
     "\n",
     "fraud_model.save(model_dir)"
@@ -556,8 +556,8 @@
     "    def predict(self, inputs):\n",
     "        \"\"\" Serves a prediction request usign a trained model\"\"\"\n",
     "        feature_vector = self.fv.get_feature_vector({\"cc_num\": inputs[0][0]})\n",
-    "        inexes_to_remove = [0,1,2]\n",
-    "        feature_vector = [i for j, i in enumerate(feature_vector) if j not in inexes_to_remove]\n",
+    "        indexes_to_remove = [0,1,2]\n",
+    "        feature_vector = [i for j, i in enumerate(feature_vector) if j not in indexes_to_remove]\n",
     "        \n",
     "        return self.model.predict(np.asarray(feature_vector).reshape(1, -1)).tolist() # Numpy Arrays are not JSON serializable"
    ]
@@ -712,7 +712,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "deployment.stop()"
+    "deployment.stop(await_stopped=180)"
    ]
   },
   {
diff --git a/fraud_online/streamlit_app.py b/fraud_online/streamlit_app.py
index 44002cb8..3f61854f 100644
--- a/fraud_online/streamlit_app.py
+++ b/fraud_online/streamlit_app.py
@@ -29,27 +29,28 @@ def print_fancy_header(text, font_size=22, color="#ff5f27"):
 progress_bar.progress(35)
 
-@st.cache(allow_output_mutation=True, suppress_st_warning=True)
+@st.cache_resource()
 def retrive_dataset():
     st.write(36 * "-")
+    trans_fg = fs.get_feature_group('transactions_fraud_online_fg', version=1)
+    cc_nums = trans_fg.show(5).cc_num.tolist()
     print_fancy_header('\n💾 Dataset Retrieving...')
     feature_view = fs.get_feature_view("transactions_fraud_online_fv", 1)
     X_train, X_test, y_train, y_test = feature_view.get_train_test_split(1)
-    return feature_view, X_test, y_test
+    return feature_view, X_train, X_test, y_train, y_test, cc_nums
 
 
-feature_view, X_test, y_test = retrive_dataset()
+feature_view, X_train, X_test, y_train, y_test, cc_nums = retrive_dataset()
 
 # show concatenated training dataset (label is a 'fraud_label' feature)
 st.dataframe(pd.concat([X_test.head(),(y_test.head())], axis=1))
 
 progress_bar.progress(55)
 
-
 def explore_data():
     st.write(36 * "-")
     print_fancy_header('\n👁 Data Exploration...')
     labels = ["Normal", "Fraudulent"]
-    unique, counts = np.unique(y_test.fraud_label.values, return_counts=True)
+    unique, counts = np.unique(y_train.fraud_label.values, return_counts=True)
     values = counts.tolist()
 
     def plot_pie(values, labels):
@@ -66,35 +67,33 @@ def plot_pie(values, labels):
 st.write(36 * "-")
 print_fancy_header('\n🤖 Connecting to Model Registry on Hopsworks...')
 
-@st.cache(suppress_st_warning=True)
+@st.cache()
 def get_deployment(project):
     ms = project.get_model_serving()
     deployment = ms.get_deployment("fraudonlinemodeldeployment")
-    deployment.start()
+    deployment.start(await_running=180)
     return deployment
 
 deployment = get_deployment(project)
 progress_bar.progress(85)
 
-
 st.write(36 * "-")
 print_fancy_header('\n🧠 Interactive predictions...')
 with st.form(key="Selecting cc_num"):
     option = st.selectbox(
         'Select a credit card to get a fraud analysis.',
-        (X_test.cc_num.sample(5).values)
+        cc_nums,
     )
     submit_button = st.form_submit_button(label='Submit')
     if submit_button:
         st.write('You selected:', option)
-        data = {"inputs": [str(option)]}
-        res = deployment.predict(data)
+        res = deployment.predict(inputs=[int(option)])
         negative = "**👌 Not a suspicious**"
         positive = "**🆘 Fraudulent**"
         res = negative if res["predictions"][0] == -1 else positive
         st.write(res, "transaction.")
-        deployment.stop()
 
     progress_bar.progress(100)
     st.write(36 * "-")
-    print_fancy_header('\n🎉 📈 🤝 App Finished Successfully 🤝 📈 🎉')
+
+    print_fancy_header('\n🎉 📈 🤝 App Finished Successfully 🤝 📈 🎉')
\ No newline at end of file