From 14841f91fb2b411e23fe2a1a2a4767ca8acabac3 Mon Sep 17 00:00:00 2001 From: MrBLD Date: Sat, 23 Mar 2024 02:09:01 +0530 Subject: [PATCH] FIX: top k functioning and improved interpretability --- evaluator.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/evaluator.ipynb b/evaluator.ipynb index 8390fd1..c8ca5d6 100644 --- a/evaluator.ipynb +++ b/evaluator.ipynb @@ -1 +1 @@ -{"cells":[{"source":"\"Kaggle\"","metadata":{},"cell_type":"markdown"},{"cell_type":"code","execution_count":1,"id":"5d6b05fa","metadata":{"_cell_guid":"b1076dfc-b9ad-4769-8c92-a6c4dae69d19","_uuid":"8f2839f25d086af736a60e9eeb907d3b93b6e0e5","execution":{"iopub.execute_input":"2023-05-08T13:30:44.255971Z","iopub.status.busy":"2023-05-08T13:30:44.255423Z","iopub.status.idle":"2023-05-08T13:30:44.435569Z","shell.execute_reply":"2023-05-08T13:30:44.43409Z"},"papermill":{"duration":0.188283,"end_time":"2023-05-08T13:30:44.438734","exception":false,"start_time":"2023-05-08T13:30:44.250451","status":"completed"},"tags":[]},"outputs":[],"source":["##-----------------------------------------------------------------------------------##\n","#\n","# Source: adapted for Kaggle from \n","# Building Recommender Systems with Machine Learning and AI, Sundog Education\n","#\n","##----------------------------------------------------------------------------------##\n","\n","from evaluation_data import EvaluationData\n","from evaluated_algorithm import EvaluatedAlgorithm\n","\n","class Evaluator:\n"," \n"," algorithms = []\n"," \n"," def __init__(self, dataset, rankings):\n"," ed = EvaluationData(dataset, rankings)\n"," self.dataset = ed\n"," \n"," def add_algorithm(self, algorithm, name):\n"," alg = EvaluatedAlgorithm(algorithm, name)\n"," self.algorithms.append(alg)\n"," \n"," def evaluate(self, do_top_n):\n"," results = {}\n"," for algorithm in self.algorithms:\n"," print(\"Evaluating \", algorithm.get_name(), \"...\")\n"," results[algorithm.get_name()] = algorithm.evaluate(self.dataset, do_top_n)\n","\n"," # Print results\n"," print(\"\\n\")\n"," \n"," if (do_top_n):\n"," print(\"{:<10} {:<10} {:<10} {:<10} {:<10} {:<10} {:<10} {:<10} {:<10} {:<10}\".format(\n"," \"Algorithm\", \"RMSE\", \"MAE\", \"FCP\", \"HR\", \"cHR\", \"ARHR\", \"Coverage\", \"Diversity\", \"Novelty\"))\n"," for (name, metrics) in results.items():\n"," print(\"{:<10} {:<10.4f} {:<10.4f} {:<10.4f} {:<10.4f} {:<10.4f} {:<10.4f} {:<10.4f} {:<10.4f} {:<10.4f}\".format(\n"," name, metrics[\"RMSE\"], metrics[\"MAE\"], metrics[\"FCP\"], metrics[\"HR\"], metrics[\"cHR\"], metrics[\"ARHR\"],\n"," metrics[\"Coverage\"], metrics[\"Diversity\"], metrics[\"Novelty\"]))\n"," else:\n"," print(\"{:<10} {:<10} {:<10} {:<10}\".format(\"Algorithm\", \"RMSE\", \"MAE\", \"FCP\"))\n"," for (name, metrics) in results.items():\n"," print(\"{:<10} {:<10.4f} {:<10.4f} {:<10.4f}\".format(name, metrics[\"RMSE\"], metrics[\"MAE\"], metrics[\"FCP\"]))\n"," \n"," print(\"\\nLegend:\\n\")\n"," print(\"RMSE: Root Mean Squared Error. Lower values mean better accuracy.\")\n"," print(\"MAE: Mean Absolute Error. Lower values mean better accuracy.\")\n"," print(\"FCP: Fraction of Concordant Pairs. Higher values mean better accuracy.\")\n"," if (do_top_n):\n"," print(\"HR: Hit Rate; how often we are able to recommend a left-out rating. Higher is better.\")\n"," print(\"cHR: Cumulative Hit Rate; hit rate, confined to ratings above a certain threshold. Higher is better.\")\n"," print(\"ARHR: Average Reciprocal Hit Rank - Hit rate that takes the ranking into account. Higher is better.\" )\n"," print(\"Coverage: Ratio of users for whom recommendations above a certain threshold exist. Higher is better.\")\n"," print(\"Diversity: 1-S, where S is the average similarity score between every possible pair of recommendations\")\n"," print(\" for a given user. Higher means more diverse.\")\n"," print(\"Novelty: Average popularity rank of recommended items. Higher means more novel.\")\n"," \n"," def sample_top_n_recs(self, ml, test_subject=85, k=10):\n"," \n"," for algo in self.algorithms:\n"," print(\"\\nUsing recommender \", algo.get_name())\n"," \n"," print(\"\\nBuilding recommendation model...\")\n"," train_set = self.dataset.get_full_train_set()\n"," algo.get_algorithm().fit(train_set)\n"," \n"," print(\"Computing recommendations...\")\n"," test_set = self.dataset.get_anti_test_set_for_user(test_subject)\n"," \n"," predictions = algo.get_algorithm().test(test_set)\n"," \n"," recommendations = []\n"," \n"," print (\"\\nWe recommend:\")\n"," for user_id, movie_id, actual_rating, estimated_rating, _ in predictions:\n"," int_movie_id = int(movie_id)\n"," recommendations.append((int_movie_id, estimated_rating))\n"," \n"," recommendations.sort(key=lambda x: x[1], reverse=True)\n"," \n"," for ratings in recommendations[:10]:\n"," print(ml.get_movie_name(ratings[0]), ratings[1])\n"," \n"]}],"metadata":{"kernelspec":{"display_name":"Python 3","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.7.12"},"papermill":{"default_parameters":{},"duration":13.345722,"end_time":"2023-05-08T13:30:45.165568","environment_variables":{},"exception":null,"input_path":"__notebook__.ipynb","output_path":"__notebook__.ipynb","parameters":{},"start_time":"2023-05-08T13:30:31.819846","version":"2.4.0"}},"nbformat":4,"nbformat_minor":5} \ No newline at end of file +{"cells":[{"cell_type":"markdown","metadata":{},"source":["\"Kaggle\""]},{"cell_type":"code","execution_count":1,"id":"5d6b05fa","metadata":{"_cell_guid":"b1076dfc-b9ad-4769-8c92-a6c4dae69d19","_uuid":"8f2839f25d086af736a60e9eeb907d3b93b6e0e5","execution":{"iopub.execute_input":"2023-05-08T13:30:44.255971Z","iopub.status.busy":"2023-05-08T13:30:44.255423Z","iopub.status.idle":"2023-05-08T13:30:44.435569Z","shell.execute_reply":"2023-05-08T13:30:44.43409Z"},"papermill":{"duration":0.188283,"end_time":"2023-05-08T13:30:44.438734","exception":false,"start_time":"2023-05-08T13:30:44.250451","status":"completed"},"tags":[]},"outputs":[],"source":["##-----------------------------------------------------------------------------------##\n","#\n","# Source: adapted for Kaggle from \n","# Building Recommender Systems with Machine Learning and AI, Sundog Education\n","#\n","##----------------------------------------------------------------------------------##\n","\n","from evaluation_data import EvaluationData\n","from evaluated_algorithm import EvaluatedAlgorithm\n","\n","class Evaluator:\n"," \n"," algorithms = []\n"," \n"," def __init__(self, dataset, rankings):\n"," ed = EvaluationData(dataset, rankings)\n"," self.dataset = ed\n"," \n"," def add_algorithm(self, algorithm, name):\n"," alg = EvaluatedAlgorithm(algorithm, name)\n"," self.algorithms.append(alg)\n"," \n"," def evaluate(self, do_top_n):\n"," results = {}\n"," for algorithm in self.algorithms:\n"," print(\"Evaluating \", algorithm.get_name(), \"...\")\n"," results[algorithm.get_name()] = algorithm.evaluate(self.dataset, do_top_n)\n","\n"," # Print results\n"," print(\"\\n\")\n"," \n"," if (do_top_n):\n"," print(\"{:<10} {:<10} {:<10} {:<10} {:<10} {:<10} {:<10} {:<10} {:<10} {:<10}\".format(\n"," \"Algorithm\", \"RMSE\", \"MAE\", \"FCP\", \"HR\", \"cHR\", \"ARHR\", \"Coverage\", \"Diversity\", \"Novelty\"))\n"," for (name, metrics) in results.items():\n"," print(\"{:<10} {:<10.4f} {:<10.4f} {:<10.4f} {:<10.4f} {:<10.4f} {:<10.4f} {:<10.4f} {:<10.4f} {:<10.4f}\".format(\n"," name, metrics[\"RMSE\"], metrics[\"MAE\"], metrics[\"FCP\"], metrics[\"HR\"], metrics[\"cHR\"], metrics[\"ARHR\"],\n"," metrics[\"Coverage\"], metrics[\"Diversity\"], metrics[\"Novelty\"]))\n"," else:\n"," print(\"{:<10} {:<10} {:<10} {:<10}\".format(\"Algorithm\", \"RMSE\", \"MAE\", \"FCP\"))\n"," for (name, metrics) in results.items():\n"," print(\"{:<10} {:<10.4f} {:<10.4f} {:<10.4f}\".format(name, metrics[\"RMSE\"], metrics[\"MAE\"], metrics[\"FCP\"]))\n"," \n"," print(\"\\nLegend:\\n\")\n"," print(\"RMSE: Root Mean Squared Error. Lower values mean better accuracy.\")\n"," print(\"MAE: Mean Absolute Error. Lower values mean better accuracy.\")\n"," print(\"FCP: Fraction of Concordant Pairs. Higher values mean better accuracy.\")\n"," if (do_top_n):\n"," print(\"HR: Hit Rate; how often we are able to recommend a left-out rating. Higher is better.\")\n"," print(\"cHR: Cumulative Hit Rate; hit rate, confined to ratings above a certain threshold. Higher is better.\")\n"," print(\"ARHR: Average Reciprocal Hit Rank - Hit rate that takes the ranking into account. Higher is better.\" )\n"," print(\"Coverage: Ratio of users for whom recommendations above a certain threshold exist. Higher is better.\")\n"," print(\"Diversity: 1-S, where S is the average similarity score between every possible pair of recommendations\")\n"," print(\" for a given user. Higher means more diverse.\")\n"," print(\"Novelty: Average popularity rank of recommended items. Higher means more novel.\")\n"," \n"," def sample_top_n_recs(self, ml, test_subject=85, k=10):\n"," \n"," for algo in self.algorithms:\n"," print(\"\\nRecommending for: \",ml.get_movie_name(test_subject))\n"," print(\"\\nUsing recommender \", algo.get_name())\n"," \n"," print(\"\\nBuilding recommendation model...\")\n"," train_set = self.dataset.get_full_train_set()\n"," algo.get_algorithm().fit(train_set)\n"," \n"," print(\"Computing recommendations...\")\n"," test_set = self.dataset.get_anti_test_set_for_user(test_subject)\n"," \n"," predictions = algo.get_algorithm().test(test_set)\n"," \n"," recommendations = []\n"," \n"," print (\"\\nWe recommend:\")\n"," for user_id, movie_id, actual_rating, estimated_rating, _ in predictions:\n"," int_movie_id = int(movie_id)\n"," recommendations.append((int_movie_id, estimated_rating))\n"," \n"," recommendations.sort(key=lambda x: x[1], reverse=True)\n"," \n"," for ratings in recommendations[:k]:\n"," print(ml.get_movie_name(ratings[0]), ratings[1])\n"," print(\"-----------------------------------------\")\n"," \n"]}],"metadata":{"kernelspec":{"display_name":"Python 3","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.7.12"},"papermill":{"default_parameters":{},"duration":13.345722,"end_time":"2023-05-08T13:30:45.165568","environment_variables":{},"exception":null,"input_path":"__notebook__.ipynb","output_path":"__notebook__.ipynb","parameters":{},"start_time":"2023-05-08T13:30:31.819846","version":"2.4.0"}},"nbformat":4,"nbformat_minor":5}