From 1ac324bb472abd3143dfebc212a2fb119d3fc88a Mon Sep 17 00:00:00 2001 From: Max Halford Date: Sat, 4 Jan 2025 01:11:51 +0100 Subject: [PATCH] partial_row_coordinates --- docs/content/famd.ipynb | 2 +- docs/content/mfa.ipynb | 477 ++++++++++++++++++++++++---------------- docs/content/pca.ipynb | 202 ++++++++--------- prince/mfa.py | 116 ++++++---- 4 files changed, 463 insertions(+), 334 deletions(-) diff --git a/docs/content/famd.ipynb b/docs/content/famd.ipynb index b6c0205..dadfe37 100644 --- a/docs/content/famd.ipynb +++ b/docs/content/famd.ipynb @@ -21,7 +21,7 @@ "source": [ "## Resources\n", "\n", - "🤷‍♂️" + "- [Wikipedia article](https://en.wikipedia.org/wiki/Factor_analysis_of_mixed_data)" ] }, { diff --git a/docs/content/mfa.ipynb b/docs/content/mfa.ipynb index c04161c..30fab5a 100644 --- a/docs/content/mfa.ipynb +++ b/docs/content/mfa.ipynb @@ -28,7 +28,8 @@ "metadata": {}, "source": [ "- [*Multiple Factor Analysis* by Hervé Abdi](https://www.utdallas.edu/~herve/Abdi-MFA2007-pretty.pdf)\n", - "- [*Multiple Factor Analysis: main features and application to sensory data* by Jérôme Pagès](http://factominer.free.fr/more/PagesAFM.pdf)" + "- [*Multiple Factor Analysis: main features and application to sensory data* by Jérôme Pagès](http://factominer.free.fr/more/PagesAFM.pdf)\n", + "- [Wikipedia article](https://en.wikipedia.org/wiki/Multiple_factor_analysis)" ] }, { @@ -478,7 +479,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 4, "metadata": {}, "outputs": [ { @@ -487,12 +488,14 @@ "True" ] }, - "execution_count": 2, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ + "import pandas as pd\n", + "\n", "isinstance(dataset.columns, pd.MultiIndex)" ] }, @@ -514,7 +517,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 5, "metadata": { "execution": { "iopub.execute_input": "2024-09-07T18:18:07.158550Z", @@ -530,7 +533,7 @@ "['2021-22', '2022-23', '2023-24']" ] }, - "execution_count": 3, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -542,7 +545,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 6, "metadata": { "execution": { "iopub.execute_input": "2024-09-07T18:18:07.170307Z", @@ -592,7 +595,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 7, "metadata": { "execution": { "iopub.execute_input": "2024-09-07T18:18:07.205973Z", @@ -665,7 +668,7 @@ "2 0.412 10.32% 85.36%" ] }, - "execution_count": 5, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -692,7 +695,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 8, "metadata": { "execution": { "iopub.execute_input": "2024-09-07T18:18:07.266647Z", @@ -842,7 +845,7 @@ "Wolverhampton Wanderers -1.539976 -0.294226 0.882576" ] }, - "execution_count": 6, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -852,16 +855,15 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ - "However, all the other methods are not implemented yet. They will raise a `NotImplemented` exception if you call them." + "There is also a `partial_row_coordinates` method that returns the coordinates projected onto each group." ] }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 9, "metadata": { "execution": { "iopub.execute_input": "2024-09-07T18:18:07.290866Z", @@ -895,13 +897,13 @@ "\n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", @@ -928,221 +930,221 @@ " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", "
group2021-222022-232023-24
component012
Arsenal2.582726-0.2226945.3022439.3751868.365184-3.08847413.1520213.4706835.6173420.690262-0.0595171.4170842.5056242.235689-0.8254303.5150250.9275791.501298
Aston Villa-4.5082856.7627903.3604870.425462-0.1312481.3883932.062462-0.1177260.459079-1.2048901.8074320.8981280.113710-0.0350780.3710640.551216-0.0314640.122694
Brentford-4.8246996.8314462.321044-0.9137981.656430-5.108336-8.488562-0.666794-2.719762-1.2894551.8257810.620325-0.2442230.442700-1.365260-2.268664-0.178208-0.726887
Brighton & Hove Albion-3.8364270.863534-6.7556431.2329510.1087931.357369-6.377201-3.766425-4.755869-1.0253280.230789-1.8055210.3295200.0290760.362772-1.704379-1.006619-1.271060
Chelsea5.327119-8.454770-3.978688-5.636604-4.6045900.8805350.310700-1.015164-1.0039181.423732-2.259632-1.063349-1.506446-1.2306280.2353330.083038-0.271314-0.268308
Crystal Palace-4.1392021.363018-6.617784-5.658228-4.2955320.216513-5.085854-1.679302-2.682664-1.1062480.364282-1.768677-1.512225-1.1480290.057866-1.359252-0.448812-0.716972
Everton-7.57857211.2767453.996240-9.227286-8.5946153.749277-6.645565-0.613473-1.545628-2.0254593.0138371.068040-2.466096-2.2970071.002036-1.776102-0.163958-0.413086
Liverpool11.734074-14.796924-1.8514942.9784623.350864-2.8582288.447234-1.6896510.0003183.136063-3.954644-0.4948320.7960270.895556-0.7638942.257618-0.4515780.000085
Manchester City12.520592-14.7302610.21811712.36563011.578338-5.56144313.2087911.3495103.6465643.346269-3.9368280.0582943.3048543.094441-1.4863583.5301980.3606720.974585
Manchester United-1.7300522.061911-1.4524604.9467064.456987-0.769662-1.0901091.9403992.133472-0.4623760.551069-0.3881861.3220631.191180-0.205701-0.2913440.5185930.570194
Newcastle United-5.2014896.386372-0.8449264.2512227.896945-10.4556300.8980032.1323211.507860-1.3901561.706830-0.2258161.1361872.110547-2.7943850.2400010.5698870.402993
Tottenham Hotspur4.108539-3.6081862.811348-0.139551-2.9462466.0670431.7620601.8741262.2582201.098053-0.9643280.751364-0.037297-0.7874171.6214850.4709300.5008810.603535
West Ham United-1.2860501.9613800.603414-6.459029-8.2001807.096018-5.577193-2.251314-2.847181-0.3437110.5242010.161269-1.726248-2.1915901.896491-1.490567-0.601689-0.760941
Wolverhampton Wanderers-3.1682754.3056402.888101-7.541121-8.6411317.086626-6.5767881.032811-0.067834-0.8467571.1507310.771878-2.015449-2.3094391.893981-1.7577210.276030-0.018129
\n", "" ], "text/plain": [ - "group 2021-22 2022-23 \\\n", - "component 0 1 2 0 1 \n", - "Team \n", - "Arsenal 2.582726 -0.222694 5.302243 9.375186 8.365184 \n", - "Aston Villa -4.508285 6.762790 3.360487 0.425462 -0.131248 \n", - "Brentford -4.824699 6.831446 2.321044 -0.913798 1.656430 \n", - "Brighton & Hove Albion -3.836427 0.863534 -6.755643 1.232951 0.108793 \n", - "Chelsea 5.327119 -8.454770 -3.978688 -5.636604 -4.604590 \n", - "Crystal Palace -4.139202 1.363018 -6.617784 -5.658228 -4.295532 \n", - "Everton -7.578572 11.276745 3.996240 -9.227286 -8.594615 \n", - "Liverpool 11.734074 -14.796924 -1.851494 2.978462 3.350864 \n", - "Manchester City 12.520592 -14.730261 0.218117 12.365630 11.578338 \n", - "Manchester United -1.730052 2.061911 -1.452460 4.946706 4.456987 \n", - "Newcastle United -5.201489 6.386372 -0.844926 4.251222 7.896945 \n", - "Tottenham Hotspur 4.108539 -3.608186 2.811348 -0.139551 -2.946246 \n", - "West Ham United -1.286050 1.961380 0.603414 -6.459029 -8.200180 \n", - "Wolverhampton Wanderers -3.168275 4.305640 2.888101 -7.541121 -8.641131 \n", + " 2021-22 2022-23 \\\n", + " 0 1 2 0 1 \n", + "Team \n", + "Arsenal 0.690262 -0.059517 1.417084 2.505624 2.235689 \n", + "Aston Villa -1.204890 1.807432 0.898128 0.113710 -0.035078 \n", + "Brentford -1.289455 1.825781 0.620325 -0.244223 0.442700 \n", + "Brighton & Hove Albion -1.025328 0.230789 -1.805521 0.329520 0.029076 \n", + "Chelsea 1.423732 -2.259632 -1.063349 -1.506446 -1.230628 \n", + "Crystal Palace -1.106248 0.364282 -1.768677 -1.512225 -1.148029 \n", + "Everton -2.025459 3.013837 1.068040 -2.466096 -2.297007 \n", + "Liverpool 3.136063 -3.954644 -0.494832 0.796027 0.895556 \n", + "Manchester City 3.346269 -3.936828 0.058294 3.304854 3.094441 \n", + "Manchester United -0.462376 0.551069 -0.388186 1.322063 1.191180 \n", + "Newcastle United -1.390156 1.706830 -0.225816 1.136187 2.110547 \n", + "Tottenham Hotspur 1.098053 -0.964328 0.751364 -0.037297 -0.787417 \n", + "West Ham United -0.343711 0.524201 0.161269 -1.726248 -2.191590 \n", + "Wolverhampton Wanderers -0.846757 1.150731 0.771878 -2.015449 -2.309439 \n", "\n", - "group 2023-24 \n", - "component 2 0 1 2 \n", - "Team \n", - "Arsenal -3.088474 13.152021 3.470683 5.617342 \n", - "Aston Villa 1.388393 2.062462 -0.117726 0.459079 \n", - "Brentford -5.108336 -8.488562 -0.666794 -2.719762 \n", - "Brighton & Hove Albion 1.357369 -6.377201 -3.766425 -4.755869 \n", - "Chelsea 0.880535 0.310700 -1.015164 -1.003918 \n", - "Crystal Palace 0.216513 -5.085854 -1.679302 -2.682664 \n", - "Everton 3.749277 -6.645565 -0.613473 -1.545628 \n", - "Liverpool -2.858228 8.447234 -1.689651 0.000318 \n", - "Manchester City -5.561443 13.208791 1.349510 3.646564 \n", - "Manchester United -0.769662 -1.090109 1.940399 2.133472 \n", - "Newcastle United -10.455630 0.898003 2.132321 1.507860 \n", - "Tottenham Hotspur 6.067043 1.762060 1.874126 2.258220 \n", - "West Ham United 7.096018 -5.577193 -2.251314 -2.847181 \n", - "Wolverhampton Wanderers 7.086626 -6.576788 1.032811 -0.067834 " + " 2023-24 \n", + " 2 0 1 2 \n", + "Team \n", + "Arsenal -0.825430 3.515025 0.927579 1.501298 \n", + "Aston Villa 0.371064 0.551216 -0.031464 0.122694 \n", + "Brentford -1.365260 -2.268664 -0.178208 -0.726887 \n", + "Brighton & Hove Albion 0.362772 -1.704379 -1.006619 -1.271060 \n", + "Chelsea 0.235333 0.083038 -0.271314 -0.268308 \n", + "Crystal Palace 0.057866 -1.359252 -0.448812 -0.716972 \n", + "Everton 1.002036 -1.776102 -0.163958 -0.413086 \n", + "Liverpool -0.763894 2.257618 -0.451578 0.000085 \n", + "Manchester City -1.486358 3.530198 0.360672 0.974585 \n", + "Manchester United -0.205701 -0.291344 0.518593 0.570194 \n", + "Newcastle United -2.794385 0.240001 0.569887 0.402993 \n", + "Tottenham Hotspur 1.621485 0.470930 0.500881 0.603535 \n", + "West Ham United 1.896491 -1.490567 -0.601689 -0.760941 \n", + "Wolverhampton Wanderers 1.893981 -1.757721 0.276030 -0.018129 " ] }, - "execution_count": 7, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "mfa.group_row_coordinates(dataset)" + "mfa.partial_row_coordinates(dataset)" ] }, { @@ -1155,7 +1157,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 15, "metadata": { "execution": { "iopub.execute_input": "2024-09-07T18:18:07.324369Z", @@ -1170,23 +1172,23 @@ "text/html": [ "\n", "\n", - "
\n", + "
\n", "" ], "text/plain": [ - "alt.Chart(...)" + "alt.LayerChart(...)" ] }, - "execution_count": 9, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } @@ -1259,7 +1261,102 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "The first axis explains most of the difference between the wine ratings. This difference is actually due to the oak type of the barrels they were fermented in." + "The first axis explains most of the difference between the wine ratings. This difference is actually due to the oak type of the barrels they were fermented in.\n", + "\n", + "The `show_partial_rows` argument allows showing the global row coordinates together with the partial row coordinates. All the coordinates of each sample are connected with edges." + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "
\n", + "" + ], + "text/plain": [ + "alt.LayerChart(...)" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mfa.plot(\n", + " dataset,\n", + " show_partial_rows=True\n", + ")" ] }, { @@ -1280,7 +1377,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 10, "metadata": { "execution": { "iopub.execute_input": "2024-09-07T18:18:07.395738Z", @@ -1478,7 +1575,7 @@ "Wolverhampton Wanderers 11 8 19 31 58 41" ] }, - "execution_count": 20, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } diff --git a/docs/content/pca.ipynb b/docs/content/pca.ipynb index 3469b96..71df4ef 100644 --- a/docs/content/pca.ipynb +++ b/docs/content/pca.ipynb @@ -40,7 +40,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 1, "metadata": { "execution": { "iopub.execute_input": "2024-09-07T18:18:08.831204Z", @@ -55,19 +55,19 @@ "text/html": [ "\n", - "\n", + "
\n", " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", @@ -84,73 +84,73 @@ " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", "
  coaloilgasnuclearhydrowindsolarother renewablescoaloilgasnuclearhydrowindsolarother renewables
continent
AfricaAlgeria1%35%64%0%0%0%0%0%
South AmericaArgentina1%35%50%2%10%1%0%1%
OceaniaAustralia28%34%30%0%2%3%3%1%
EuropeAustria9%37%22%0%25%4%1%3%
AsiaAzerbaijan0%33%65%0%2%0%0%0%AfricaAlgeria1%35%64%0%0%0%0%0%
South AmericaArgentina1%35%50%2%10%1%0%1%
OceaniaAustralia28%34%30%0%2%3%3%1%
EuropeAustria9%37%22%0%25%4%1%3%
AsiaAzerbaijan0%33%65%0%2%0%0%0%
\n" ], "text/plain": [ - "" + "" ] }, - "execution_count": 9, + "execution_count": 1, "metadata": {}, "output_type": "execute_result" } @@ -180,7 +180,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 2, "metadata": { "execution": { "iopub.execute_input": "2024-09-07T18:18:09.271364Z", @@ -241,7 +241,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 3, "metadata": { "execution": { "iopub.execute_input": "2024-09-07T18:18:09.300402Z", @@ -314,7 +314,7 @@ "2 1.403 17.54% 61.59%" ] }, - "execution_count": 11, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" } @@ -335,7 +335,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 4, "metadata": { "execution": { "iopub.execute_input": "2024-09-07T18:18:09.337429Z", @@ -351,7 +351,7 @@ "array([1.96301214, 1.56069986, 1.40327823])" ] }, - "execution_count": 12, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -362,7 +362,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 5, "metadata": { "execution": { "iopub.execute_input": "2024-09-07T18:18:09.380475Z", @@ -378,7 +378,7 @@ "array([24.5376518 , 19.50874822, 17.54097785])" ] }, - "execution_count": 13, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -389,7 +389,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 6, "metadata": { "execution": { "iopub.execute_input": "2024-09-07T18:18:09.406971Z", @@ -405,7 +405,7 @@ "array([24.5376518 , 44.04640003, 61.58737788])" ] }, - "execution_count": 14, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -424,7 +424,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 8, "metadata": { "execution": { "iopub.execute_input": "2024-09-07T18:18:09.427611Z", @@ -439,23 +439,23 @@ "text/html": [ "\n", "\n", - "
\n", + "
\n", "