Skip to content

Commit

Permalink
refactor mfa
Browse files Browse the repository at this point in the history
  • Loading branch information
MaxHalford committed Jan 3, 2025
1 parent 3aedb36 commit 96d8c5e
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 60 deletions.
80 changes: 22 additions & 58 deletions prince/mfa.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
"""Multiple Factor Analysis (MFA)"""

from __future__ import annotations

import collections

import altair as alt
import numpy as np
import pandas as pd
import sklearn.utils

from prince import pca, utils

Expand All @@ -22,8 +22,8 @@ def __init__(
engine="sklearn",
):
super().__init__(
rescale_with_mean=False,
rescale_with_std=False,
rescale_with_mean=True,
rescale_with_std=True,
n_components=n_components,
n_iter=n_iter,
copy=copy,
Expand All @@ -33,18 +33,11 @@ def __init__(
)
collections.UserDict.__init__(self)

def _check_input(self, X):
if self.check_input:
sklearn.utils.check_array(X, dtype=[str, np.number])

@utils.check_is_dataframe_input
def fit(self, X, y=None, groups=None):
# Checks groups are provided
self.groups_ = self._determine_groups(X, groups)

# Check input
self._check_input(X)

# Check group types are consistent
self.all_nums_ = {}
for name, cols in sorted(self.groups_.items()):
Expand All @@ -71,16 +64,11 @@ def fit(self, X, y=None, groups=None):
self[name] = fa.fit(X.loc[:, cols])

# Fit the global PCA
X = (X - X.mean()) / (X.std())
Z = pd.concat(
(X[cols] / self[name].eigenvalues_[0] for name, cols in self.groups_.items()),
axis="columns",
Z = self._build_Z(X)
column_weights = np.array(
[1 / self[name].eigenvalues_[0] for name, cols in self.groups_.items() for _ in cols]
)
super().fit(Z)
self.total_inertia_ = sum(self.eigenvalues_)

# TODO: column_coordinates_ is not implemented yet
delattr(self, "column_coordinates_")
super().fit(Z, column_weight=column_weights)

return self

Expand All @@ -101,41 +89,17 @@ def _determine_groups(self, X, provided_groups):
groups = provided_groups
return groups

@property
@utils.check_is_fitted
def eigenvalues_(self):
"""Returns the eigenvalues associated with each principal component."""
return np.square(self.svd_.s)

@utils.check_is_dataframe_input
@utils.check_is_fitted
def row_coordinates(self, X):
"""Returns the row principal coordinates."""

if (X.index != self.row_contributions_.index).any():
raise NotImplementedError("Supplementary rows are not supported yet")

X = (X - X.mean()) / ((X - X.mean()) ** 2).sum() ** 0.5
Z = pd.concat(
(X[cols] / self[g].eigenvalues_[0] ** 0.5 for g, cols in self.groups_.items()),
def _build_Z(self, X):
return pd.concat(
(X[cols] for _, cols in self.groups_.items()),
axis="columns",
)
U = self.svd_.U
s = self.svd_.s
M = np.full(len(X), 1 / len(X))

return (Z @ Z.T) @ (M[:, np.newaxis] ** (-0.5) * U * s**-1)

@utils.check_is_dataframe_input
@utils.check_is_fitted
def row_coordinates_bis(self, X):
def row_coordinates(self, X):
"""Returns the row principal coordinates."""

X = (X - X.mean()) / X.std()
Z = pd.concat(
(X[cols] / self[name].eigenvalues_[0] for name, cols in self.groups_.items()),
axis="columns",
)
Z = self._build_Z(X)
return super().row_coordinates(Z)

@utils.check_is_dataframe_input
Expand Down Expand Up @@ -174,7 +138,8 @@ def add_index(g, group_name):
@utils.check_is_dataframe_input
@utils.check_is_fitted
def column_coordinates(self, X):
raise NotImplementedError("MFA inherits from PCA, but this method is not implemented yet")
Z = self._build_Z(X)
return super().column_coordinates(Z)

@utils.check_is_dataframe_input
@utils.check_is_fitted
Expand All @@ -184,27 +149,26 @@ def inverse_transform(self, X):
@utils.check_is_dataframe_input
@utils.check_is_fitted
def row_standard_coordinates(self, X):
raise NotImplementedError("MFA inherits from PCA, but this method is not implemented yet")
Z = self._build_Z(X)
return super().row_standard_coordinates(Z)

@utils.check_is_dataframe_input
@utils.check_is_fitted
def row_cosine_similarities(self, X):
raise NotImplementedError("MFA inherits from PCA, but this method is not implemented yet")
Z = self._build_Z(X)
return super().row_cosine_similarities(Z)

@utils.check_is_dataframe_input
@utils.check_is_fitted
def column_correlations(self, X):
raise NotImplementedError("MFA inherits from PCA, but this method is not implemented yet")
Z = self._build_Z(X)
return super().column_correlations(Z)

@utils.check_is_dataframe_input
@utils.check_is_fitted
def column_cosine_similarities_(self, X):
raise NotImplementedError("MFA inherits from PCA, but this method is not implemented yet")

@property
@utils.check_is_fitted
def column_contributions_(self):
raise NotImplementedError("MFA inherits from PCA, but this method is not implemented yet")
Z = self._build_Z(X)
return super().column_cosine_similarities_(Z)

@utils.check_is_dataframe_input
@utils.check_is_fitted
Expand Down
2 changes: 0 additions & 2 deletions tests/test_mfa.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@ class TestMFA:

@pytest.fixture(autouse=True)
def _prepare(self, sup_rows, sup_cols):

self.sup_rows = sup_rows
self.sup_cols = sup_cols

Expand Down Expand Up @@ -80,7 +79,6 @@ def test_eigenvalues(self):
)

def test_group_eigenvalues(self):

for i, group in enumerate(self.groups, start=1):
F = load_df_from_R(f"mfa$separate.analyses$Gr{i}$eig")[: self.mfa.n_components]
P = self.mfa[group]._eigenvalues_summary
Expand Down

0 comments on commit 96d8c5e

Please sign in to comment.