Skip to content

Commit

Permalink
Merge pull request #305 from ZJUEarthData/dev/Yongkang
Browse files Browse the repository at this point in the history
feat: Added AffinityPropagation algorithm in clustering
  • Loading branch information
SanyHe authored Feb 27, 2024
2 parents 6fd12db + ca96d0c commit f329ae8
Show file tree
Hide file tree
Showing 4 changed files with 108 additions and 12 deletions.
2 changes: 1 addition & 1 deletion geochemistrypi/data_mining/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@
# "Decision Tree",
# Histogram-based Gradient Boosting,
]
CLUSTERING_MODELS = ["KMeans", "DBSCAN", "Agglomerative"]
CLUSTERING_MODELS = ["KMeans", "DBSCAN", "Agglomerative", "AffinityPropagation"]
DECOMPOSITION_MODELS = ["PCA", "T-SNE", "MDS"]

# The model can deal with missing values
Expand Down
78 changes: 68 additions & 10 deletions geochemistrypi/data_mining/model/clustering.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from ..constants import MLFLOW_ARTIFACT_DATA_PATH, MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH
from ..utils.base import clear_output, save_data, save_fig, save_text
from ._base import WorkflowBase
from .func.algo_clustering._affinitypropagation import affinitypropagation_manual_hyper_parameters
from .func.algo_clustering._agglomerative import agglomerative_manual_hyper_parameters
from .func.algo_clustering._common import plot_silhouette_diagram, plot_silhouette_value_diagram, scatter2d, scatter3d, score
from .func.algo_clustering._dbscan import dbscan_manual_hyper_parameters
Expand Down Expand Up @@ -557,15 +558,63 @@ class AffinityPropagationClustering(ClusteringWorkflowBase):
def __init__(
self,
*,
damping=0.5,
max_iter=200,
convergence_iter=15,
copy=True,
preference=None,
affinity="euclidean",
verbose=False,
random_state=None,
):
damping: float = 0.5,
max_iter: int = 200,
convergence_iter: int = 15,
copy: bool = True,
preference: Optional[Dict] = None,
affinity: str = "euclidean",
verbose: bool = False,
random_state: Optional[Dict] = None,
) -> None:
"""
Parameters
----------
damping : float, default=0.5
Damping factor in the range `[0.5, 1.0)` is the extent to
which the current value is maintained relative to
incoming values (weighted 1 - damping). This in order
to avoid numerical oscillations when updating these
values (messages).
max_iter : int, default=200
Maximum number of iterations.
convergence_iter : int, default=15
Number of iterations with no change in the number
of estimated clusters that stops the convergence.
copy : bool, default=True
Make a copy of input data.
preference : array-like of shape (n_samples,) or float, default=None
Preferences for each point - points with larger values of
preferences are more likely to be chosen as exemplars. The number
of exemplars, ie of clusters, is influenced by the input
preferences value. If the preferences are not passed as arguments,
they will be set to the median of the input similarities.
affinity : {'euclidean', 'precomputed'}, default='euclidean'
Which affinity to use. At the moment 'precomputed' and
``euclidean`` are supported. 'euclidean' uses the
negative squared euclidean distance between points.
verbose : bool, default=False
Whether to be verbose.
random_state : int, RandomState instance or None, default=None
Pseudo-random number generator to control the starting state.
Use an int for reproducible results across function calls.
See the :term:`Glossary <random_state>`.
.. versionadded:: 0.23
this parameter was previously hardcoded as 0.
References
----------------------------------------
Scikit-learn API: sklearn.cluster.AffinityPropagation
https://scikit-learn.org/stable/modules/generated/sklearn.cluster.AffinityPropagation
"""

super().__init__()
self.damping = damping
Expand All @@ -592,7 +641,16 @@ def __init__(
)
self.naming = AffinityPropagationClustering.name

pass
@classmethod
def manual_hyper_parameters(cls) -> Dict:
    """Ask the user for this model's hyper-parameters and return them.

    Prints a banner built from ``cls.name``, delegates the prompting to
    ``affinitypropagation_manual_hyper_parameters``, calls ``clear_output``
    and hands back the collected dictionary unchanged.
    """
    banner = f"-*-*- {cls.name} - Hyper-parameters Specification -*-*-"
    print(banner)
    chosen = affinitypropagation_manual_hyper_parameters()
    clear_output()
    return chosen

def special_components(self, **kwargs: Union[Dict, np.ndarray, int]) -> None:
    """Invoke all special application functions for this algorithm by Scikit-learn framework.

    As written, the body consists solely of this docstring: the keyword
    arguments are accepted but not used, and the method returns None.
    """


class MeanShiftClustering(ClusteringWorkflowBase):
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
from typing import Dict

from rich import print

from ....constants import SECTION
from ....data.data_readiness import float_input, num_input, str_input


def affinitypropagation_manual_hyper_parameters() -> Dict:
    """Interactively collect the AffinityPropagation hyper-parameters.

    Prompts, in order, for the damping factor, the maximum number of
    iterations, the number of convergence iterations and the affinity.

    Returns
    -------
    hyper_parameters : dict
        Mapping with the keys "damping", "max_iter", "convergence_iter" and
        "affinity", holding the values entered by the user.
    """
    print("damping: The extent to which the current value is maintained relative to incoming values ")
    # Damping is a factor, not a cluster count; the estimator documents its
    # valid interval as 0.5 <= damping < 1.0, so the hint excludes 1.
    print("Please specify the damping factor for AffinityPropagation. A good starting range could be between 0.5 (inclusive) and 1 (exclusive), such as 0.5.")
    damping = float_input(0.5, SECTION[2], "damping: ")
    print("Max Iter: Maximum number of iterations of the algorithm for a single run.")
    print("Please specify the maximum number of iterations of the affinitypropagation algorithm for a single run. A good starting range could be between 100 and 400, such as 200.")
    max_iter = num_input(SECTION[2], "Max Iter: ")
    print("convergence_iter: Number of iterations with no change in the number of estimated clusters that stops the convergence.")
    print("Please specify the convergence number of iterations of the affinitypropagation algorithm. A good starting range could be between 10 and 15, such as 15.")
    convergence_iter = num_input(SECTION[2], "convergence_iter: ")
    print("affinity: Different affinity methods for affinitypropagation in clustering.")
    print("Please specify the affinity to use for the computation. It is generally recommended to leave it as 'euclidean'.")
    # Keep the option list and the chosen value under separate names.
    # NOTE(review): "precomputed" presumably requires the input data to already
    # be a similarity matrix -- confirm the clustering workflow supports that.
    affinity_options = ["euclidean", "precomputed"]
    affinity = str_input(affinity_options, SECTION[2])
    hyper_parameters = {
        "damping": damping,
        "max_iter": max_iter,
        "convergence_iter": convergence_iter,
        "affinity": affinity,
    }
    return hyper_parameters
10 changes: 9 additions & 1 deletion geochemistrypi/data_mining/process/cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

import pandas as pd

from ..model.clustering import Agglomerative, ClusteringWorkflowBase, DBSCANClustering, KMeansClustering
from ..model.clustering import AffinityPropagationClustering, Agglomerative, ClusteringWorkflowBase, DBSCANClustering, KMeansClustering
from ._base import ModelSelectionBase


Expand Down Expand Up @@ -54,6 +54,14 @@ def activate(
n_clusters=hyper_parameters["n_clusters"],
linkage=hyper_parameters["linkage"],
)
elif self.model_name == "AffinityPropagation":
hyper_parameters = AffinityPropagationClustering.manual_hyper_parameters()
self.clt_workflow = AffinityPropagationClustering(
damping=hyper_parameters["damping"],
max_iter=hyper_parameters["max_iter"],
convergence_iter=hyper_parameters["convergence_iter"],
affinity=hyper_parameters["affinity"],
)
elif self.model_name == "":
pass

Expand Down

0 comments on commit f329ae8

Please sign in to comment.