From a78608c6473769d64bbb60559dce896c37888740 Mon Sep 17 00:00:00 2001 From: Gustavo Rosa Date: Sat, 30 Apr 2022 16:18:09 -0300 Subject: [PATCH] chore(opfython): Adds second part of annotated typing. --- opfython/core/heap.py | 100 +++++++++++++++-------------- opfython/core/node.py | 97 +++++++++++++++------------- opfython/core/opf.py | 65 ++++++++++--------- opfython/core/subgraph.py | 69 +++++++++++--------- opfython/models/knn_supervised.py | 76 ++++++++++++++-------- opfython/models/semi_supervised.py | 27 +++++--- opfython/models/supervised.py | 69 +++++++++++++------- opfython/models/unsupervised.py | 68 +++++++++++--------- 8 files changed, 326 insertions(+), 245 deletions(-) diff --git a/opfython/core/heap.py b/opfython/core/heap.py index a9bab44..3df3415 100644 --- a/opfython/core/heap.py +++ b/opfython/core/heap.py @@ -1,6 +1,8 @@ """Standard Heap implementation. """ +from typing import List, Optional + import opfython.utils.constants as c import opfython.utils.exception as e @@ -8,12 +10,12 @@ class Heap: """A standard implementation of a Heap structure.""" - def __init__(self, size=1, policy="min"): + def __init__(self, size: Optional[int] = 1, policy: Optional[str] = "min") -> None: """Initialization method. Args: - size (int): Maximum size of the heap. - policy (str): Heap's policy (`min` or `max`). + size: Maximum size of the heap. + policy: Heap's policy (`min` or `max`). """ @@ -39,13 +41,13 @@ def __init__(self, size=1, policy="min"): self.last = -1 @property - def size(self): - """int: Maximum size of the heap.""" + def size(self) -> int: + """Maximum size of the heap.""" return self._size @size.setter - def size(self, size): + def size(self, size: int) -> None: if not isinstance(size, int): raise e.TypeError("`size` should be an integer") if size < 1: @@ -54,78 +56,78 @@ def size(self, size): self._size = size @property - def policy(self): - """str: Policy that rules the heap.""" + def policy(self) -> str: + """Policy that rules the heap.""" return self._policy @policy.setter - def policy(self, policy): + def policy(self, policy: str) -> None: if policy not in ["min", "max"]: raise e.ValueError("`policy` should be `min` or `max`") self._policy = policy @property - def cost(self): - """list: List of nodes' costs.""" + def cost(self) -> List[float]: + """List of nodes' costs.""" return self._cost @cost.setter - def cost(self, cost): + def cost(self, cost: List[float]) -> None: if not isinstance(cost, list): raise e.TypeError("`cost` should be a list") self._cost = cost @property - def color(self): - """list: List of nodes' colors.""" + def color(self) -> List[int]: + """List of nodes' colors.""" return self._color @color.setter - def color(self, color): + def color(self, color: List[int]) -> None: if not isinstance(color, list): raise e.TypeError("`color` should be a list") self._color = color @property - def p(self): - """list: List of nodes' values.""" + def p(self) -> List[int]: + """List of nodes' values.""" return self._p @p.setter - def p(self, p): + def p(self, p: List[int]) -> None: if not isinstance(p, list): raise e.TypeError("`p` should be a list") self._p = p @property - def pos(self): - """list: List of nodes' positioning markers.""" + def pos(self) -> List[int]: + """List of nodes' positioning markers.""" return self._pos @pos.setter - def pos(self, pos): + def pos(self, pos: List[int]) -> None: if not isinstance(pos, list): raise e.TypeError("`pos` should be a list") self._pos = pos @property - def last(self): - """int: Last element identifier.""" + def last(self) -> int: + """Last element identifier.""" return self._last @last.setter - def last(self, last): + def last(self, last: int) -> None: if not isinstance(last, int): raise e.TypeError("`last` should be an integer") if last < -1: @@ -133,11 +135,11 @@ def last(self, last): self._last = last - def is_full(self): + def is_full(self) -> bool: """Checks if the heap is full. Returns: - A boolean indicating whether the heap is full. + (bool): A boolean indicating whether the heap is full. """ @@ -146,11 +148,11 @@ def is_full(self): return False - def is_empty(self): + def is_empty(self) -> bool: """Checks if the heap is empty. Returns: - A boolean indicating whether the heap is empty. + (bool): A boolean indicating whether the heap is empty. """ @@ -159,50 +161,50 @@ def is_empty(self): return False - def dad(self, i): + def dad(self, i: int) -> int: """Gathers the position of the node's dad. Args: - i (int): Node's position. + i: Node's position. Returns: - The position of node's dad. + (int): The position of node's dad. """ return int(((i - 1) / 2)) - def left_son(self, i): + def left_son(self, i: int) -> int: """Gathers the position of the node's left son. Args: - i (int): Node's position. + i: Node's position. Returns: - The position of node's left son + (int): The position of node's left son """ return int((2 * i + 1)) - def right_son(self, i): + def right_son(self, i: int) -> int: """Gathers the position of the node's right son. Args: - i (int): Node's position. + i: Node's position. Returns: - The position of node's right son. + (int): The position of node's right son. """ return int((2 * i + 2)) - def go_up(self, i): + def go_up(self, i: int) -> None: """Goes up in the heap. Args: - i (int): Position to be achieved. + i: Position to be achieved. """ @@ -243,11 +245,11 @@ def go_up(self, i): # Gathers the new dad's position j = self.dad(i) - def go_down(self, i): + def go_down(self, i: int) -> None: """Goes down in the heap. Args: - i (int): Position to be achieved. + i: Position to be achieved. """ @@ -294,14 +296,14 @@ def go_down(self, i): # Goes down in the heap self.go_down(j) - def insert(self, p): + def insert(self, p: int) -> bool: """Inserts a new node into the heap. Args: - p (int): Node's value to be inserted. + p: Node's value to be inserted. Returns: - Boolean indicating whether insertion was performed correctly. + (bool): Boolean indicating whether insertion was performed correctly. """ @@ -320,11 +322,11 @@ def insert(self, p): return False - def remove(self): + def remove(self) -> int: """Removes a node from the heap. Returns: - The removed node value. + (int): The removed node value. """ @@ -352,12 +354,12 @@ def remove(self): return False - def update(self, p, cost): + def update(self, p: int, cost: float) -> None: """Updates a node with a new value. Args: - p (int): Node's position. - cost (float): Node's cost. + p: Node's position. + cost: Node's cost. """ diff --git a/opfython/core/node.py b/opfython/core/node.py index e306ee9..f5283d5 100644 --- a/opfython/core/node.py +++ b/opfython/core/node.py @@ -1,6 +1,8 @@ """Node structure that belongs to the Optimum-Path Forest. """ +from typing import List, Optional + import numpy as np import opfython.utils.constants as c @@ -13,13 +15,18 @@ class Node: """A Node class is used as the lowest structure level in the OPF workflow.""" - def __init__(self, idx=0, label=0, features=None): + def __init__( + self, + idx: Optional[int] = 0, + label: Optional[int] = 0, + features: Optional[np.array] = None, + ) -> None: """Initialization method. Args: - idx (int): The node's identifier. - label (int): The node's label. - features (np.array): An array of features. + idx: The node's identifier. + label: The node's label. + features: An array of features. """ @@ -66,13 +73,13 @@ def __init__(self, idx=0, label=0, features=None): self.relevant = c.IRRELEVANT @property - def idx(self): - """int: Node's index.""" + def idx(self) -> int: + """Node's index.""" return self._idx @idx.setter - def idx(self, idx): + def idx(self, idx: int) -> None: if not isinstance(idx, int): raise e.TypeError("`idx` should be an integer") if idx < 0: @@ -81,13 +88,13 @@ def idx(self, idx): self._idx = idx @property - def label(self): - """int: Node's label (true label).""" + def label(self) -> int: + """Node's label (true label).""" return self._label @label.setter - def label(self, label): + def label(self, label: int) -> None: if not isinstance(label, int): raise e.TypeError("`label` should be an integer") if label < 0: @@ -96,13 +103,13 @@ def label(self, label): self._label = label @property - def predicted_label(self): - """int: Node's predicted label.""" + def predicted_label(self) -> int: + """Node's predicted label.""" return self._predicted_label @predicted_label.setter - def predicted_label(self, predicted_label): + def predicted_label(self, predicted_label: int) -> None: if not isinstance(predicted_label, int): raise e.TypeError("`predicted_label` should be an integer") if predicted_label < 0: @@ -111,13 +118,13 @@ def predicted_label(self, predicted_label): self._predicted_label = predicted_label @property - def cluster_label(self): - """int: Node's cluster assignment identifier.""" + def cluster_label(self) -> int: + """Node's cluster assignment identifier.""" return self._cluster_label @cluster_label.setter - def cluster_label(self, cluster_label): + def cluster_label(self, cluster_label: int) -> None: if not isinstance(cluster_label, int): raise e.TypeError("`cluster_label` should be an integer") if cluster_label < 0: @@ -126,65 +133,65 @@ def cluster_label(self, cluster_label): self._cluster_label = cluster_label @property - def features(self): + def features(self) -> np.array: """np.array: N-dimensional array of features.""" return self._features @features.setter - def features(self, features): + def features(self, features: np.array) -> None: if not isinstance(features, np.ndarray): raise e.TypeError("`features` should be a numpy array") self._features = features @property - def cost(self): - """float: Node's cost.""" + def cost(self) -> float: + """Node's cost.""" return self._cost @cost.setter - def cost(self, cost): + def cost(self, cost: float) -> None: if not isinstance(cost, (float, int, np.int32, np.int64)): raise e.TypeError("`cost` should be a float or integer") self._cost = cost @property - def density(self): - """float: Node's density.""" + def density(self) -> float: + """Node's density.""" return self._density @density.setter - def density(self, density): + def density(self, density: float) -> None: if not isinstance(density, (float, int, np.int32, np.int64)): raise e.TypeError("`density` should be a float or integer") self._density = density @property - def radius(self): - """float: Maximum distance among the k-nearest neighbors.""" + def radius(self) -> float: + """Maximum distance among the k-nearest neighbors.""" return self._radius @radius.setter - def radius(self, radius): + def radius(self, radius: float) -> None: if not isinstance(radius, (float, int, np.int32, np.int64)): raise e.TypeError("`radius` should be a float or integer") self._radius = radius @property - def n_plateaus(self): - """int: Amount of adjacent nodes on plateaus.""" + def n_plateaus(self) -> int: + """Amount of adjacent nodes on plateaus.""" return self._n_plateaus @n_plateaus.setter - def n_plateaus(self, n_plateaus): + def n_plateaus(self, n_plateaus: int) -> None: if not isinstance(n_plateaus, int): raise e.TypeError("`n_plateaus` should be an integer") if n_plateaus < 0: @@ -193,26 +200,26 @@ def n_plateaus(self, n_plateaus): self._n_plateaus = n_plateaus @property - def adjacency(self): - """list: Adjacent nodes.""" + def adjacency(self) -> List[int]: + """Adjacent nodes.""" return self._adjacency @adjacency.setter - def adjacency(self, adjacency): + def adjacency(self, adjacency: List[int]) -> None: if not isinstance(adjacency, list): raise e.TypeError("`adjacency` should be a list") self._adjacency = adjacency @property - def root(self): - """int: Cluster's root node identifier.""" + def root(self) -> int: + """Cluster's root node identifier.""" return self._root @root.setter - def root(self, root): + def root(self, root: int) -> None: if not isinstance(root, int): raise e.TypeError("`root` should be an integer") if root < 0: @@ -221,26 +228,26 @@ def root(self, root): self._root = root @property - def status(self): - """int: Whether the node is a prototype or not.""" + def status(self) -> int: + """Whether the node is a prototype or not.""" return self._status @status.setter - def status(self, status): + def status(self, status: int) -> None: if status not in [c.STANDARD, c.PROTOTYPE]: raise e.TypeError("`status` should be `STANDARD` or `PROTOTYPE`") self._status = status @property - def pred(self): - """int: Identifier to the predecessor node.""" + def pred(self) -> int: + """Identifier to the predecessor node.""" return self._pred @pred.setter - def pred(self, pred): + def pred(self, pred: int) -> None: if not isinstance(pred, int): raise e.TypeError("`pred` should be an integer") if pred < c.NIL: @@ -249,13 +256,13 @@ def pred(self, pred): self._pred = pred @property - def relevant(self): - """int: Whether the node is relevant or not.""" + def relevant(self) -> int: + """Whether the node is relevant or not.""" return self._relevant @relevant.setter - def relevant(self, relevant): + def relevant(self, relevant: int) -> None: if relevant not in [c.RELEVANT, c.IRRELEVANT]: raise e.TypeError("`relevant` should be `RELEVANT` or `IRRELEVANT`") diff --git a/opfython/core/opf.py b/opfython/core/opf.py index 3f5c339..5df40e1 100644 --- a/opfython/core/opf.py +++ b/opfython/core/opf.py @@ -2,6 +2,7 @@ """ import pickle +from typing import List, Optional import numpy as np @@ -23,12 +24,16 @@ class OPF: """ - def __init__(self, distance="log_squared_euclidean", pre_computed_distance=None): + def __init__( + self, + distance: Optional[str] = "log_squared_euclidean", + pre_computed_distance: Optional[str] = None, + ) -> None: """Initialization method. Args: - distance (str): An indicator of the distance metric to be used. - pre_computed_distance (str): A pre-computed distance file for feeding into OPF. + distance: An indicator of the distance metric to be used. + pre_computed_distance: A pre-computed distance file for feeding into OPF. """ @@ -66,13 +71,13 @@ def __init__(self, distance="log_squared_euclidean", pre_computed_distance=None) logger.info("Class created.") @property - def subgraph(self): - """Subgraph: Subgraph's instance.""" + def subgraph(self) -> Subgraph: + """Subgraph's instance.""" return self._subgraph @subgraph.setter - def subgraph(self, subgraph): + def subgraph(self, subgraph: Subgraph) -> None: if subgraph is not None: if not isinstance(subgraph, Subgraph): raise e.TypeError("`subgraph` should be a subgraph") @@ -80,13 +85,13 @@ def subgraph(self, subgraph): self._subgraph = subgraph @property - def distance(self): - """str: Distance metric to be used.""" + def distance(self) -> str: + """Distance metric to be used.""" return self._distance @distance.setter - def distance(self, distance): + def distance(self, distance: str) -> None: if distance not in [ "additive_symmetric", "average_euclidean", @@ -151,50 +156,50 @@ def distance(self, distance): self._distance = distance @property - def distance_fn(self): - """callable: Distance function to be used.""" + def distance_fn(self) -> callable: + """Distance function to be used.""" return self._distance_fn @distance_fn.setter - def distance_fn(self, distance_fn): + def distance_fn(self, distance_fn: callable) -> None: if not callable(distance_fn): raise e.TypeError("`distance_fn` should be a callable") self._distance_fn = distance_fn @property - def pre_computed_distance(self): - """bool: Whether OPF should use a pre-computed distance or not.""" + def pre_computed_distance(self) -> bool: + """Whether OPF should use a pre-computed distance or not.""" return self._pre_computed_distance @pre_computed_distance.setter - def pre_computed_distance(self, pre_computed_distance): + def pre_computed_distance(self, pre_computed_distance: bool) -> None: if not isinstance(pre_computed_distance, bool): raise e.TypeError("`pre_computed_distance` should be a boolean") self._pre_computed_distance = pre_computed_distance @property - def pre_distances(self): - """np.array: Pre-computed distance matrix.""" + def pre_distances(self) -> np.array: + """Pre-computed distance matrix.""" return self._pre_distances @pre_distances.setter - def pre_distances(self, pre_distances): + def pre_distances(self, pre_distances: np.array) -> None: if pre_distances is not None: if not isinstance(pre_distances, np.ndarray): raise e.TypeError("`pre_distances` should be a numpy array") self._pre_distances = pre_distances - def _read_distances(self, file_name): + def _read_distances(self, file_name: str) -> None: """Reads the distance between nodes from a pre-defined file. Args: - file_name (str): File to be loaded. + file_name: File to be loaded. """ @@ -222,11 +227,11 @@ def _read_distances(self, file_name): # Apply the distances matrix to the property self.pre_distances = distances - def load(self, file_name): + def load(self, file_name: str) -> None: """Loads the object from a pickle encoding. Args: - file_name (str): Pickle's file path to be loaded. + file_name: Pickle's file path to be loaded. """ @@ -239,11 +244,11 @@ def load(self, file_name): logger.info("Model loaded.") - def save(self, file_name): + def save(self, file_name: str) -> None: """Saves the object to a pickle encoding. Args: - file_name (str): File's name to be saved. + file_name: File's name to be saved. """ @@ -254,29 +259,29 @@ def save(self, file_name): logger.info("Model saved.") - def fit(self, X, Y): + def fit(self, X: np.array, Y: np.array) -> None: """Fits data in the classifier. It should be directly implemented in OPF child classes. Args: - X (np.array): Array of features. - Y (np.array): Array of labels. + X: Array of features. + Y: Array of labels. """ raise NotImplementedError - def predict(self, X): + def predict(self, X: np.array) -> List[int]: """Predicts new data using the pre-trained classifier. It should be directly implemented in OPF child classes. Args: - X (np.array): Array of features. + X: Array of features. Returns: - A list of predictions for each record of the data. + (List[int]): A list of predictions for each record of the data. """ diff --git a/opfython/core/subgraph.py b/opfython/core/subgraph.py index 30d90e0..9350c11 100644 --- a/opfython/core/subgraph.py +++ b/opfython/core/subgraph.py @@ -1,6 +1,8 @@ """Subgraph structure that belongs to the Optimum-Path Forest. """ +from typing import List, Optional, Tuple + import numpy as np import opfython.stream.parser as p @@ -16,14 +18,20 @@ class Subgraph: """A Subgraph class is used as a collection of Nodes and the basic structure to work with OPF.""" - def __init__(self, X=None, Y=None, I=None, from_file=None): + def __init__( + self, + X: Optional[np.array] = None, + Y: Optional[np.array] = None, + I: Optional[np.array] = None, + from_file: Optional[bool] = None, + ) -> None: """Initialization method. Args: - X (np.array): Array of features. - Y (np.array): Array of labels. - I (np.array): Array of indexes. - from_file (bool): Whether Subgraph should be directly created from a file. + X: Array of features. + Y: Array of labels. + I: Array of indexes. + from_file: Whether Subgraph should be directly created from a file. """ @@ -59,13 +67,13 @@ def __init__(self, X=None, Y=None, I=None, from_file=None): logger.error("Subgraph has not been properly created.") @property - def n_nodes(self): - """int: Number of nodes.""" + def n_nodes(self) -> int: + """Number of nodes.""" return len(self.nodes) @n_nodes.setter - def n_nodes(self, n_nodes): + def n_nodes(self, n_nodes: int) -> None: if not isinstance(n_nodes, int): raise e.TypeError("`n_nodes` should be an integer") if n_nodes < 0: @@ -74,13 +82,13 @@ def n_nodes(self, n_nodes): self._n_nodes = n_nodes @property - def n_features(self): - """int: Number of features.""" + def n_features(self) -> int: + """Number of features.""" return self._n_features @n_features.setter - def n_features(self, n_features): + def n_features(self, n_features: int) -> None: if not isinstance(n_features, int): raise e.TypeError("`n_features` should be an integer") if n_features < 0: @@ -89,52 +97,52 @@ def n_features(self, n_features): self._n_features = n_features @property - def nodes(self): - """list: List of nodes that belongs to the Subgraph.""" + def nodes(self) -> List[Node]: + """List of nodes that belongs to the Subgraph.""" return self._nodes @nodes.setter - def nodes(self, nodes): + def nodes(self, nodes: List[Node]) -> None: if not isinstance(nodes, list): raise e.TypeError("`nodes` should be a list") self._nodes = nodes @property - def idx_nodes(self): - """list: List of ordered nodes indexes.""" + def idx_nodes(self) -> List[int]: + """List of ordered nodes indexes.""" return self._idx_nodes @idx_nodes.setter - def idx_nodes(self, idx_nodes): + def idx_nodes(self, idx_nodes: List[int]) -> None: if not isinstance(idx_nodes, list): raise e.TypeError("`idx_nodes` should be a list") self._idx_nodes = idx_nodes @property - def trained(self): - """bool: Indicate whether the subgraph is trained.""" + def trained(self) -> bool: + """Indicate whether the subgraph is trained.""" return self._trained @trained.setter - def trained(self, trained): + def trained(self, trained: bool) -> None: if not isinstance(trained, bool): raise e.TypeError("`trained` should be a boolean") self._trained = trained - def _load(self, file_path): + def _load(self, file_path: str) -> Tuple[np.array, np.array]: """Loads and parses a dataframe from a file. Args: - file_path (str): File to be loaded. + file_path: File to be loaded. Returns: - Arrays holding the features and labels. + (Tuple[np.array, np.array]): Arrays holding the features and labels. """ @@ -159,15 +167,16 @@ def _load(self, file_path): return X, Y - def _build(self, X, Y, I): + def _build(self, X: np.array, Y: np.array, I: np.array) -> None: """This method serves as the object building process. One can define several commands here that does not necessarily needs to be on its initialization. Args: - X (np.array): Features array. - Y (np.array): Labels array. + X: Features array. + Y: Labels array. + I: Indexes array. """ @@ -186,7 +195,7 @@ def _build(self, X, Y, I): # Calculates the number of features self.n_features = self.nodes[0].features.shape[0] - def destroy_arcs(self): + def destroy_arcs(self) -> None: """Destroy the arcs present in the subgraph.""" for i in range(self.n_nodes): @@ -194,11 +203,11 @@ def destroy_arcs(self): self.nodes[i].n_plateaus = 0 self.nodes[i].adjacency = [] - def mark_nodes(self, i): + def mark_nodes(self, i: int) -> None: """Marks a node and its whole path as relevant. Args: - i (int): An identifier of the node to start the marking. + i: An identifier of the node to start the marking. """ @@ -213,7 +222,7 @@ def mark_nodes(self, i): # Marks the first node as relevant self.nodes[i].relevant = c.RELEVANT - def reset(self): + def reset(self) -> None: """Resets the subgraph predecessors and arcs.""" for i in range(self.n_nodes): diff --git a/opfython/models/knn_supervised.py b/opfython/models/knn_supervised.py index b6f3662..7b813d2 100644 --- a/opfython/models/knn_supervised.py +++ b/opfython/models/knn_supervised.py @@ -2,6 +2,7 @@ """ import time +from typing import List, Optional import numpy as np @@ -25,14 +26,17 @@ class KNNSupervisedOPF(OPF): """ def __init__( - self, max_k=1, distance="log_squared_euclidean", pre_computed_distance=None - ): + self, + max_k: Optional[int] = 1, + distance: Optional[str] = "log_squared_euclidean", + pre_computed_distance: Optional[str] = None, + ) -> None: """Initialization method. Args: - max_k (int): Maximum `k` value for cutting the subgraph. - distance (str): An indicator of the distance metric to be used. - pre_computed_distance (str): A pre-computed distance file for feeding into OPF. + max_k: Maximum `k` value for cutting the subgraph. + distance: An indicator of the distance metric to be used. + pre_computed_distance: A pre-computed distance file for feeding into OPF. """ @@ -46,13 +50,13 @@ def __init__( logger.info("Class overrided.") @property - def max_k(self): - """int: Maximum `k` value for cutting the subgraph.""" + def max_k(self) -> int: + """Maximum `k` value for cutting the subgraph.""" return self._max_k @max_k.setter - def max_k(self, max_k): + def max_k(self, max_k: int) -> None: if not isinstance(max_k, int): raise e.TypeError("`max_k` should be an integer") if max_k < 1: @@ -60,11 +64,11 @@ def max_k(self, max_k): self._max_k = max_k - def _clustering(self, force_prototype=False): + def _clustering(self, force_prototype: Optional[bool] = False) -> None: """Clusters the subgraph. Args: - force_prototype (bool): Whether clustering should for each class to have at least one prototype. + force_prototype: Whether clustering should for each class to have at least one prototype. """ @@ -152,16 +156,24 @@ def _clustering(self, force_prototype=False): # Updates node `q` on the heap with the current cost h.update(q, current_cost) - def _learn(self, X_train, Y_train, I_train, X_val, Y_val, I_val): + def _learn( + self, + X_train: np.array, + Y_train: np.array, + I_train: np.array, + X_val: np.array, + Y_val: np.array, + I_val: np.array, + ) -> None: """Learns the best `k` value over the validation set. Args: - X_train (np.array): Array of training features. - Y_train (np.array): Array of training labels. - I_train (np.array): Array of training indexes. - X_val (np.array): Array of validation features. - Y_val (np.array): Array of validation labels. - I_val (np.array): Array of validation indexes. + X_train: Array of training features. + Y_train: Array of training labels. + I_train: Array of training indexes. + X_val: Array of validation features. + Y_val: Array of validation labels. + I_val: Array of validation indexes. """ @@ -215,16 +227,24 @@ def _learn(self, X_train, Y_train, I_train, X_val, Y_val, I_val): self.subgraph.best_k = best_k - def fit(self, X_train, Y_train, X_val, Y_val, I_train=None, I_val=None): + def fit( + self, + X_train: np.array, + Y_train: np.array, + X_val: np.array, + Y_val: np.array, + I_train: Optional[np.array] = None, + I_val: Optional[np.array] = None, + ) -> None: """Fits data in the classifier. Args: - X_train (np.array): Array of training features. - Y_train (np.array): Array of training labels. - X_val (np.array): Array of validation features. - Y_val (np.array): Array of validation labels. - I_train (np.array): Array of training indexes. - I_val (np.array): Array of validation indexes. + X_train: Array of training features. + Y_train: Array of training labels. + X_val: Array of validation features. + Y_val: Array of validation labels. + I_train: Array of training indexes. + I_val: Array of validation indexes. """ @@ -266,15 +286,15 @@ def fit(self, X_train, Y_train, X_val, Y_val, I_train=None, I_val=None): logger.info("Classifier has been fitted with k = %d.", self.subgraph.best_k) logger.info("Training time: %s seconds.", train_time) - def predict(self, X_test, I_test=None): + def predict(self, X_test: np.array, I_test: Optional[np.array] = None) -> List[int]: """Predicts new data using the pre-trained classifier. Args: - X_test (np.array): Array of features. - I_test (np.array): Array of indexes. + X_test: Array of features. + I_test: Array of indexes. Returns: - A list of predictions for each record of the data. + (List[int]): A list of predictions for each record of the data. """ diff --git a/opfython/models/semi_supervised.py b/opfython/models/semi_supervised.py index 3a5a2f8..1e43987 100644 --- a/opfython/models/semi_supervised.py +++ b/opfython/models/semi_supervised.py @@ -2,6 +2,7 @@ """ import time +from typing import Optional import numpy as np @@ -22,12 +23,16 @@ class SemiSupervisedOPF(SupervisedOPF): """ - def __init__(self, distance="log_squared_euclidean", pre_computed_distance=None): + def __init__( + self, + distance: Optional[str] = "log_squared_euclidean", + pre_computed_distance: Optional[str] = None, + ) -> None: """Initialization method. Args: - distance (str): An indicator of the distance metric to be used. - pre_computed_distance (str): A pre-computed distance file for feeding into OPF. + distance: An indicator of the distance metric to be used. + pre_computed_distance: A pre-computed distance file for feeding into OPF. """ @@ -37,14 +42,20 @@ def __init__(self, distance="log_squared_euclidean", pre_computed_distance=None) logger.info("Class overrided.") - def fit(self, X_train, Y_train, X_unlabeled, I_train=None): + def fit( + self, + X_train: np.array, + Y_train: np.array, + X_unlabeled: np.array, + I_train: Optional[np.array] = None, + ) -> None: """Fits data in the semi-supervised classifier. Args: - X_train (np.array): Array of training features. - Y_train (np.array): Array of training labels. - X_unlabeled (np.array): Array of unlabeled features. - I_train (np.array): Array of training indexes. + X_train: Array of training features. + Y_train: Array of training labels. + X_unlabeled: Array of unlabeled features. + I_train: Array of training indexes. """ diff --git a/opfython/models/supervised.py b/opfython/models/supervised.py index 382d022..36f7c97 100644 --- a/opfython/models/supervised.py +++ b/opfython/models/supervised.py @@ -3,6 +3,7 @@ import copy import time +from typing import List, Optional import numpy as np @@ -25,12 +26,16 @@ class SupervisedOPF(OPF): """ - def __init__(self, distance="log_squared_euclidean", pre_computed_distance=None): + def __init__( + self, + distance: Optional[str] = "log_squared_euclidean", + pre_computed_distance: Optional[str] = None, + ) -> None: """Initialization method. Args: - distance (str): An indicator of the distance metric to be used. - pre_computed_distance (str): A pre-computed distance file for feeding into OPF. + distance: An indicator of the distance metric to be used. + pre_computed_distance: A pre-computed distance file for feeding into OPF. """ @@ -40,7 +45,7 @@ def __init__(self, distance="log_squared_euclidean", pre_computed_distance=None) logger.info("Class overrided.") - def _find_prototypes(self): + def _find_prototypes(self) -> None: """Find prototype nodes using the Minimum Spanning Tree (MST) approach.""" logger.debug("Finding prototypes ...") @@ -109,13 +114,15 @@ def _find_prototypes(self): logger.debug("Prototypes: %s.", prototypes) - def fit(self, X_train, Y_train, I_train=None): + def fit( + self, X_train: np.array, Y_train: np.array, I_train: Optional[np.array] = None + ) -> None: """Fits data in the classifier. Args: - X_train (np.array): Array of training features. - Y_train (np.array): Array of training labels. - I_train (np.array): Array of training indexes. + X_train: Array of training features. + Y_train: Array of training labels. + I_train: Array of training indexes. """ @@ -199,15 +206,15 @@ def fit(self, X_train, Y_train, I_train=None): logger.info("Classifier has been fitted.") logger.info("Training time: %s seconds.", train_time) - def predict(self, X_val, I_val=None): + def predict(self, X_val: np.array, I_val: Optional[np.array] = None) -> List[int]: """Predicts new data using the pre-trained classifier. Args: - X_val (np.array): Array of validation or test features. - I_val (np.array): Array of validation or test indexes. + X_val: Array of validation or test features. + I_val: Array of validation or test indexes. Returns: - A list of predictions for each record of the data. + (List[int]): A list of predictions for each record of the data. """ @@ -308,15 +315,22 @@ def predict(self, X_val, I_val=None): return preds - def learn(self, X_train, Y_train, X_val, Y_val, n_iterations=10): + def learn( + self, + X_train: np.array, + Y_train: np.array, + X_val: np.array, + Y_val: np.array, + n_iterations: Optional[int] = 10, + ) -> None: """Learns the best classifier over a validation set. Args: - X_train (np.array): Array of training features. - Y_train (np.array): Array of training labels. - X_val (np.array): Array of validation features. - Y_val (np.array): Array of validation labels. - n_iterations (int): Number of iterations. + X_train: Array of training features. + Y_train: Array of training labels. + X_val: Array of validation features. + Y_val: Array of validation labels. + n_iterations: Number of iterations. """ @@ -413,15 +427,22 @@ def learn(self, X_train, Y_train, X_val, Y_val, n_iterations=10): break - def prune(self, X_train, Y_train, X_val, Y_val, n_iterations=10): + def prune( + self, + X_train: np.array, + Y_train: np.array, + X_val: np.array, + Y_val: np.array, + n_iterations: Optional[int] = 10, + ) -> None: """Prunes a classifier over a validation set. Args: - X_train (np.array): Array of training features. - Y_train (np.array): Array of training labels. - X_val (np.array): Array of validation features. - Y_val (np.array): Array of validation labels. - n_iterations (int): Maximum number of iterations. + X_train: Array of training features. + Y_train: Array of training labels. + X_val: Array of validation features. + Y_val: Array of validation labels. + n_iterations: Maximum number of iterations. """ diff --git a/opfython/models/unsupervised.py b/opfython/models/unsupervised.py index da202d9..f8297ea 100644 --- a/opfython/models/unsupervised.py +++ b/opfython/models/unsupervised.py @@ -2,6 +2,7 @@ """ import time +from typing import List, Optional import numpy as np @@ -26,18 +27,18 @@ class UnsupervisedOPF(OPF): def __init__( self, - min_k=1, - max_k=1, - distance="log_squared_euclidean", - pre_computed_distance=None, + min_k: Optional[int] = 1, + max_k: Optional[int] = 1, + distance: Optional[str] = "log_squared_euclidean", + pre_computed_distance: Optional[str] = None, ): """Initialization method. Args: - min_k (int): Minimum `k` value for cutting the subgraph. - max_k (int): Maximum `k` value for cutting the subgraph. - distance (str): An indicator of the distance metric to be used. - pre_computed_distance (str): A pre-computed distance file for feeding into OPF. + min_k: Minimum `k` value for cutting the subgraph. + max_k: Maximum `k` value for cutting the subgraph. + distance: An indicator of the distance metric to be used. + pre_computed_distance: A pre-computed distance file for feeding into OPF. """ @@ -54,13 +55,13 @@ def __init__( logger.info("Class overrided.") @property - def min_k(self): - """int: Minimum `k` value for cutting the subgraph.""" + def min_k(self) -> int: + """Minimum `k` value for cutting the subgraph.""" return self._min_k @min_k.setter - def min_k(self, min_k): + def min_k(self, min_k: int) -> None: if not isinstance(min_k, int): raise e.TypeError("`min_k` should be an integer") if min_k < 1: @@ -69,13 +70,13 @@ def min_k(self, min_k): self._min_k = min_k @property - def max_k(self): - """int: Maximum `k` value for cutting the subgraph.""" + def max_k(self) -> int: + """Maximum `k` value for cutting the subgraph.""" return self._max_k @max_k.setter - def max_k(self, max_k): + def max_k(self, max_k: int) -> None: if not isinstance(max_k, int): raise e.TypeError("`max_k` should be an integer") if max_k < 1: @@ -85,11 +86,11 @@ def max_k(self, max_k): self._max_k = max_k - def _clustering(self, n_neighbours): + def _clustering(self, n_neighbours: int) -> None: """Clusters the subgraph using using a `k` value (number of neighbours). Args: - n_neighbours (int): Number of neighbours to be used. + n_neighbours: Number of neighbours to be used. """ @@ -192,14 +193,14 @@ def _clustering(self, n_neighbours): # The final number of clusters will be equal to `l` self.subgraph.n_clusters = l - def _normalized_cut(self, n_neighbours): + def _normalized_cut(self, n_neighbours: int) -> int: """Performs a normalized cut over the subgraph using a `k` value (number of neighbours). Args: - n_neighbours (int): Number of neighbours to be used. + n_neighbours: Number of neighbours to be used. Returns: - The value of the normalized cut. + (int): The value of the normalized cut. """ @@ -256,12 +257,12 @@ def _normalized_cut(self, n_neighbours): return cut - def _best_minimum_cut(self, min_k, max_k): + def _best_minimum_cut(self, min_k: int, max_k: int) -> None: """Performs a minimum cut on the subgraph using the best `k` value. Args: - min_k (int): Minimum value of k. - max_k (int): Maximum value of k. + min_k: Minimum value of k. + max_k: Maximum value of k. """ @@ -318,13 +319,18 @@ def _best_minimum_cut(self, min_k, max_k): logger.debug("Best: %d | Minimum cut: %d.", best_k, min_cut) - def fit(self, X_train, Y_train=None, I_train=None): + def fit( + self, + X_train: np.array, + Y_train: Optional[np.array] = None, + I_train: Optional[np.array] = None, + ) -> None: """Fits data in the classifier. Args: - X_train (np.array): Array of training features. - Y_train (np.array): Array of training labels. - I_train (np.array): Array of training indexes. + X_train: Array of training features. + Y_train: Array of training labels. + I_train: Array of training indexes. """ @@ -352,15 +358,15 @@ def fit(self, X_train, Y_train=None, I_train=None): logger.info("Number of clusters: %d.", self.subgraph.n_clusters) logger.info("Clustering time: %s seconds.", train_time) - def predict(self, X_val, I_val=None): + def predict(self, X_val: np.array, I_val: Optional[np.array] = None) -> List[int]: """Predicts new data using the pre-trained classifier. Args: - X_val (np.array): Array of validation features. - I_val (np.array): Array of validation indexes. + X_val: Array of validation features. + I_val: Array of validation indexes. Returns: - A list of predictions for each record of the data. + (List[int]): A list of predictions for each record of the data. """ @@ -482,7 +488,7 @@ def predict(self, X_val, I_val=None): return preds, clusters - def propagate_labels(self): + def propagate_labels(self) -> None: """Runs through the clusters and propagate the clusters roots labels to the samples.""" logger.info("Assigning predicted labels from clusters ...")