From 424088feb3a669334edee92b8e8ae678442afaa5 Mon Sep 17 00:00:00 2001
From: pablomm <pablomm@users.noreply.github.com>
Date: Tue, 20 Aug 2024 14:40:08 +0200
Subject: [PATCH] Add initial Pytorch models module

---
 README.md                 |   5 +-
 dmf/__init__.py           |   5 +-
 dmf/models/__init__.py    |  18 +++++++
 dmf/models/device.py      |  33 ++++++++++++
 dmf/models/memory.py      | 107 ++++++++++++++++++++++++++++++++++++++
 dmf/models/seed.py        |  42 +++++++++++++++
 dmf/utils/format_bytes.py |  45 ++++++++++++++++
 docs/index.rst            |   1 +
 docs/modules/index.rst    |   1 +
 docs/modules/models.rst   |  74 ++++++++++++++++++++++++++
 docs/modules/video.rst    |   2 +-
 11 files changed, 328 insertions(+), 5 deletions(-)
 create mode 100644 dmf/models/__init__.py
 create mode 100644 dmf/models/device.py
 create mode 100644 dmf/models/memory.py
 create mode 100644 dmf/models/seed.py
 create mode 100644 dmf/utils/format_bytes.py
 create mode 100644 docs/modules/models.rst

diff --git a/README.md b/README.md
index 322d807..8d622a4 100644
--- a/README.md
+++ b/README.md
@@ -18,7 +18,7 @@
 [![License](https://img.shields.io/badge/license-MIT-green.svg)](https://github.com/memory-formation/dmf-utils/blob/main/LICENSE)
 
 
-DMF Utils is a Python package that provides a collection of utility functionalities to simplify common tasks in experiment and data analysis workflows. The package contains modules used by our group to facilitate tasks in neuroscience and artificial intelligence research.
+DMF Utils is a Python package that provides a collection of utility functionalities to simplify common tasks in experiment and data analysis workflows. The package contains modules used by our group to facilitate tasks in neuroscience research.
 
 ## Quick Start
 
@@ -37,6 +37,7 @@ See the [documentation](https://dmf-utils.readthedocs.io/) for more installation
 * [Alerts](https://dmf-utils.readthedocs.io/en/latest/modules/alerts.html): Get notified when a function finishes running and send messages or files to Slack and Telegram.
 * [Env](https://dmf-utils.readthedocs.io/en/latest/modules/env.html): Manage environment variables.
 * [IO (Input/Output)](https://dmf-utils.readthedocs.io/en/latest/modules/io.html): Load and save data from different formats, and manage compressed files.
+* [Models](https://dmf-utils.readthedocs.io/en/latest/modules/models.html): Utilities for PyTorch models.
 * [Video](https://dmf-utils.readthedocs.io/en/latest/modules/video.html): Utilities to work with video files.
 
 
@@ -48,7 +49,7 @@ For full documentation, visit [Read the Docs](https://dmf-utils.readthedocs.io/)
 
 ## Contributing
 
-This package is maintained by [Dynamics of Memory Formation (DMF)](https://brainvitge.org/groups/memory_formation/) at the University of Barcelona. We welcome contributions from the community. If you would like to contribute, please open an issue or a pull request.
+This package is maintained by [Dynamics of Memory Formation (DMF)](https://brainvitge.org/groups/memory_formation/) at the [University of Barcelona](https://web.ub.edu/en/web/ub/). If you would like to contribute, please open an issue or a pull request.
 
 ## License
 
diff --git a/dmf/__init__.py b/dmf/__init__.py
index 78e1cd2..c2a81c5 100644
--- a/dmf/__init__.py
+++ b/dmf/__init__.py
@@ -4,7 +4,7 @@
 
 from .__version__ import __version__
 
-subpackages = ["alerts", "io", "env", "video"]
+subpackages = ["alerts", "io", "env", "video", "models"]
 
 __getattr__, __dir__, __all__ = lazy.attach(__name__, subpackages)
 
@@ -13,6 +13,7 @@
     from . import io
     from . import env
     from . import video
+    from . import models
 
-__all__ = ["__version__", "alerts", "io", "env"]
+__all__ = ["__version__", "alerts", "io", "env", "video", "models"]
 
diff --git a/dmf/models/__init__.py b/dmf/models/__init__.py
new file mode 100644
index 0000000..ec848c0
--- /dev/null
+++ b/dmf/models/__init__.py
@@ -0,0 +1,18 @@
+from typing import TYPE_CHECKING
+
+import lazy_loader as lazy
+
+submod_attrs = {  # submodule name -> public names re-exported from it
+    "memory": ["free", "get_memory_stats"],
+    "seed": ["set_seed"],
+    "device": ["get_device"],
+}
+
+__getattr__, __dir__, __all__ = lazy.attach(__name__, submod_attrs=submod_attrs)  # lazy_loader hooks for this package
+
+if TYPE_CHECKING:  # real imports, evaluated by static type checkers only
+    from .memory import free, get_memory_stats
+    from .seed import set_seed
+    from .device import get_device
+
+__all__ = ["free", "get_memory_stats", "set_seed", "get_device"]
diff --git a/dmf/models/device.py b/dmf/models/device.py
new file mode 100644
index 0000000..2879aa3
--- /dev/null
+++ b/dmf/models/device.py
@@ -0,0 +1,33 @@
+
+from typing import Optional, Union
+try:
+    import torch
+except ImportError:
+    raise ImportError("PyTorch is not installed. Please, "
+                      "install a suitable version of PyTorch.")
+
+def get_device(device: Optional[Union[str, torch.device]] = None) -> torch.device:
+    """Return a ``torch.device``, auto-selecting the best backend if none is given.
+
+    Parameters
+    ----------
+    device : str or torch.device, optional
+        The device to use. If None, CUDA is preferred, then MPS, then CPU.
+
+    Returns
+    -------
+    torch.device
+        The selected device.
+    """
+    if device is not None:
+        # ``torch.device`` accepts both strings and existing device objects.
+        return torch.device(device)
+
+    if torch.cuda.is_available():
+        return torch.device("cuda")
+    # ``torch.backends.mps`` does not exist on older PyTorch releases, so look
+    # it up defensively instead of raising AttributeError there.
+    mps = getattr(torch.backends, "mps", None)
+    if mps is not None and mps.is_available():
+        return torch.device("mps")
+    return torch.device("cpu")
\ No newline at end of file
diff --git a/dmf/models/memory.py b/dmf/models/memory.py
new file mode 100644
index 0000000..d07c063
--- /dev/null
+++ b/dmf/models/memory.py
@@ -0,0 +1,107 @@
+import gc
+from typing import Dict, Optional, Union, Any
+
+try:
+    import torch
+except ImportError:
+    raise ImportError("PyTorch is not installed. Please, "
+                      "install a suitable version of PyTorch.")
+
+from .device import get_device
+from ..utils.format_bytes import bytes_to_human_readable
+
+def free(*objects: Any) -> None:
+    """
+    Release accelerator memory held by the given PyTorch models and tensors.
+
+    Parameters
+    ----------
+    *objects : Any
+        The objects to free. Typically these are PyTorch models or tensors.
+        Objects of any other type are ignored.
+
+    Notes
+    -----
+    Gradients are set to None and models are moved to the CPU in place. Then
+    garbage collection runs and the CUDA/MPS caches are emptied. A function
+    cannot remove the caller's own references: to actually release a tensor,
+    the caller must also ``del`` it (or let it go out of scope) beforehand.
+    """
+    for obj in objects:
+        if isinstance(obj, torch.nn.Module):
+            # Drop gradients first so they are not copied to the CPU below.
+            for param in obj.parameters():
+                param.grad = None
+            # ``Module.to`` moves parameters and buffers in place.
+            obj.to("cpu")
+        elif isinstance(obj, torch.Tensor):
+            # ``Tensor.cpu()`` returns a copy and cannot free the original,
+            # so only the gradient can be released from here.
+            obj.grad = None
+
+    # Drop the local references (argument tuple and loop variable), then
+    # collect garbage BEFORE emptying the caches: blocks only return to the
+    # allocator once the tensors owning them have been destroyed.
+    obj = objects = None
+    gc.collect()
+
+    if torch.cuda.is_available():
+        torch.cuda.empty_cache()
+    # The MPS backend and ``torch.mps`` are missing on older PyTorch releases.
+    mps_backend = getattr(torch.backends, "mps", None)
+    if mps_backend is not None and mps_backend.is_available() and hasattr(torch, "mps"):
+        torch.mps.empty_cache()
+
+
+def get_memory_stats(device: Optional[Union[str, torch.device]] = None, format_size: bool=False) -> Dict[str, Any]:
+    """
+    Get memory statistics for the specified device.
+
+    Parameters
+    ----------
+    device : str or torch.device, optional
+        The device to get memory statistics for. If None, automatically detects
+        the available device (CUDA, MPS, or CPU).
+    format_size : bool, optional
+        Whether to format the memory sizes in human-readable format (KB, MB, GB, TB). Default is False.
+
+    Returns
+    -------
+    dict
+        A dictionary with the keys ``device``, ``free``, ``occupied`` and ``reserved``.
+        Sizes are in bytes, or None when they cannot be queried.
+    """
+    device = get_device(device)
+
+    memory_stats: Dict[str, Any] = {"device": str(device)}
+
+    try:
+        if device.type == "cuda":
+            # ``torch.cuda.mem_get_info`` returns (free, total) in bytes for the
+            # whole device; there is no ``torch.cuda.memory_free`` function.
+            memory_stats["free"] = torch.cuda.mem_get_info(device)[0]
+            memory_stats["occupied"] = torch.cuda.memory_allocated(device)
+            memory_stats["reserved"] = torch.cuda.memory_reserved(device)
+        elif device.type == "mps":
+            # ``torch.mps`` has no ``current_reserved_memory``; the driver-level
+            # figure includes the allocator cache, so it plays the "reserved" role.
+            occupied = torch.mps.current_allocated_memory()
+            reserved = torch.mps.driver_allocated_memory()
+            memory_stats.update(free=reserved - occupied, occupied=occupied, reserved=reserved)
+        else:
+            # CPU memory stats using psutil
+            import psutil
+            virtual_mem = psutil.virtual_memory()
+            memory_stats["free"] = virtual_mem.available
+            memory_stats["occupied"] = virtual_mem.total - virtual_mem.available
+            memory_stats["reserved"] = virtual_mem.total
+    except Exception:
+        # Best effort: a missing backend or missing psutil must not crash the caller.
+        memory_stats.update(free=None, occupied=None, reserved=None)
+
+    if format_size:
+        for key in ["free", "occupied", "reserved"]:
+            memory_stats[key] = bytes_to_human_readable(memory_stats[key])
+
+    return memory_stats
+
diff --git a/dmf/models/seed.py b/dmf/models/seed.py
new file mode 100644
index 0000000..18b0d4e
--- /dev/null
+++ b/dmf/models/seed.py
@@ -0,0 +1,42 @@
+import random
+import numpy as np
+import torch
+
+def set_seed(seed: int) -> "torch.Generator":
+    """
+    Set the seed for random number generation in Python, NumPy, and PyTorch to ensure reproducibility.
+
+    Parameters
+    ----------
+    seed : int
+        The seed value to set for random number generation.
+
+    Returns
+    -------
+    torch.Generator
+        A new PyTorch generator seeded with ``seed``. It lives on the current
+        CUDA device when CUDA is available, and on the CPU otherwise.
+    """
+    # Set the seed for Python's built-in random module
+    random.seed(seed)
+
+    # Set the seed for NumPy
+    np.random.seed(seed)
+
+    # Always seed the default PyTorch generators, including the CPU one: CPU
+    # randomness (weight init, shuffling) is used even when CUDA is available.
+    torch.manual_seed(seed)
+
+    if torch.cuda.is_available():
+        torch.cuda.manual_seed_all(seed)  # Explicitly cover multi-GPU setups
+        rg = torch.Generator(torch.device("cuda", torch.cuda.current_device()))
+    else:
+        rg = torch.Generator()
+
+    # Ensure deterministic behavior in cuDNN (if applicable)
+    if torch.backends.cudnn.is_available():
+        torch.backends.cudnn.deterministic = True
+        torch.backends.cudnn.benchmark = False
+
+    rg.manual_seed(seed)
+    return rg
diff --git a/dmf/utils/format_bytes.py b/dmf/utils/format_bytes.py
new file mode 100644
index 0000000..32b75e0
--- /dev/null
+++ b/dmf/utils/format_bytes.py
@@ -0,0 +1,45 @@
+
+
+from typing import Optional
+
+def bytes_to_human_readable(num_bytes: Optional[int], decimal_places: int = 2, units = ("Bytes", "KB", "MB", "GB", "TB")) -> Optional[str]:
+    """
+    Convert a number in bytes into a human-readable string with appropriate units (KB, MB, GB, TB).
+    If num_bytes is None, return None.
+
+    Parameters
+    ----------
+    num_bytes : int or None
+        The number of bytes to convert. Can be a positive or negative integer.
+    decimal_places : int, optional
+        The number of decimal places to display for KB, MB, GB, and TB. Default is 2.
+    units : sequence of str, optional
+        Unit labels in increasing order, each 1024 times larger than the previous
+        one. Values beyond the last unit are still expressed in the last unit.
+
+    Returns
+    -------
+    str or None
+        The human-readable string representing the size in appropriate units, or None if input is None.
+    """
+    if num_bytes is None:
+        return None
+
+    factor = 1024.0
+    size = abs(num_bytes)
+    unit_index = 0
+
+    # Scale down until the value fits the unit or the largest unit is reached.
+    while size >= factor and unit_index < len(units) - 1:
+        size /= factor
+        unit_index += 1
+
+    unit = units[unit_index]
+    # Whole bytes are shown without decimals; larger units use ``decimal_places``.
+    if unit == "Bytes":
+        size_str = f"{int(size)} {unit}"
+    else:
+        size_str = f"{size:.{decimal_places}f} {unit}"
+
+    # Add a minus sign for negative byte values
+    return f"-{size_str}" if num_bytes < 0 else size_str
\ No newline at end of file
diff --git a/docs/index.rst b/docs/index.rst
index e2f7b55..bff4622 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -66,6 +66,7 @@ DMF Utils is designed in a modular way, allowing you to install only the compone
 - :doc:`modules/alerts`: Tools for sending notifications and alerts via Slack and Telegram.
 - :doc:`modules/env`: Facities to manage environemt variables.
 - :doc:`modules/io`: Input/output utilities for file handling and data management.
+- :doc:`modules/models`: Utilities for working with PyTorch.
 - :doc:`modules/video`: Utilities for reading and writing video files.
 
 
diff --git a/docs/modules/index.rst b/docs/modules/index.rst
index b827b84..8e28dc7 100644
--- a/docs/modules/index.rst
+++ b/docs/modules/index.rst
@@ -11,4 +11,5 @@ Below are the different modules available within the package:
    alerts
    env
    io
+   models
    video
diff --git a/docs/modules/models.rst b/docs/modules/models.rst
new file mode 100644
index 0000000..5a75d98
--- /dev/null
+++ b/docs/modules/models.rst
@@ -0,0 +1,74 @@
+Models
+======
+
+The `dmf.models` module provides utilities for tasks related to PyTorch models.
+
+This module is included in the `dmf-utils` core package and can be installed using the following command:
+
+.. code-block:: bash
+
+    pip install dmf-utils
+
+
+However, PyTorch must be installed to use the functionality in this module.
+The installation instructions for PyTorch are provided below.
+
+Content
+---------
+
+The `dmf.models` module includes the following functions:
+
+.. autosummary::
+   :toctree: autosummary
+
+   dmf.models.free
+   dmf.models.get_memory_stats
+   dmf.models.get_device
+   dmf.models.set_seed
+
+
+PyTorch Installation
+--------------------
+
+For detailed information check the official `PyTorch installation guide <https://pytorch.org/get-started/locally/>`_.
+
+Linux + CUDA
+~~~~~~~~~~~~
+
+1. First, check your CUDA version by running:
+
+.. code-block:: bash
+
+    nvcc --version
+
+If you don't have CUDA installed, you can install it by following the instructions on the `NVIDIA CUDA Toolkit <https://developer.nvidia.com/cuda-toolkit>`_ page.
+
+To install the latest version of PyTorch built for your CUDA version, run:
+
+.. code-block:: bash
+
+    pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
+
+Make sure to replace `cu118` with the appropriate version matching your CUDA toolkit version.
+
+macOS + MPS
+~~~~~~~~~~~
+
+On macOS systems, especially those with Apple Silicon (M1, M2 chips), 
+you can use the Metal Performance Shaders (MPS) backend by installing PyTorch with:
+
+.. code-block:: bash
+
+    pip install torch torchvision torchaudio
+
+The MPS backend is automatically enabled when using PyTorch on compatible macOS devices.
+
+CPU-based
+~~~~~~~~~
+
+For environments without GPU support or when running on systems without CUDA or MPS capabilities,
+you can install the CPU-only version of PyTorch:
+
+.. code-block:: bash
+
+    pip install torch torchvision torchaudio
\ No newline at end of file
diff --git a/docs/modules/video.rst b/docs/modules/video.rst
index 10194d6..87fc281 100644
--- a/docs/modules/video.rst
+++ b/docs/modules/video.rst
@@ -3,7 +3,7 @@ Video
 
 The `video` module in DMF Utils provides utilities for reading and writing video files. It offers functions to easily handle video frames, allowing you to write videos from frames and read videos into various formats such as NumPy arrays or PIL images.
 
-This module is included in the base package:
+To install dependencies for the `video` module, use the following command:
 
 .. code-block:: bash