diff --git a/.conda/meta.yaml b/.conda/meta.yaml
index 99a7e6a09..f73d7681f 100644
--- a/.conda/meta.yaml
+++ b/.conda/meta.yaml
@@ -24,13 +24,13 @@ requirements:
   run:
     - python>=3.8, <4.0
-    - pytorch >=1.9.1, <2.0.0
-    - torchvision >=0.10.1, <1.0.0
+    - pytorch >=2.0.0, <3.0.0
+    - torchvision >=0.15.0, <1.0.0
     - tqdm >=4.1.0
     - numpy >=1.17.2, <2.0.0
     - fastprogress >=1.0.0, <2.0.0
     - matplotlib >=3.0.0, <4.0.0
-    - pillow >=8.4.0
+    - pillow >=8.4.0, !=9.2.0
     - huggingface_hub >=0.4.0

 test:
diff --git a/.github/collect_env.py b/.github/collect_env.py
index 7b739d556..f3303b313 100644
--- a/.github/collect_env.py
+++ b/.github/collect_env.py
@@ -16,7 +16,8 @@
 import re
 import subprocess
 import sys
-from collections import namedtuple
+from pathlib import Path
+from typing import NamedTuple

 try:
     import holocron
@@ -44,21 +45,17 @@

 # System Environment Information
-SystemEnv = namedtuple(
-    "SystemEnv",
-    [
-        "holocron_version",
-        "torch_version",
-        "torchvision_version",
-        "os",
-        "python_version",
-        "is_cuda_available",
-        "cuda_runtime_version",
-        "nvidia_driver_version",
-        "nvidia_gpu_models",
-        "cudnn_version",
-    ],
-)
+class SystemEnv(NamedTuple):
+    holocron_version: str
+    torch_version: str
+    torchvision_version: str
+    os: str
+    python_version: str
+    is_cuda_available: bool
+    cuda_runtime_version: str
+    nvidia_driver_version: str
+    nvidia_gpu_models: str
+    cudnn_version: str


 def run(command):
@@ -134,18 +131,18 @@ def get_cudnn_version(run_lambda):
     # find will return 1 if there are permission errors or if not found
     if len(out) == 0 or rc not in (1, 0):
         lib = os.environ.get("CUDNN_LIBRARY")
-        if lib is not None and os.path.isfile(lib):
+        if lib is not None and Path(lib).is_file():
             return os.path.realpath(lib)
         return None
     files = set()
     for fn in out.split("\n"):
         fn = os.path.realpath(fn)  # eliminate symbolic links
-        if os.path.isfile(fn):
+        if Path(fn).is_file():
             files.add(fn)
     if not files:
         return None
     # Alphabetize the result because the order is non-deterministic otherwise
-    files = list(sorted(files))
+    files = sorted(files)
     if len(files) == 1:
         return files[0]
     result = "\n".join(files)
@@ -158,11 +155,11 @@ def get_nvidia_smi():
     if get_platform() == "win32":
         system_root = os.environ.get("SYSTEMROOT", "C:\\Windows")
         program_files_root = os.environ.get("PROGRAMFILES", "C:\\Program Files")
-        legacy_path = os.path.join(program_files_root, "NVIDIA Corporation", "NVSMI", smi)
-        new_path = os.path.join(system_root, "System32", smi)
+        legacy_path = Path(program_files_root) / "NVIDIA Corporation" / "NVSMI" / smi
+        new_path = Path(system_root) / "System32" / smi
         smis = [new_path, legacy_path]
         for candidate_smi in smis:
-            if os.path.exists(candidate_smi):
+            if Path(candidate_smi).exists():
                 smi = '"{}"'.format(candidate_smi)
                 break
     return smi
@@ -307,7 +304,7 @@ def maybe_start_on_next_line(string):
         "nvidia_gpu_models",
         "nvidia_driver_version",
     ]
-    all_cuda_fields = dynamic_cuda_fields + ["cudnn_version"]
+    all_cuda_fields = [*dynamic_cuda_fields, "cudnn_version"]
     all_dynamic_cuda_fields_missing = all(mutable_dict[field] is None for field in dynamic_cuda_fields)
     if TORCH_AVAILABLE and not torch.cuda.is_available() and all_dynamic_cuda_fields_missing:
         for field in all_cuda_fields:
diff --git a/.github/workflows/demo.yml b/.github/workflows/demo.yml
index f0b28954c..d540efe38 100644
--- a/.github/workflows/demo.yml
+++ b/.github/workflows/demo.yml
@@ -13,7 +13,7 @@ jobs:
       fail-fast: false
       matrix:
         os: [ubuntu-latest]
-        python: [3.8]
+        python: [3.9]
     steps:
       - uses: actions/checkout@v2
       - name: Set up Python
diff --git a/.github/workflows/doc-status.yml b/.github/workflows/doc-status.yml
index 56f025a5a..077bbcba8 100644
--- a/.github/workflows/doc-status.yml
+++ b/.github/workflows/doc-status.yml
@@ -9,7 +9,7 @@ jobs:
       - name: Set up Python
         uses: actions/setup-python@v1
         with:
-          python-version: 3.8
+          python-version: 3.9
           architecture: x64
       - name: check status
         run: |
diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml
index 486eb7ddc..5e27be37d 100644
--- a/.github/workflows/docker.yml
+++ b/.github/workflows/docker.yml
@@ -12,16 +12,16 @@ jobs:
     steps:
       - uses: actions/checkout@v2
       - name: Build docker image
-        run: docker build . -t holocron:python3.8-slim
+        run: docker build . -t holocron:python3.9-slim
       - name: Run docker container
-        run: docker run holocron:python3.8-slim python -c 'import holocron'
+        run: docker run holocron:python3.9-slim python -c 'import holocron'

   api:
     runs-on: ${{ matrix.os }}
     strategy:
       matrix:
         os: [ubuntu-latest]
-        python: [3.8]
+        python: [3.9]
     steps:
       - uses: actions/checkout@v2
       - uses: actions/setup-python@v4
diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml
index 6b3c9e10f..192caa3f6 100644
--- a/.github/workflows/docs.yaml
+++ b/.github/workflows/docs.yaml
@@ -9,7 +9,7 @@ jobs:
     strategy:
       matrix:
         os: [ubuntu-latest]
-        python: [3.8]
+        python: [3.9]
     steps:
       - uses: actions/checkout@v2
         with:
diff --git a/.github/workflows/pull_requests.yml b/.github/workflows/pull_requests.yml
index de64b6365..514924195 100644
--- a/.github/workflows/pull_requests.yml
+++ b/.github/workflows/pull_requests.yml
@@ -12,7 +12,7 @@ jobs:
       - name: Set up Python
         uses: actions/setup-python@v2
         with:
-          python-version: 3.8
+          python-version: 3.9
           architecture: x64
       - name: Cache python modules
         uses: actions/cache@v2
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index f83769da7..fe486202d 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -13,7 +13,7 @@ jobs:
       - name: Set up Python
         uses: actions/setup-python@v1
         with:
-          python-version: 3.8
+          python-version: 3.9
           architecture: x64
       - name: Cache python modules
         uses: actions/cache@v2
@@ -47,7 +47,7 @@ jobs:
       - name: Set up Python
         uses: actions/setup-python@v1
         with:
-          python-version: 3.8
+          python-version: 3.9
           architecture: x64
       - name: Install package
         run: |
@@ -64,7 +64,7 @@ jobs:
         uses: conda-incubator/setup-miniconda@v2
         with:
           auto-update-conda: true
-          python-version: 3.8
+          python-version: 3.9
       - name: Install dependencies
         run: conda install -y conda-build conda-verify anaconda-client
       - name: Get release tag
@@ -91,7 +91,7 @@ jobs:
         uses: conda-incubator/setup-miniconda@v2
         with:
           auto-update-conda: true
-          python-version: 3.8
+          python-version: 3.9
       - name: Install package
         run: |
           conda install -c frgfm pylocron
diff --git a/.github/workflows/scripts.yml b/.github/workflows/scripts.yml
index 468a555df..5d5c19db8 100644
--- a/.github/workflows/scripts.yml
+++ b/.github/workflows/scripts.yml
@@ -13,7 +13,7 @@ jobs:
       fail-fast: false
       matrix:
         os: [ubuntu-latest]
-        python: [3.8]
+        python: [3.9]
     steps:
       - uses: actions/checkout@v2
       - name: Set up Python
diff --git a/.github/workflows/style.yml b/.github/workflows/style.yml
index 659b4ab65..b6b2446fe 100644
--- a/.github/workflows/style.yml
+++ b/.github/workflows/style.yml
@@ -12,7 +12,7 @@ jobs:
     strategy:
       matrix:
         os: [ubuntu-latest]
-        python: [3.8]
+        python: [3.9]
     steps:
       - uses: actions/checkout@v2
       - name: Set up Python
@@ -26,32 +26,12 @@
           ruff --version
           ruff check --diff .

-  isort:
-    runs-on: ${{ matrix.os }}
-    strategy:
-      matrix:
-        os: [ubuntu-latest]
-        python: [3.8]
-    steps:
-      - uses: actions/checkout@v2
-      - name: Set up Python
-        uses: actions/setup-python@v1
-        with:
-          python-version: ${{ matrix.python }}
-          architecture: x64
-      - name: Run isort
-        run: |
-          pip install isort
-          isort --version
-          isort .
-          if [ -n "$(git status --porcelain --untracked-files=no)" ]; then exit 1; else echo "All clear"; fi
-
   mypy:
     runs-on: ${{ matrix.os }}
     strategy:
       matrix:
         os: [ubuntu-latest]
-        python: [3.8]
+        python: [3.9]
     steps:
       - uses: actions/checkout@v2
       - name: Set up Python
@@ -68,37 +48,18 @@
         run: |
           python -m pip install --upgrade pip
           pip install -e . --upgrade
-          pip install mypy
+          pip install "mypy==1.4.1"
       - name: Run mypy
         run: |
          mypy --version
          mypy

-  pydocstyle:
-    runs-on: ${{ matrix.os }}
-    strategy:
-      matrix:
-        os: [ubuntu-latest]
-        python: [3.8]
-    steps:
-      - uses: actions/checkout@v2
-      - name: Set up Python
-        uses: actions/setup-python@v2
-        with:
-          python-version: ${{ matrix.python }}
-          architecture: x64
-      - name: Run pydocstyle
-        run: |
-          pip install pydocstyle[toml]
-          pydocstyle --version
-          pydocstyle
-
   black:
     runs-on: ${{ matrix.os }}
     strategy:
       matrix:
         os: [ubuntu-latest]
-        python: [3.8]
+        python: [3.9]
     steps:
       - uses: actions/checkout@v2
       - name: Set up Python
@@ -108,7 +69,7 @@
           architecture: x64
       - name: Run black
         run: |
-          pip install "black==22.3.0"
+          pip install "black==23.3.0"
           black --version
           black --check --diff .

@@ -117,7 +78,7 @@
     strategy:
       matrix:
         os: [ubuntu-latest]
-        python: [3.8]
+        python: [3.9]
     steps:
       - uses: actions/checkout@v2
       - name: Set up Python
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 26fc3b9a3..8a9e13e39 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -12,7 +12,7 @@ jobs:
     strategy:
       matrix:
         os: [ubuntu-latest]
-        python: [3.8]
+        python: [3.9]
     steps:
       - uses: actions/checkout@v2
         with:
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 7f2966a23..9adda37ec 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,6 +1,8 @@
+default_language_version:
+  python: python3.9
 repos:
   - repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v4.3.0
+    rev: v4.4.0
     hooks:
       - id: check-ast
       - id: check-yaml
@@ -16,15 +18,12 @@ repos:
       - id: no-commit-to-branch
        args: ['--branch', 'main']
   - repo: https://github.com/psf/black
-    rev: 22.3.0
+    rev: 23.3.0
     hooks:
       - id: black
-  - repo: https://github.com/pycqa/isort
-    rev: 5.10.1
-    hooks:
-      - id: isort
-        exclude: "(__init__.py)$"
   - repo: https://github.com/charliermarsh/ruff-pre-commit
-    rev: 'v0.0.260'
+    rev: 'v0.0.289'
    hooks:
      - id: ruff
+       args:
+         - --fix
diff --git a/Makefile b/Makefile
index fc8732d90..76f4d502a 100644
--- a/Makefile
+++ b/Makefile
@@ -1,16 +1,12 @@
 # this target runs checks on all files
 quality:
-	isort . -c
 	ruff check .
 	mypy
-	pydocstyle
 	black --check .
 	bandit -r . -c pyproject.toml
-	autoflake -r .

 # this target runs checks on all files and potentially modifies some of them
 style:
-	isort .
 	black .
 	ruff --fix .
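Note on the dropped tools: the isort and pydocstyle CI jobs and the autoflake step are consolidated into ruff, which reimplements those rule families. The repository's actual ruff configuration is not part of this patch; a minimal pyproject.toml sketch of the kind of rule selection that would cover them (assumed values, for illustration only) looks like:

    [tool.ruff]
    # "I" enables isort-compatible import sorting, "D" the pydocstyle checks;
    # the pyflakes "F" rules (e.g. F401/F841) cover autoflake's
    # unused-import/unused-variable cleanup once --fix is passed.
    select = ["E", "W", "F", "I", "D"]
    target-version = "py39"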
diff --git a/api/Dockerfile b/api/Dockerfile index 57f497ca8..0d3db3904 100644 --- a/api/Dockerfile +++ b/api/Dockerfile @@ -1,4 +1,4 @@ -FROM tiangolo/uvicorn-gunicorn-fastapi:python3.8-slim +FROM tiangolo/uvicorn-gunicorn-fastapi:python3.9-slim WORKDIR /app diff --git a/api/app/schemas.py b/api/app/schemas.py index 4bda7ad91..c9f5fd1b3 100644 --- a/api/app/schemas.py +++ b/api/app/schemas.py @@ -7,5 +7,7 @@ class ClsCandidate(BaseModel): + """Classification result""" + value: str = Field(..., example="Wookie") confidence: float = Field(..., gte=0, lte=1) diff --git a/api/app/vision.py b/api/app/vision.py index c937583b7..5a37fa1e6 100644 --- a/api/app/vision.py +++ b/api/app/vision.py @@ -5,6 +5,7 @@ import io import json +from pathlib import Path import numpy as np import onnxruntime @@ -16,7 +17,7 @@ __all__ = ["decode_image", "classify_image"] # Download model config & checkpoint -with open(hf_hub_download(cfg.HUB_REPO, filename="config.json"), "rb") as f: +with Path(hf_hub_download(cfg.HUB_REPO, filename="config.json")).open("rb") as f: MODEL_CFG = json.load(f) ORT_SESSION = onnxruntime.InferenceSession(hf_hub_download(cfg.HUB_REPO, filename="model.onnx")) @@ -35,7 +36,6 @@ def preprocess_image(pil_img: Image.Image) -> np.ndarray: Returns: the resized and normalized image of shape (1, C, H, W) """ - # Resizing (PIL takes (W, H) order for resizing) img = pil_img.resize(MODEL_CFG["input_shape"][-2:][::-1], Image.BILINEAR) # (H, W, C) --> (C, H, W) diff --git a/api/tests/routes/test_classification.py b/api/tests/routes/test_classification.py index 3dae57406..449b044a4 100644 --- a/api/tests/routes/test_classification.py +++ b/api/tests/routes/test_classification.py @@ -1,9 +1,8 @@ import pytest -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_classification(test_app_asyncio, mock_classification_image): - response = await test_app_asyncio.post("/classification", files={"file": mock_classification_image}) assert response.status_code == 200 json_response = response.json() diff --git a/demo/app.py b/demo/app.py index 9657a036a..668369e84 100644 --- a/demo/app.py +++ b/demo/app.py @@ -5,6 +5,7 @@ import argparse import json +from pathlib import Path import gradio as gr import numpy as np @@ -14,9 +15,8 @@ def main(args): - # Download model config & checkpoint - with open(hf_hub_download(args.repo, filename="config.json"), "rb") as f: + with Path(hf_hub_download(args.repo, filename="config.json")).open("rb") as f: cfg = json.load(f) ort_session = onnxruntime.InferenceSession(hf_hub_download(args.repo, filename="model.onnx")) @@ -30,7 +30,6 @@ def preprocess_image(pil_img: Image.Image) -> np.ndarray: Returns: the resized and normalized image of shape (1, C, H, W) """ - # Resizing (PIL takes (W, H) order for resizing) img = pil_img.resize(cfg["input_shape"][-2:][::-1], Image.BILINEAR) # (H, W, C) --> (C, H, W) diff --git a/docs/source/conf.py b/docs/source/conf.py index 328011cdc..a3325ff3e 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -15,7 +15,6 @@ # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. 
# -import os import sys import textwrap from datetime import datetime @@ -23,11 +22,11 @@ from tabulate import tabulate -sys.path.insert(0, os.path.abspath("../..")) +sys.path.insert(0, Path().resolve().parent.parent) import holocron -import holocron.models as M +from holocron import models # -- Project information ----------------------------------------------------- @@ -133,7 +132,7 @@ def patched_make_field(self, types, domain, items, **kw): # `kw` catches `env=None` needed for newer sphinx while maintaining # backwards compatibility when passed along further down! - # type: (list, unicode, tuple) -> nodes.field # noqa: F821 + # type: (list, unicode, tuple) -> nodes.field def handle_item(fieldarg, content): par = nodes.paragraph() par += addnodes.literal_strong("", fieldarg) # Patch: this line added @@ -187,9 +186,7 @@ def inject_checkpoint_metadata(app, what, name, obj, options, lines): - then this hook is called automatically when building the docs, and it generates the text that gets used within the autoclass directive. """ - if obj.__name__.endswith(("_Checkpoint")): - if len(obj) == 0: lines[:] = ["There are no available pre-trained checkpoints."] return @@ -242,7 +239,7 @@ def inject_checkpoint_metadata(app, what, name, obj, options, lines): table.append(("commit", commit_str)) # table.append(("Training args", meta.args)) - column_widths = ["60"] + ["60"] + column_widths = ["60", "60"] " ".join(column_widths) table = tabulate(table, tablefmt="rst") @@ -259,7 +256,7 @@ def generate_checkpoint_table(module, table_name, metrics): [c for checkpoint_enum in checkpoint_enums for c in checkpoint_enum] metrics_keys, metrics_names = zip(*metrics) - column_names = ["Checkpoint"] + list(metrics_names) + ["Params"] + ["Size (MB)"] # Final column order + column_names = ["Checkpoint", *metrics_names, "Params", "Size (MB)"] # Final column order column_names = [f"**{name}**" for name in column_names] # Add bold content = [] @@ -282,7 +279,7 @@ def generate_checkpoint_table(module, table_name, metrics): generated_dir = Path("generated") generated_dir.mkdir(exist_ok=True) - with open(generated_dir / f"{table_name}_table.rst", "w+") as table_file: + with Path(generated_dir / f"{table_name}_table.rst").open("w+") as table_file: table_file.write(".. rst-class:: table-checkpoints\n") # Custom CSS class, see custom_theme.css table_file.write(".. table::\n") table_file.write(f" :widths: {widths_table} \n\n") @@ -290,7 +287,7 @@ def generate_checkpoint_table(module, table_name, metrics): generate_checkpoint_table( - module=M, + module=models, table_name="classification", metrics=[("top1-accuracy", "Acc@1"), ("top5-accuracy", "Acc@5")], ) @@ -300,10 +297,10 @@ def generate_checkpoint_table(module, table_name, metrics): # so a file named "default.css" will overwrite the builtin "default.css". html_static_path = ["_static"] + # Add googleanalytics id # ref: https://github.com/orenhecht/googleanalytics/blob/master/sphinxcontrib/googleanalytics.py def add_ga_javascript(app, pagename, templatename, context, doctree): - metatags = context.get("metatags", "") metatags += """ diff --git a/docs/source/models.rst b/docs/source/models.rst index b850ee841..5ac92ce50 100644 --- a/docs/source/models.rst +++ b/docs/source/models.rst @@ -21,6 +21,9 @@ The output represents the classification scores for each output classes. darknet19 = models.darknet19(num_classes=10) +Supported architectures +----------------------- + .. 
toctree:: :caption: Supported architectures :maxdepth: 1 @@ -40,6 +43,8 @@ The output represents the classification scores for each output classes. models/repvgg models/mobileone +Available checkpoints +--------------------- Here is the list of available checkpoints: diff --git a/holocron/models/checkpoints.py b/holocron/models/checkpoints.py index 1b293e7ae..8ae092ecc 100644 --- a/holocron/models/checkpoints.py +++ b/holocron/models/checkpoints.py @@ -43,7 +43,7 @@ class Metric(str, Enum): class Dataset(str, Enum): - """Evaluation dataset""" + """Training/evaluation dataset""" IMAGENET1K = "imagenet-1k" IMAGENETTE = "imagenette" @@ -52,12 +52,16 @@ class Dataset(str, Enum): @dataclass class Evaluation: + """Results of model evaluation""" + dataset: Dataset results: Dict[Metric, float] @dataclass class LoadingMeta: + """Metadata to load the model""" + url: str sha256: str size: int @@ -68,6 +72,8 @@ class LoadingMeta: @dataclass class PreProcessing: + """Preprocessing metadata for the model""" + input_shape: Tuple[int, ...] mean: Tuple[float, ...] std: Tuple[float, ...] @@ -76,6 +82,8 @@ class PreProcessing: @dataclass class Checkpoint: + """Data required to run a model in the exact same condition than the checkpoint""" + # What to expect evaluation: Evaluation # How to load it @@ -91,7 +99,6 @@ def _handle_legacy_pretrained( checkpoint: Union[Checkpoint, None] = None, default_checkpoint: Union[Checkpoint, None] = None, ) -> Union[Checkpoint, None]: - checkpoint = checkpoint or (default_checkpoint if pretrained else None) if pretrained and checkpoint is None: diff --git a/holocron/models/classification/convnext.py b/holocron/models/classification/convnext.py index 885169f10..0413126ad 100644 --- a/holocron/models/classification/convnext.py +++ b/holocron/models/classification/convnext.py @@ -15,18 +15,8 @@ from holocron.nn import GlobalAvgPool2d -from ..checkpoints import ( - Checkpoint, - Dataset, - Evaluation, - LoadingMeta, - Metric, - PreProcessing, - TrainingRecipe, - _handle_legacy_pretrained, -) -from ..presets import IMAGENETTE -from ..utils import _configure_model, conv_sequence +from ..checkpoints import Checkpoint, _handle_legacy_pretrained +from ..utils import _checkpoint, _configure_model, conv_sequence from .resnet import _ResBlock __all__ = [ @@ -136,7 +126,6 @@ def __init__( drop_layer: Optional[Callable[..., nn.Module]] = None, stochastic_depth_prob: float = 0.0, ) -> None: - if conv_layer is None: conv_layer = nn.Conv2d if norm_layer is None: @@ -162,7 +151,6 @@ def __init__( block_idx = 0 tot_blocks = sum(num_blocks) for _num_blocks, _planes, _oplanes in zip(num_blocks, planes, planes[1:] + [planes[-1]]): - # adjust stochastic depth probability based on the depth of the stage block sd_probs = [stochastic_depth_prob * (block_idx + _idx) / (tot_blocks - 1.0) for _idx in range(_num_blocks)] _stage: List[nn.Module] = [ @@ -215,24 +203,7 @@ def _convnext( return _configure_model(model, checkpoint, progress=progress) -def _checkpoint( - arch: str, url: str, acc1: float, acc5: float, sha256: str, size: int, num_params: int, commit: str, train_args: str -) -> Checkpoint: - return Checkpoint( - evaluation=Evaluation( - dataset=Dataset.IMAGENETTE, - results={Metric.TOP1_ACC: acc1, Metric.TOP5_ACC: acc5}, - ), - meta=LoadingMeta( - url=url, sha256=sha256, size=size, num_params=num_params, arch=arch, categories=IMAGENETTE.classes - ), - pre_processing=PreProcessing(input_shape=(3, 224, 224), mean=IMAGENETTE.mean, std=IMAGENETTE.std), - recipe=TrainingRecipe(commit=commit, 
script="references/classification/train.py", args=train_args), - ) - - class ConvNeXt_Atto_Checkpoint(Enum): - IMAGENETTE = _checkpoint( arch="convnext_atto", url="https://github.com/frgfm/Holocron/releases/download/v0.2.1/convnext_atto_224-f38217e7.pth", @@ -264,6 +235,7 @@ def convnext_atto( pretrained: If True, returns a model pre-trained on ImageNette checkpoint: If specified, the model's parameters will be set to the checkpoint's values progress: If True, displays a progress bar of the download to stderr + kwargs: keyword args of _convnext Returns: torch.nn.Module: classification model @@ -289,6 +261,7 @@ def convnext_femto( pretrained (bool): If True, returns a model pre-trained on ImageNette checkpoint: If specified, the model's parameters will be set to the checkpoint's values progress (bool): If True, displays a progress bar of the download to stderr + kwargs: keyword args of _convnext Returns: torch.nn.Module: classification model @@ -307,6 +280,7 @@ def convnext_pico( pretrained (bool): If True, returns a model pre-trained on ImageNette checkpoint: If specified, the model's parameters will be set to the checkpoint's values progress (bool): If True, displays a progress bar of the download to stderr + kwargs: keyword args of _convnext Returns: torch.nn.Module: classification model @@ -325,6 +299,7 @@ def convnext_nano( pretrained (bool): If True, returns a model pre-trained on ImageNette checkpoint: If specified, the model's parameters will be set to the checkpoint's values progress (bool): If True, displays a progress bar of the download to stderr + kwargs: keyword args of _convnext Returns: torch.nn.Module: classification model @@ -343,6 +318,7 @@ def convnext_tiny( pretrained (bool): If True, returns a model pre-trained on ImageNette checkpoint: If specified, the model's parameters will be set to the checkpoint's values progress (bool): If True, displays a progress bar of the download to stderr + kwargs: keyword args of _convnext Returns: torch.nn.Module: classification model @@ -361,6 +337,7 @@ def convnext_small( pretrained (bool): If True, returns a model pre-trained on ImageNette checkpoint: If specified, the model's parameters will be set to the checkpoint's values progress (bool): If True, displays a progress bar of the download to stderr + kwargs: keyword args of _convnext Returns: torch.nn.Module: classification model @@ -379,6 +356,7 @@ def convnext_base( pretrained (bool): If True, returns a model pre-trained on ImageNette checkpoint: If specified, the model's parameters will be set to the checkpoint's values progress (bool): If True, displays a progress bar of the download to stderr + kwargs: keyword args of _convnext Returns: torch.nn.Module: classification model @@ -397,6 +375,7 @@ def convnext_large( pretrained (bool): If True, returns a model pre-trained on ImageNette checkpoint: If specified, the model's parameters will be set to the checkpoint's values progress (bool): If True, displays a progress bar of the download to stderr + kwargs: keyword args of _convnext Returns: torch.nn.Module: classification model @@ -415,6 +394,7 @@ def convnext_xl( pretrained (bool): If True, returns a model pre-trained on ImageNette checkpoint: If specified, the model's parameters will be set to the checkpoint's values progress (bool): If True, displays a progress bar of the download to stderr + kwargs: keyword args of _convnext Returns: torch.nn.Module: classification model diff --git a/holocron/models/classification/darknet.py b/holocron/models/classification/darknet.py index 
88f14cdc6..a7855da2a 100644 --- a/holocron/models/classification/darknet.py +++ b/holocron/models/classification/darknet.py @@ -37,7 +37,6 @@ def __init__( drop_layer: Optional[Callable[..., nn.Module]] = None, conv_layer: Optional[Callable[..., nn.Module]] = None, ) -> None: - if act_layer is None: act_layer = nn.LeakyReLU(0.1, inplace=True) @@ -67,7 +66,7 @@ def __init__( "layers", nn.Sequential( *[ - self._make_layer([_in_chans] + planes, act_layer, norm_layer, drop_layer, conv_layer) + self._make_layer([_in_chans, *planes], act_layer, norm_layer, drop_layer, conv_layer) for _in_chans, planes in zip(in_chans, layout) ] ), @@ -154,11 +153,11 @@ def darknet24(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> Args: pretrained (bool): If True, returns a model pre-trained on ImageNet progress (bool): If True, displays a progress bar of the download to stderr + kwargs: keyword args of _darknet Returns: torch.nn.Module: classification model """ - return _darknet( "darknet24", pretrained, diff --git a/holocron/models/classification/darknetv2.py b/holocron/models/classification/darknetv2.py index c255602c4..21efb41e5 100644 --- a/holocron/models/classification/darknetv2.py +++ b/holocron/models/classification/darknetv2.py @@ -4,6 +4,7 @@ # See LICENSE or go to for full license details. from collections import OrderedDict +from enum import Enum from typing import Any, Callable, Dict, List, Optional, Tuple, Union import torch @@ -12,10 +13,11 @@ from holocron.nn import GlobalAvgPool2d from holocron.nn.init import init_module +from ..checkpoints import Checkpoint, _handle_legacy_pretrained from ..presets import IMAGENETTE -from ..utils import conv_sequence, load_pretrained_params +from ..utils import _checkpoint, _configure_model, conv_sequence -__all__ = ["DarknetV2", "darknet19"] +__all__ = ["DarknetV2", "Darknet19_Checkpoint", "darknet19"] default_cfgs: Dict[str, Dict[str, Any]] = { @@ -39,7 +41,6 @@ def __init__( drop_layer: Optional[Callable[..., nn.Module]] = None, conv_layer: Optional[Callable[..., nn.Module]] = None, ) -> None: - if act_layer is None: act_layer = nn.LeakyReLU(0.1, inplace=True) @@ -140,7 +141,6 @@ def _make_layer( return nn.Sequential(*layers) def forward(self, x: torch.Tensor) -> Union[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]]: - if self.passthrough: self.stem: nn.Sequential self.layers: nn.Sequential @@ -166,7 +166,6 @@ def __init__( drop_layer: Optional[Callable[..., nn.Module]] = None, conv_layer: Optional[Callable[..., nn.Module]] = None, ) -> None: - super().__init__( OrderedDict( [ @@ -185,33 +184,60 @@ def __init__( init_module(self, "leaky_relu") -def _darknet(arch: str, pretrained: bool, progress: bool, layout: List[Tuple[int, int]], **kwargs: Any) -> DarknetV2: +def _darknet( + checkpoint: Union[Checkpoint, None], + progress: bool, + layout: List[Tuple[int, int]], + **kwargs: Any, +) -> DarknetV2: # Build the model model = DarknetV2(layout, **kwargs) - model.default_cfg = default_cfgs[arch] # type: ignore[assignment] - # Load pretrained parameters - if pretrained: - load_pretrained_params(model, default_cfgs[arch]["url"], progress) - - return model + return _configure_model(model, checkpoint, progress=progress) + + +class Darknet19_Checkpoint(Enum): + IMAGENETTE = _checkpoint( + arch="darknet19", + url="https://github.com/frgfm/Holocron/releases/download/v0.2.1/darknet19_224-32fd3f97.pth", + acc1=0.9386, + acc5=0.9936, + sha256="32fd3f979586556554652d650c44a59747c7762d81140cadbcd795179a3877ec", + size=79387724, + num_params=19827626, + 
commit="6e32c5b578711a2ef3731a8f8c61760ed9f03e58", + train_args=( + "./imagenette2-320/ --arch darknet19 --batch-size 64 --mixup-alpha 0.2 --amp --device 0 --epochs 100" + " --lr 1e-3 --label-smoothing 0.1 --random-erase 0.1 --train-crop-size 176 --val-resize-size 232" + " --opt adamw --weight-decay 5e-2" + ), + ) + DEFAULT = IMAGENETTE -def darknet19(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> DarknetV2: +def darknet19( + pretrained: bool = False, + checkpoint: Union[Checkpoint, None] = None, + progress: bool = True, + **kwargs: Any, +) -> DarknetV2: """Darknet-19 from `"YOLO9000: Better, Faster, Stronger" `_ Args: - pretrained (bool): If True, returns a model pre-trained on ImageNet + pretrained (bool): If True, returns a model pre-trained on ImageNette + checkpoint: If specified, the model's parameters will be set to the checkpoint's values progress (bool): If True, displays a progress bar of the download to stderr + kwargs: keyword args of _darknet Returns: torch.nn.Module: classification model - """ - return _darknet( - "darknet19", + .. autoclass:: holocron.models.Darknet19_Checkpoint + :members: + """ + checkpoint = _handle_legacy_pretrained( pretrained, - progress, - [(64, 0), (128, 1), (256, 1), (512, 2), (1024, 2)], - **kwargs, + checkpoint, + Darknet19_Checkpoint.DEFAULT.value, ) + return _darknet(checkpoint, progress, [(64, 0), (128, 1), (256, 1), (512, 2), (1024, 2)], **kwargs) diff --git a/holocron/models/classification/darknetv3.py b/holocron/models/classification/darknetv3.py index 11348c0e2..1fca6fb5d 100644 --- a/holocron/models/classification/darknetv3.py +++ b/holocron/models/classification/darknetv3.py @@ -4,7 +4,8 @@ # See LICENSE or go to for full license details. from collections import OrderedDict -from typing import Any, Callable, Dict, List, Optional, Tuple, Union +from enum import Enum +from typing import Any, Callable, List, Optional, Tuple, Union import torch import torch.nn as nn @@ -12,20 +13,11 @@ from holocron.nn import DropBlock2d, GlobalAvgPool2d from holocron.nn.init import init_module -from ..presets import IMAGENETTE -from ..utils import conv_sequence, load_pretrained_params +from ..checkpoints import Checkpoint, _handle_legacy_pretrained +from ..utils import _checkpoint, _configure_model, conv_sequence from .resnet import _ResBlock -__all__ = ["DarknetV3", "darknet53"] - - -default_cfgs: Dict[str, Dict[str, Any]] = { - "darknet53": { - **IMAGENETTE.__dict__, - "input_shape": (3, 256, 256), - "url": "https://github.com/frgfm/Holocron/releases/download/v0.1.2/darknet53_256-f57b8429.pth", - }, -} +__all__ = ["DarknetV3", "Darknet53_Checkpoint", "darknet53"] class ResBlock(_ResBlock): @@ -90,7 +82,6 @@ def __init__( drop_layer: Optional[Callable[..., nn.Module]] = None, conv_layer: Optional[Callable[..., nn.Module]] = None, ) -> None: - if act_layer is None: act_layer = nn.LeakyReLU(0.1, inplace=True) if norm_layer is None: @@ -143,7 +134,6 @@ def _make_layer( drop_layer: Optional[Callable[..., nn.Module]] = None, conv_layer: Optional[Callable[..., nn.Module]] = None, ) -> nn.Sequential: - layers = conv_sequence( in_planes, out_planes, @@ -166,7 +156,6 @@ def _make_layer( return nn.Sequential(*layers) def forward(self, x: torch.Tensor) -> Union[torch.Tensor, List[torch.Tensor]]: - if self.num_features == 1: return super().forward(x) @@ -194,7 +183,6 @@ def __init__( drop_layer: Optional[Callable[..., nn.Module]] = None, conv_layer: Optional[Callable[..., nn.Module]] = None, ) -> None: - super().__init__( OrderedDict( [ @@ -213,27 
+201,60 @@ def __init__( init_module(self, "leaky_relu") -def _darknet(arch: str, pretrained: bool, progress: bool, layout: List[Tuple[int, int]], **kwargs: Any) -> DarknetV3: +def _darknet( + checkpoint: Union[Checkpoint, None], + progress: bool, + layout: List[Tuple[int, int]], + **kwargs: Any, +) -> DarknetV3: # Build the model model = DarknetV3(layout, **kwargs) - model.default_cfg = default_cfgs[arch] # type: ignore[assignment] - # Load pretrained parameters - if pretrained: - load_pretrained_params(model, default_cfgs[arch]["url"], progress) - - return model - - -def darknet53(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> DarknetV3: + return _configure_model(model, checkpoint, progress=progress) + + +class Darknet53_Checkpoint(Enum): + IMAGENETTE = _checkpoint( + arch="darknet53", + url="https://github.com/frgfm/Holocron/releases/download/v0.2.1/darknet53_224-5015f3fd.pth", + acc1=0.9417, + acc5=0.9957, + sha256="5015f3fdf0963342e0c54790127350375ba269d871feed48f8328b2e43cf7819", + size=162584273, + num_params=40595178, + commit="6e32c5b578711a2ef3731a8f8c61760ed9f03e58", + train_args=( + "./imagenette2-320/ --arch darknet53 --batch-size 64 --mixup-alpha 0.2 --amp --device 0 --epochs 100" + " --lr 1e-3 --label-smoothing 0.1 --random-erase 0.1 --train-crop-size 176 --val-resize-size 232" + " --opt adamw --weight-decay 5e-2" + ), + ) + DEFAULT = IMAGENETTE + + +def darknet53( + pretrained: bool = False, + checkpoint: Union[Checkpoint, None] = None, + progress: bool = True, + **kwargs: Any, +) -> DarknetV3: """Darknet-53 from `"YOLOv3: An Incremental Improvement" `_ Args: pretrained (bool): If True, returns a model pre-trained on ImageNet + checkpoint: If specified, the model's parameters will be set to the checkpoint's values progress (bool): If True, displays a progress bar of the download to stderr + kwargs: keyword args of _darknet Returns: torch.nn.Module: classification model - """ - return _darknet("darknet53", pretrained, progress, [(64, 1), (128, 2), (256, 8), (512, 8), (1024, 4)], **kwargs) + .. autoclass:: holocron.models.Darknet53_Checkpoint + :members: + """ + checkpoint = _handle_legacy_pretrained( + pretrained, + checkpoint, + Darknet53_Checkpoint.DEFAULT.value, + ) + return _darknet(checkpoint, progress, [(64, 1), (128, 2), (256, 8), (512, 8), (1024, 4)], **kwargs) diff --git a/holocron/models/classification/darknetv4.py b/holocron/models/classification/darknetv4.py index d2b769781..e5f5d1f91 100644 --- a/holocron/models/classification/darknetv4.py +++ b/holocron/models/classification/darknetv4.py @@ -4,6 +4,7 @@ # See LICENSE or go to for full license details. 
from collections import OrderedDict +from enum import Enum from typing import Any, Callable, Dict, List, Optional, Tuple, Union import torch @@ -12,11 +13,12 @@ from holocron.nn import DropBlock2d, GlobalAvgPool2d from holocron.nn.init import init_module +from ..checkpoints import Checkpoint, _handle_legacy_pretrained from ..presets import IMAGENETTE -from ..utils import conv_sequence, load_pretrained_params +from ..utils import _checkpoint, _configure_model, conv_sequence from .darknetv3 import ResBlock -__all__ = ["DarknetV4", "cspdarknet53", "cspdarknet53_mish"] +__all__ = ["DarknetV4", "CSPDarknet53_Checkpoint", "cspdarknet53", "CSPDarknet53_Mish_Checkpoint", "cspdarknet53_mish"] default_cfgs: Dict[str, Dict[str, Any]] = { @@ -125,7 +127,6 @@ def __init__( drop_layer: Optional[Callable[..., nn.Module]] = None, conv_layer: Optional[Callable[..., nn.Module]] = None, ) -> None: - super().__init__() if act_layer is None: @@ -172,7 +173,6 @@ def __init__( self.num_features = num_features def forward(self, x: torch.Tensor) -> Union[torch.Tensor, List[torch.Tensor]]: - if self.num_features == 1: return super().forward(x) @@ -201,7 +201,6 @@ def __init__( drop_layer: Optional[Callable[..., nn.Module]] = None, conv_layer: Optional[Callable[..., nn.Module]] = None, ) -> None: - super().__init__( OrderedDict( [ @@ -227,48 +226,112 @@ def __init__( init_module(self, "leaky_relu") -def _darknet(arch: str, pretrained: bool, progress: bool, layout: List[Tuple[int, int]], **kwargs: Any) -> DarknetV4: +def _darknet( + checkpoint: Union[Checkpoint, None], + progress: bool, + layout: List[Tuple[int, int]], + **kwargs: Any, +) -> DarknetV4: # Build the model model = DarknetV4(layout, **kwargs) - model.default_cfg = default_cfgs[arch] # type: ignore[assignment] - # Load pretrained parameters - if pretrained: - load_pretrained_params(model, default_cfgs[arch]["url"], progress) - - return model + return _configure_model(model, checkpoint, progress=progress) + + +class CSPDarknet53_Checkpoint(Enum): + IMAGENETTE = _checkpoint( + arch="cspdarknet53", + url="https://github.com/frgfm/Holocron/releases/download/v0.2.1/cspdarknet53_224-7a69463a.pth", + acc1=0.9450, + acc5=0.9964, + sha256="7a69463a4bd445beb6691dfd6ef7378efcf941f75d07d60034106ebedfcb82f8", + size=106732575, + num_params=26627434, + commit="6e32c5b578711a2ef3731a8f8c61760ed9f03e58", + train_args=( + "./imagenette2-320/ --arch cspdarknet53 --batch-size 64 --mixup-alpha 0.2 --amp --device 0 --epochs 100" + " --lr 1e-3 --label-smoothing 0.1 --random-erase 0.1 --train-crop-size 176 --val-resize-size 232" + " --opt adamw --weight-decay 5e-2" + ), + ) + DEFAULT = IMAGENETTE -def cspdarknet53(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> DarknetV4: +def cspdarknet53( + pretrained: bool = False, + checkpoint: Union[Checkpoint, None] = None, + progress: bool = True, + **kwargs: Any, +) -> DarknetV4: """CSP-Darknet-53 from `"CSPNet: A New Backbone that can Enhance Learning Capability of CNN" `_ Args: pretrained (bool): If True, returns a model pre-trained on ImageNet + checkpoint: If specified, the model's parameters will be set to the checkpoint's values progress (bool): If True, displays a progress bar of the download to stderr + kwargs: keyword args of _darknet Returns: torch.nn.Module: classification model - """ - return _darknet("cspdarknet53", pretrained, progress, [(64, 1), (128, 2), (256, 8), (512, 8), (1024, 4)], **kwargs) + .. 
autoclass:: holocron.models.CSPDarknet53_Checkpoint + :members: + """ + checkpoint = _handle_legacy_pretrained( + pretrained, + checkpoint, + CSPDarknet53_Checkpoint.DEFAULT.value, + ) + return _darknet(checkpoint, progress, [(64, 1), (128, 2), (256, 8), (512, 8), (1024, 4)], **kwargs) + + +class CSPDarknet53_Mish_Checkpoint(Enum): + IMAGENETTE = _checkpoint( + arch="cspdarknet53_mish", + url="https://github.com/frgfm/Holocron/releases/download/v0.2.1/cspdarknet53_mish_224-1b660b3c.pth", + acc1=0.9465, + acc5=0.9969, + sha256="1b660b3cb144195100c99ee3b9b863c37a5b5a59619c8de8c588b3d2af954b15", + size=106737530, + num_params=26627434, + commit="6e32c5b578711a2ef3731a8f8c61760ed9f03e58", + train_args=( + "./imagenette2-320/ --arch cspdarknet53_mish --batch-size 32 --grad-acc 2 --mixup-alpha 0.2 --amp" + " --device 0 --epochs 100 --lr 1e-3 --label-smoothing 0.1 --random-erase 0.1 --train-crop-size 176" + " --val-resize-size 232 --opt adamw --weight-decay 5e-2" + ), + ) + DEFAULT = IMAGENETTE -def cspdarknet53_mish(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> DarknetV4: +def cspdarknet53_mish( + pretrained: bool = False, + checkpoint: Union[Checkpoint, None] = None, + progress: bool = True, + **kwargs: Any, +) -> DarknetV4: """Modified version of CSP-Darknet-53 from `"CSPNet: A New Backbone that can Enhance Learning Capability of CNN" `_ with Mish as activation layer and DropBlock as regularization layer. Args: pretrained (bool): If True, returns a model pre-trained on ImageNet + checkpoint: If specified, the model's parameters will be set to the checkpoint's values progress (bool): If True, displays a progress bar of the download to stderr + kwargs: keyword args of _darknet Returns: torch.nn.Module: classification model - """ + .. 
autoclass:: holocron.models.CSPDarknet53_Mish_Checkpoint + :members: + """ kwargs["act_layer"] = nn.Mish(inplace=True) kwargs["drop_layer"] = DropBlock2d - return _darknet( - "cspdarknet53_mish", pretrained, progress, [(64, 1), (128, 2), (256, 8), (512, 8), (1024, 4)], **kwargs + checkpoint = _handle_legacy_pretrained( + pretrained, + checkpoint, + CSPDarknet53_Mish_Checkpoint.DEFAULT.value, ) + return _darknet(checkpoint, progress, [(64, 1), (128, 2), (256, 8), (512, 8), (1024, 4)], **kwargs) diff --git a/holocron/models/classification/mobileone.py b/holocron/models/classification/mobileone.py index 180331a04..f4087de36 100644 --- a/holocron/models/classification/mobileone.py +++ b/holocron/models/classification/mobileone.py @@ -13,18 +13,8 @@ from holocron.nn import GlobalAvgPool2d, init -from ..checkpoints import ( - Checkpoint, - Dataset, - Evaluation, - LoadingMeta, - Metric, - PreProcessing, - TrainingRecipe, - _handle_legacy_pretrained, -) -from ..presets import IMAGENETTE -from ..utils import _configure_model, conv_sequence, fuse_conv_bn +from ..checkpoints import Checkpoint, _handle_legacy_pretrained +from ..utils import _checkpoint, _configure_model, conv_sequence, fuse_conv_bn __all__ = [ "MobileOne_S0_Checkpoint", @@ -200,7 +190,6 @@ def __init__( act_layer: Optional[nn.Module] = None, norm_layer: Optional[Callable[[int], nn.Module]] = None, ) -> None: - if norm_layer is None: norm_layer = nn.BatchNorm2d if act_layer is None: @@ -263,24 +252,7 @@ def _mobileone( return _configure_model(model, checkpoint, progress=progress) -def _checkpoint( - arch: str, url: str, acc1: float, acc5: float, sha256: str, size: int, num_params: int, commit: str, train_args: str -) -> Checkpoint: - return Checkpoint( - evaluation=Evaluation( - dataset=Dataset.IMAGENETTE, - results={Metric.TOP1_ACC: acc1, Metric.TOP5_ACC: acc5}, - ), - meta=LoadingMeta( - url=url, sha256=sha256, size=size, num_params=num_params, arch=arch, categories=IMAGENETTE.classes - ), - pre_processing=PreProcessing(input_shape=(3, 224, 224), mean=IMAGENETTE.mean, std=IMAGENETTE.std), - recipe=TrainingRecipe(commit=commit, script="references/classification/train.py", args=train_args), - ) - - class MobileOne_S0_Checkpoint(Enum): - IMAGENETTE = _checkpoint( arch="mobileone_s0", url="https://github.com/frgfm/Holocron/releases/download/v0.2.1/mobileone_s0_224-9ddd1fe9.pth", @@ -312,6 +284,7 @@ def mobileone_s0( pretrained (bool): If True, returns a model pre-trained on ImageNet checkpoint: If specified, the model's parameters will be set to the checkpoint's values progress (bool): If True, displays a progress bar of the download to stderr + kwargs: keyword args of _mobileone Returns: torch.nn.Module: classification model @@ -328,7 +301,6 @@ def mobileone_s0( class MobileOne_S1_Checkpoint(Enum): - IMAGENETTE = _checkpoint( arch="mobileone_s1", url="https://github.com/frgfm/Holocron/releases/download/v0.2.1/mobileone_s1_224-d4ec5433.pth", @@ -360,6 +332,7 @@ def mobileone_s1( pretrained (bool): If True, returns a model pre-trained on ImageNet checkpoint: If specified, the model's parameters will be set to the checkpoint's values progress (bool): If True, displays a progress bar of the download to stderr + kwargs: keyword args of _mobileone Returns: torch.nn.Module: classification model @@ -376,7 +349,6 @@ def mobileone_s1( class MobileOne_S2_Checkpoint(Enum): - IMAGENETTE = _checkpoint( arch="mobileone_s2", url="https://github.com/frgfm/Holocron/releases/download/v0.2.1/mobileone_s2_224-b748859c.pth", @@ -408,6 +380,7 @@ def 
mobileone_s2( pretrained (bool): If True, returns a model pre-trained on ImageNet checkpoint: If specified, the model's parameters will be set to the checkpoint's values progress (bool): If True, displays a progress bar of the download to stderr + kwargs: keyword args of _mobileone Returns: torch.nn.Module: classification model @@ -424,7 +397,6 @@ def mobileone_s2( class MobileOne_S3_Checkpoint(Enum): - IMAGENETTE = _checkpoint( arch="mobileone_s3", url="https://github.com/frgfm/Holocron/releases/download/v0.2.1/mobileone_s3_224-7f357baf.pth", @@ -456,6 +428,7 @@ def mobileone_s3( pretrained (bool): If True, returns a model pre-trained on ImageNet checkpoint: If specified, the model's parameters will be set to the checkpoint's values progress (bool): If True, displays a progress bar of the download to stderr + kwargs: keyword args of _mobileone Returns: torch.nn.Module: classification model diff --git a/holocron/models/classification/pyconv_resnet.py b/holocron/models/classification/pyconv_resnet.py index fd46619a4..893ee61e3 100644 --- a/holocron/models/classification/pyconv_resnet.py +++ b/holocron/models/classification/pyconv_resnet.py @@ -39,7 +39,7 @@ def __init__( planes: int, stride: int = 1, downsample: Optional[Module] = None, - groups: List[int] = [1], + groups: Optional[List[int]] = None, base_width: int = 64, dilation: int = 1, act_layer: Optional[Module] = None, @@ -48,7 +48,8 @@ def __init__( num_levels: int = 2, **kwargs: Any, ) -> None: - + if groups is None: + groups = [1] width = int(planes * (base_width / 64.0)) * min(groups) super().__init__( @@ -112,7 +113,6 @@ def _pyconvresnet( groups: List[List[int]], **kwargs: Any, ) -> ResNet: - # Build the model model = ResNet( block, # type: ignore[arg-type] @@ -120,7 +120,7 @@ def _pyconvresnet( out_chans, stem_pool=False, width_per_group=width_per_group, - block_args=[dict(num_levels=len(group), groups=group) for group in groups], + block_args=[{"num_levels": len(group), "groups": group} for group in groups], **kwargs, ) model.default_cfg = default_cfgs[arch] # type: ignore[assignment] @@ -138,11 +138,11 @@ def pyconv_resnet50(pretrained: bool = False, progress: bool = True, **kwargs: A Args: pretrained (bool): If True, returns a model pre-trained on ImageNet progress (bool): If True, displays a progress bar of the download to stderr + kwargs: keyword args of _pyconvresnet Returns: torch.nn.Module: classification model """ - return _pyconvresnet( "pyconv_resnet50", pretrained, @@ -163,11 +163,11 @@ def pyconvhg_resnet50(pretrained: bool = False, progress: bool = True, **kwargs: Args: pretrained (bool): If True, returns a model pre-trained on ImageNet progress (bool): If True, displays a progress bar of the download to stderr + kwargs: keyword args of _pyconvresnet Returns: torch.nn.Module: classification model """ - return _pyconvresnet( "pyconvhg_resnet50", pretrained, diff --git a/holocron/models/classification/repvgg.py b/holocron/models/classification/repvgg.py index 3e595b4e2..0a1e56b6c 100644 --- a/holocron/models/classification/repvgg.py +++ b/holocron/models/classification/repvgg.py @@ -12,18 +12,8 @@ from holocron.nn import GlobalAvgPool2d, init -from ..checkpoints import ( - Checkpoint, - Dataset, - Evaluation, - LoadingMeta, - Metric, - PreProcessing, - TrainingRecipe, - _handle_legacy_pretrained, -) -from ..presets import IMAGENETTE -from ..utils import _configure_model, conv_sequence, fuse_conv_bn +from ..checkpoints import Checkpoint, _handle_legacy_pretrained +from ..utils import _checkpoint, _configure_model, 
conv_sequence, fuse_conv_bn __all__ = [ "RepVGG", @@ -81,7 +71,6 @@ def __init__( self.branches.append(norm_layer(planes)) def forward(self, x: torch.Tensor) -> torch.Tensor: - if isinstance(self.branches, nn.Conv2d): out = self.branches(x) else: @@ -148,7 +137,6 @@ def __init__( act_layer: Optional[nn.Module] = None, norm_layer: Optional[Callable[[int], nn.Module]] = None, ) -> None: - if norm_layer is None: norm_layer = nn.BatchNorm2d if act_layer is None: @@ -203,24 +191,7 @@ def _repvgg( return _configure_model(model, checkpoint, progress=progress) -def _checkpoint( - arch: str, url: str, acc1: float, acc5: float, sha256: str, size: int, num_params: int, commit: str, train_args: str -) -> Checkpoint: - return Checkpoint( - evaluation=Evaluation( - dataset=Dataset.IMAGENETTE, - results={Metric.TOP1_ACC: acc1, Metric.TOP5_ACC: acc5}, - ), - meta=LoadingMeta( - url=url, sha256=sha256, size=size, num_params=num_params, arch=arch, categories=IMAGENETTE.classes - ), - pre_processing=PreProcessing(input_shape=(3, 224, 224), mean=IMAGENETTE.mean, std=IMAGENETTE.std), - recipe=TrainingRecipe(commit=commit, script="references/classification/train.py", args=train_args), - ) - - class RepVGG_A0_Checkpoint(Enum): - IMAGENETTE = _checkpoint( arch="repvgg_a0", url="https://github.com/frgfm/Holocron/releases/download/v0.2.1/repvgg_a0_224-d3f54b28.pth", @@ -252,6 +223,7 @@ def repvgg_a0( pretrained: If True, returns a model pre-trained on ImageNette checkpoint: If specified, the model's parameters will be set to the checkpoint's values progress: If True, displays a progress bar of the download to stderr + kwargs: keyword args of _repvgg Returns: torch.nn.Module: classification model @@ -268,7 +240,6 @@ def repvgg_a0( class RepVGG_A1_Checkpoint(Enum): - IMAGENETTE = _checkpoint( arch="repvgg_a1", url="https://github.com/frgfm/Holocron/releases/download/v0.2.1/repvgg_a1_224-8d3269fb.pth", @@ -300,6 +271,7 @@ def repvgg_a1( pretrained: If True, returns a model pre-trained on ImageNette checkpoint: If specified, the model's parameters will be set to the checkpoint's values progress: If True, displays a progress bar of the download to stderr + kwargs: keyword args of _repvgg Returns: torch.nn.Module: classification model @@ -316,7 +288,6 @@ def repvgg_a1( class RepVGG_A2_Checkpoint(Enum): - IMAGENETTE = _checkpoint( arch="repvgg_a2", url="https://github.com/frgfm/Holocron/releases/download/v0.2.1/repvgg_a2_224-cb442207.pth", @@ -348,6 +319,7 @@ def repvgg_a2( pretrained: If True, returns a model pre-trained on ImageNette checkpoint: If specified, the model's parameters will be set to the checkpoint's values progress: If True, displays a progress bar of the download to stderr + kwargs: keyword args of _repvgg Returns: torch.nn.Module: classification model @@ -364,7 +336,6 @@ def repvgg_a2( class RepVGG_B0_Checkpoint(Enum): - IMAGENETTE = _checkpoint( arch="repvgg_b0", url="https://github.com/frgfm/Holocron/releases/download/v0.2.1/repvgg_b0_224-fdcdd2b7.pth", @@ -396,6 +367,7 @@ def repvgg_b0( pretrained: If True, returns a model pre-trained on ImageNette checkpoint: If specified, the model's parameters will be set to the checkpoint's values progress: If True, displays a progress bar of the download to stderr + kwargs: keyword args of _repvgg Returns: torch.nn.Module: classification model @@ -412,7 +384,6 @@ def repvgg_b0( class RepVGG_B1_Checkpoint(Enum): - IMAGENETTE = _checkpoint( arch="repvgg_b1", url="https://github.com/frgfm/Holocron/releases/download/v0.2.1/repvgg_b1_224-3e5b28d7.pth", @@ -444,6 
+415,7 @@ def repvgg_b1( pretrained: If True, returns a model pre-trained on ImageNette checkpoint: If specified, the model's parameters will be set to the checkpoint's values progress: If True, displays a progress bar of the download to stderr + kwargs: keyword args of _repvgg Returns: torch.nn.Module: classification model @@ -460,7 +432,6 @@ def repvgg_b1( class RepVGG_B2_Checkpoint(Enum): - IMAGENETTE = _checkpoint( arch="repvgg_b2", url="https://github.com/frgfm/Holocron/releases/download/v0.2.1/repvgg_b2_224-dc810d88.pth", @@ -492,6 +463,7 @@ def repvgg_b2( pretrained: If True, returns a model pre-trained on ImageNette checkpoint: If specified, the model's parameters will be set to the checkpoint's values progress: If True, displays a progress bar of the download to stderr + kwargs: keyword args of _repvgg Returns: torch.nn.Module: classification model @@ -520,12 +492,10 @@ def repvgg_b3( pretrained: If True, returns a model pre-trained on ImageNette checkpoint: If specified, the model's parameters will be set to the checkpoint's values progress: If True, displays a progress bar of the download to stderr + kwargs: keyword args of _repvgg Returns: torch.nn.Module: classification model - - .. autoclass:: holocron.models.RepVGG_B3_Checkpoint - :members: """ checkpoint = _handle_legacy_pretrained( pretrained, diff --git a/holocron/models/classification/res2net.py b/holocron/models/classification/res2net.py index ad79317c4..1c138df82 100644 --- a/holocron/models/classification/res2net.py +++ b/holocron/models/classification/res2net.py @@ -9,25 +9,17 @@ """ import math -from typing import Any, Callable, Dict, List, Optional +from enum import Enum +from typing import Any, Callable, List, Optional, Union import torch import torch.nn as nn -from ..presets import IMAGENETTE -from ..utils import conv_sequence, load_pretrained_params +from ..checkpoints import Checkpoint, _handle_legacy_pretrained +from ..utils import _checkpoint, _configure_model, conv_sequence from .resnet import ResNet, _ResBlock -__all__ = ["Bottle2neck", "res2net50_26w_4s"] - - -default_cfgs: Dict[str, Dict[str, Any]] = { - "res2net50_26w_4s": { - **IMAGENETTE.__dict__, - "input_shape": (3, 224, 224), - "url": "https://github.com/frgfm/Holocron/releases/download/v0.1.2/res2net50_26w_4s_224-97cfc954.pth", - }, -} +__all__ = ["Bottle2neck", "Res2Net50_26w_4s_Checkpoint", "res2net50_26w_4s"] class ScaleConv2d(nn.Module): @@ -73,7 +65,6 @@ def __init__( self.downsample = None # type: ignore[assignment] def forward(self, x: torch.Tensor) -> torch.Tensor: - # Split the channel dimension into groups of self.width channels split_x = torch.split(x, self.width, 1) out = [] @@ -151,8 +142,7 @@ def __init__( def _res2net( - arch: str, - pretrained: bool, + checkpoint: Union[Checkpoint, None], progress: bool, num_blocks: List[int], out_chans: List[int], @@ -161,32 +151,60 @@ def _res2net( **kwargs: Any, ) -> ResNet: # Build the model - model = ResNet( + model = model = ResNet( Bottle2neck, # type: ignore[arg-type] num_blocks, out_chans, width_per_group=width_per_group, - block_args=dict(scale=scale), + block_args={"scale": scale}, **kwargs, ) - model.default_cfg = default_cfgs[arch] # type: ignore[assignment] - # Load pretrained parameters - if pretrained: - load_pretrained_params(model, default_cfgs[arch]["url"], progress) - - return model + return _configure_model(model, checkpoint, progress=progress) + + +class Res2Net50_26w_4s_Checkpoint(Enum): + IMAGENETTE = _checkpoint( + arch="res2net50_26w_4s", + 
url="https://github.com/frgfm/Holocron/releases/download/v0.2.1/res2net50_26w_4s_224-345170e8.pth", + acc1=0.9394, + acc5=0.9941, + sha256="345170e8ff75d10330af55674090b0d9aa751e14b6f3b4a95bb8ea6cdd65be4b", + size=95020747, + num_params=23670610, + commit="6e32c5b578711a2ef3731a8f8c61760ed9f03e58", + train_args=( + "./imagenette2-320/ --arch res2net50_26w_4s --batch-size 64 --mixup-alpha 0.2 --amp --device 0 --epochs 100" + " --lr 1e-3 --label-smoothing 0.1 --random-erase 0.1 --train-crop-size 176 --val-resize-size 232" + " --opt adamw --weight-decay 5e-2" + ), + ) + DEFAULT = IMAGENETTE -def res2net50_26w_4s(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> ResNet: +def res2net50_26w_4s( + pretrained: bool = False, + checkpoint: Union[Checkpoint, None] = None, + progress: bool = True, + **kwargs: Any, +) -> ResNet: """Res2Net-50 26wx4s from `"Res2Net: A New Multi-scale Backbone Architecture" `_ Args: pretrained (bool): If True, returns a model pre-trained on ImageNet + checkpoint: If specified, the model's parameters will be set to the checkpoint's values progress (bool): If True, displays a progress bar of the download to stderr + kwargs: keyword args of _res2net Returns: torch.nn.Module: classification model - """ - return _res2net("res2net50_26w_4s", pretrained, progress, [3, 4, 6, 3], [64, 128, 256, 512], 26, 4, **kwargs) + .. autoclass:: holocron.models.Res2Net50_26w_4s_Checkpoint + :members: + """ + checkpoint = _handle_legacy_pretrained( + pretrained, + checkpoint, + Res2Net50_26w_4s_Checkpoint.DEFAULT.value, + ) + return _res2net(checkpoint, progress, [3, 4, 6, 3], [64, 128, 256, 512], 26, 4, **kwargs) diff --git a/holocron/models/classification/resnet.py b/holocron/models/classification/resnet.py index c434b303d..197d433e6 100644 --- a/holocron/models/classification/resnet.py +++ b/holocron/models/classification/resnet.py @@ -4,6 +4,7 @@ # See LICENSE or go to for full license details. 
from collections import OrderedDict +from enum import Enum from typing import Any, Callable, Dict, List, Optional, Type, Union import torch.nn as nn @@ -11,20 +12,26 @@ from holocron.nn import GlobalAvgPool2d, init +from ..checkpoints import Checkpoint, _handle_legacy_pretrained from ..presets import IMAGENET, IMAGENETTE -from ..utils import conv_sequence, load_pretrained_params +from ..utils import _checkpoint, _configure_model, conv_sequence __all__ = [ "BasicBlock", "Bottleneck", "ResNet", + "ResNet18_Checkpoint", "resnet18", + "ResNet34_Checkpoint", "resnet34", + "ResNet50_Checkpoint", "resnet50", "resnet101", "resnet152", + "ResNeXt50_32x4d_Checkpoint", "resnext50_32x4d", "resnext101_32x8d", + "ResNet50D_Checkpoint", "resnet50d", ] @@ -50,7 +57,6 @@ class _ResBlock(nn.Module): - expansion: int = 1 def __init__( @@ -82,7 +88,6 @@ def forward(self, x: Tensor) -> Tensor: class BasicBlock(_ResBlock): - expansion: int = 1 def __init__( @@ -139,7 +144,6 @@ def __init__( class Bottleneck(_ResBlock): - expansion: int = 4 def __init__( @@ -157,7 +161,6 @@ def __init__( conv_layer: Optional[Callable[..., nn.Module]] = None, **kwargs: Any, ) -> None: - width = int(planes * (base_width / 64.0)) * groups super().__init__( [ @@ -238,7 +241,6 @@ def __init__( num_repeats: int = 1, block_args: Optional[Union[Dict[str, Any], List[Dict[str, Any]]]] = None, ) -> None: - if conv_layer is None: conv_layer = nn.Conv2d if norm_layer is None: @@ -312,7 +314,7 @@ def __init__( stride = 1 # Block args if block_args is None: - block_args = dict(groups=1) + block_args = {"groups": 1} if not isinstance(block_args, list): block_args = [block_args] * len(num_blocks) for _num_blocks, _planes, _block_args in zip(num_blocks, planes, block_args): @@ -372,7 +374,6 @@ def _make_layer( num_repeats: int = 1, block_args: Optional[Dict[str, Any]] = None, ) -> nn.Sequential: - downsample = None if stride != 1 or in_planes != planes * block.expansion: # Downsampling from ResNet-D @@ -420,9 +421,8 @@ def _make_layer( **block_args, ) ] - - for _ in range(num_blocks - 1): - layers.append( + layers.extend( + [ block( block.expansion * planes, planes, @@ -434,94 +434,208 @@ def _make_layer( drop_layer=drop_layer, **block_args, ) - ) + for _ in range(num_blocks - 1) + ] + ) return nn.Sequential(*layers) def _resnet( - arch: str, - pretrained: bool, + checkpoint: Union[Checkpoint, None], progress: bool, block: Type[Union[BasicBlock, Bottleneck]], num_blocks: List[int], out_chans: List[int], **kwargs: Any, ) -> ResNet: - - kwargs["num_classes"] = kwargs.get("num_classes", len(default_cfgs[arch]["classes"])) - # Build the model model = ResNet(block, num_blocks, out_chans, **kwargs) - model.default_cfg = default_cfgs[arch] # type: ignore[assignment] - # Load pretrained parameters - if pretrained: - load_pretrained_params(model, default_cfgs[arch]["url"], progress) - - return model + return _configure_model(model, checkpoint, progress=progress) + + +class ResNet18_Checkpoint(Enum): + IMAGENETTE = _checkpoint( + arch="resnet18", + url="https://github.com/frgfm/Holocron/releases/download/v0.2.1/resnet18_224-fc07006c.pth", + acc1=0.9361, + acc5=0.9946, + sha256="fc07006c894cac8cf380fed699bc5a68463698753c954632f52bb8595040f781", + size=44787043, + num_params=11181642, + commit="6e32c5b578711a2ef3731a8f8c61760ed9f03e58", + train_args=( + "./imagenette2-320/ --arch resnet18 --batch-size 64 --mixup-alpha 0.2 --amp --device 0 --epochs 100" + " --lr 1e-3 --label-smoothing 0.1 --random-erase 0.1 --train-crop-size 176 --val-resize-size 232" + " 
--opt adamw --weight-decay 5e-2" + ), + ) + DEFAULT = IMAGENETTE -def resnet18(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> ResNet: +def resnet18( + pretrained: bool = False, + checkpoint: Union[Checkpoint, None] = None, + progress: bool = True, + **kwargs: Any, +) -> ResNet: """ResNet-18 from `"Deep Residual Learning for Image Recognition" `_ Args: - pretrained (bool): If True, returns a model pre-trained on ImageNet - progress (bool): If True, displays a progress bar of the download to stderr + pretrained: If True, returns a model pre-trained on ImageNet + checkpoint: If specified, loads that checkpoint + progress: If True, displays a progress bar of the download to stderr + kwargs: keyword args of _resnet Returns: torch.nn.Module: classification model - """ - return _resnet("resnet18", pretrained, progress, BasicBlock, [2, 2, 2, 2], [64, 128, 256, 512], **kwargs) + .. autoclass:: holocron.models.ResNet18_Checkpoint + :members: + """ + checkpoint = _handle_legacy_pretrained( + pretrained, + checkpoint, + ResNet18_Checkpoint.DEFAULT.value, + ) + return _resnet(checkpoint, progress, BasicBlock, [2, 2, 2, 2], [64, 128, 256, 512], **kwargs) + + +class ResNet34_Checkpoint(Enum): + IMAGENETTE = _checkpoint( + arch="resnet34", + url="https://github.com/frgfm/Holocron/releases/download/v0.2.1/resnet34_224-412b0792.pth", + acc1=0.9381, + acc5=0.9949, + sha256="412b07927cc1938ee3add8d0f6bb18b42786646182f674d75f1433d086914485", + size=85267035, + num_params=21289802, + commit="6e32c5b578711a2ef3731a8f8c61760ed9f03e58", + train_args=( + "./imagenette2-320/ --arch resnet34 --batch-size 64 --mixup-alpha 0.2 --amp --device 0 --epochs 100" + " --lr 1e-3 --label-smoothing 0.1 --random-erase 0.1 --train-crop-size 176 --val-resize-size 232" + " --opt adamw --weight-decay 5e-2" + ), + ) + DEFAULT = IMAGENETTE -def resnet34(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> ResNet: +def resnet34( + pretrained: bool = False, + checkpoint: Union[Checkpoint, None] = None, + progress: bool = True, + **kwargs: Any, +) -> ResNet: """ResNet-34 from `"Deep Residual Learning for Image Recognition" `_ Args: - pretrained (bool): If True, returns a model pre-trained on ImageNet - progress (bool): If True, displays a progress bar of the download to stderr + pretrained: If True, returns a model pre-trained on ImageNet + checkpoint: If specified, load that checkpoint on the model + progress: If True, displays a progress bar of the download to stderr + kwargs: keyword args of _resnet Returns: torch.nn.Module: classification model - """ - return _resnet("resnet34", pretrained, progress, BasicBlock, [3, 4, 6, 3], [64, 128, 256, 512], **kwargs) + .. 
autoclass:: holocron.models.ResNet34_Checkpoint + :members: + """ + checkpoint = _handle_legacy_pretrained( + pretrained, + checkpoint, + ResNet34_Checkpoint.DEFAULT.value, + ) + return _resnet(checkpoint, progress, BasicBlock, [3, 4, 6, 3], [64, 128, 256, 512], **kwargs) + + +class ResNet50_Checkpoint(Enum): + IMAGENETTE = _checkpoint( + arch="resnet50", + url="https://github.com/frgfm/Holocron/releases/download/v0.2.1/resnet50_224-5b913f0b.pth", + acc1=0.9378, + acc5=0.9954, + sha256="5b913f0b8148b483ba15541ab600cf354ca42b326e4896c4c3dbc51eb1e80e70", + size=94384682, + num_params=23528522, + commit="6e32c5b578711a2ef3731a8f8c61760ed9f03e58", + train_args=( + "./imagenette2-320/ --arch resnet50 --batch-size 64 --mixup-alpha 0.2 --amp --device 0 --epochs 100" + " --lr 1e-3 --label-smoothing 0.1 --random-erase 0.1 --train-crop-size 176 --val-resize-size 232" + " --opt adamw --weight-decay 5e-2" + ), + ) + DEFAULT = IMAGENETTE -def resnet50(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> ResNet: +def resnet50( + pretrained: bool = False, + checkpoint: Union[Checkpoint, None] = None, + progress: bool = True, + **kwargs: Any, +) -> ResNet: """ResNet-50 from `"Deep Residual Learning for Image Recognition" `_ Args: - pretrained (bool): If True, returns a model pre-trained on ImageNet - progress (bool): If True, displays a progress bar of the download to stderr + pretrained: If True, returns a model pre-trained on ImageNet + checkpoint: If specified, load that checkpoint on the model + progress: If True, displays a progress bar of the download to stderr + kwargs: keyword args of _resnet Returns: torch.nn.Module: classification model - """ - return _resnet("resnet50", pretrained, progress, Bottleneck, [3, 4, 6, 3], [64, 128, 256, 512], **kwargs) + .. autoclass:: holocron.models.ResNet50_Checkpoint + :members: + """ + checkpoint = _handle_legacy_pretrained( + pretrained, + checkpoint, + ResNet50_Checkpoint.DEFAULT.value, + ) + return _resnet(checkpoint, progress, Bottleneck, [3, 4, 6, 3], [64, 128, 256, 512], **kwargs)
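The helper `_handle_legacy_pretrained` is not itself part of this diff; what follows is a hypothetical sketch of the contract implied by its call sites (an explicit `checkpoint` wins, the legacy `pretrained` flag falls back to the architecture's default checkpoint, and `None` means random initialization):

    # Hypothetical sketch only: the real helper lives in holocron.models.checkpoints
    # and may additionally emit a deprecation warning for the legacy flag.
    from typing import Union

    from holocron.models.checkpoints import Checkpoint

    def _handle_legacy_pretrained(
        pretrained: bool,
        checkpoint: Union[Checkpoint, None],
        default_checkpoint: Union[Checkpoint, None],
    ) -> Union[Checkpoint, None]:
        # An explicit checkpoint always takes precedence
        if checkpoint is not None:
            return checkpoint
        # The legacy boolean maps to the architecture's default checkpoint
        return default_checkpoint if pretrained else None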
+ + +class ResNet50D_Checkpoint(Enum): + IMAGENETTE = _checkpoint( + arch="resnet50d", + url="https://github.com/frgfm/Holocron/releases/download/v0.2.1/resnet50d_224-6218d936.pth", + acc1=0.9465, + acc5=0.9952, + sha256="6218d936fa67c0047f1ec65564213db538aa826d84f2df1d4fa3224531376e6c", + size=94464810, + num_params=23547754, + commit="6e32c5b578711a2ef3731a8f8c61760ed9f03e58", + train_args=( + "./imagenette2-320/ --arch resnet50d --batch-size 64 --mixup-alpha 0.2 --amp --device 0 --epochs 100" + " --lr 1e-3 --label-smoothing 0.1 --random-erase 0.1 --train-crop-size 176 --val-resize-size 232" + " --opt adamw --weight-decay 5e-2" + ), + ) + DEFAULT = IMAGENETTE -def resnet50d(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> ResNet: +def resnet50d( + pretrained: bool = False, + checkpoint: Union[Checkpoint, None] = None, + progress: bool = True, + **kwargs: Any, +) -> ResNet: """ResNet-50-D from `"Bag of Tricks for Image Classification with Convolutional Neural Networks" `_ Args: - pretrained (bool): If True, returns a model pre-trained on ImageNet - progress (bool): If True, displays a progress bar of the download to stderr + pretrained: If True, returns a model pre-trained on ImageNet + checkpoint: If specified, load that checkpoint on the model + progress: If True, displays a progress bar of the download to stderr + kwargs: keyword args of _resnet Returns: torch.nn.Module: classification model - """ + .. autoclass:: holocron.models.ResNet50D_Checkpoint + :members: + """ + checkpoint = _handle_legacy_pretrained( + pretrained, + checkpoint, + ResNet50D_Checkpoint.DEFAULT.value, + ) return _resnet( - "resnet50d", - pretrained, + checkpoint, progress, Bottleneck, [3, 4, 6, 3], @@ -532,53 +646,92 @@ def resnet50d(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> ) -def resnet101(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> ResNet: +def resnet101( + pretrained: bool = False, + checkpoint: Union[Checkpoint, None] = None, + progress: bool = True, + **kwargs: Any, +) -> ResNet: """ResNet-101 from `"Deep Residual Learning for Image Recognition" `_ Args: - pretrained (bool): If True, returns a model pre-trained on ImageNet - progress (bool): If True, displays a progress bar of the download to stderr + pretrained: If True, returns a model pre-trained on ImageNet + checkpoint: If specified, load that checkpoint on the model + progress: If True, displays a progress bar of the download to stderr + kwargs: keyword args of _resnet Returns: torch.nn.Module: classification model """ - - return _resnet("resnet101", pretrained, progress, Bottleneck, [3, 4, 23, 3], [64, 128, 256, 512], **kwargs) + return _resnet(checkpoint, progress, Bottleneck, [3, 4, 23, 3], [64, 128, 256, 512], **kwargs) -def resnet152(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> ResNet: +def resnet152( + pretrained: bool = False, + checkpoint: Union[Checkpoint, None] = None, + progress: bool = True, + **kwargs: Any, +) -> ResNet: """ResNet-152 from `"Deep Residual Learning for Image Recognition" `_ Args: - pretrained (bool): If True, returns a model pre-trained on ImageNet - progress (bool): If True, displays a progress bar of the download to stderr + pretrained: If True, returns a model pre-trained on ImageNet + checkpoint: If specified, load that checkpoint on the model + progress: If True, displays a progress bar of the download to stderr + kwargs: keyword args of _resnet Returns: torch.nn.Module: classification model """ - - return _resnet("resnet152", pretrained, progress, Bottleneck, [3, 8, 86, 3], [64, 128, 256, 512], **kwargs) + return _resnet(checkpoint, progress, Bottleneck, [3, 8, 36, 3], [64, 128, 256, 512], **kwargs)
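Note in passing that the "32x4d" naming below maps directly onto the `Bottleneck` width formula shown earlier in this file (`width = int(planes * (base_width / 64.0)) * groups`, with `width_per_group` forwarded as `base_width`); a quick worked check:

    # ResNeXt-50 32x4d, first stage: planes=64, width_per_group=4, groups=32
    planes, base_width, groups = 64, 4, 32
    width = int(planes * (base_width / 64.0)) * groups
    assert width == 128  # channel count of the grouped 3x3 convolution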
+ + +class ResNeXt50_32x4d_Checkpoint(Enum): + IMAGENETTE = _checkpoint( + arch="resnext50_32x4d", + url="https://github.com/frgfm/Holocron/releases/download/v0.2.1/resnext50_32x4d_224-5832c4ce.pth", + acc1=0.9455, + acc5=0.9949, + sha256="5832c4ce33522a9eb7a8b5abe31cf30621721a92d4f99b4b332a007d81d071fe", + size=92332638, + num_params=23000394, + commit="6e32c5b578711a2ef3731a8f8c61760ed9f03e58", + train_args=( + "./imagenette2-320/ --arch resnext50_32x4d --batch-size 64 --mixup-alpha 0.2 --amp --device 0 --epochs 100" + " --lr 1e-3 --label-smoothing 0.1 --random-erase 0.1 --train-crop-size 176 --val-resize-size 232" + " --opt adamw --weight-decay 5e-2" + ), + ) + DEFAULT = IMAGENETTE -def resnext50_32x4d(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> ResNet: +def resnext50_32x4d( + pretrained: bool = False, + checkpoint: Union[Checkpoint, None] = None, + progress: bool = True, + **kwargs: Any, +) -> ResNet: """ResNeXt-50 from `"Aggregated Residual Transformations for Deep Neural Networks" `_ Args: - pretrained (bool): If True, returns a model pre-trained on ImageNet - progress (bool): If True, displays a progress bar of the download to stderr + pretrained: If True, returns a model pre-trained on ImageNet + checkpoint: If specified, load that checkpoint on the model + progress: If True, displays a progress bar of the download to stderr + kwargs: keyword args of _resnet Returns: torch.nn.Module: classification model - """ + .. autoclass:: holocron.models.ResNeXt50_32x4d_Checkpoint + :members: + """ + checkpoint = _handle_legacy_pretrained( + pretrained, + checkpoint, + ResNeXt50_32x4d_Checkpoint.DEFAULT.value, + ) kwargs["width_per_group"] = 4 - block_args = dict(groups=32) + block_args = {"groups": 32} return _resnet( - "resnext50_32x4d", - pretrained, + checkpoint, progress, Bottleneck, [3, 4, 6, 3], @@ -588,23 +741,28 @@ def resnext50_32x4d(pretrained: bool = False, progress: bool = True, **kwargs: A ) -def resnext101_32x8d(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> ResNet: +def resnext101_32x8d( + pretrained: bool = False, + checkpoint: Union[Checkpoint, None] = None, + progress: bool = True, + **kwargs: Any, +) -> ResNet: """ResNeXt-101 from `"Aggregated Residual Transformations for Deep Neural Networks" `_ Args: - pretrained (bool): If True, returns a model pre-trained on ImageNet - progress (bool): If True, displays a progress bar of the download to stderr + pretrained: If True, returns a model pre-trained on ImageNet + checkpoint: If specified, load that checkpoint on the model + progress: If True, displays a progress bar of the download to stderr + kwargs: keyword args of _resnet Returns: torch.nn.Module: classification model """ - kwargs["width_per_group"] = 8 - block_args = dict(groups=32) + block_args = {"groups": 32} return _resnet( - "resnext101_32x8d", - pretrained, + checkpoint, progress, Bottleneck, [3, 4, 23, 3], diff --git a/holocron/models/classification/rexnet.py b/holocron/models/classification/rexnet.py index 1aa4d6b97..7aaa29cc4 100644 --- a/holocron/models/classification/rexnet.py +++ b/holocron/models/classification/rexnet.py @@ -3,27 +3,20 @@ # This program is licensed under the Apache License 2.0. # See LICENSE or go to for full license details. +import functools +import operator from collections import OrderedDict from enum import Enum from math import ceil from typing import Any, Callable, Optional, Union import torch.nn as nn +from torch import Tensor from holocron.nn import GlobalAvgPool2d, init -from ..checkpoints import ( - Checkpoint, - Dataset, - Evaluation, - LoadingMeta, - Metric, - PreProcessing, - TrainingRecipe, - _handle_legacy_pretrained, -) -from ..presets import IMAGENET, IMAGENETTE -from ..utils import _configure_model, conv_sequence +from ..checkpoints import Checkpoint, Dataset, _handle_legacy_pretrained +from ..utils import _checkpoint, _configure_model, conv_sequence __all__ = [ "SEBlock", @@ -67,8 +60,7 @@ def __init__( *conv_sequence(channels // se_ratio, channels, nn.Sigmoid(), None, drop_layer, kernel_size=1, stride=1), ) - def forward(self, x): - + def forward(self, x: Tensor) -> Tensor: y = self.pool(x) y = self.conv(y) return x * y @@ -143,7 +135,7 @@ def __init__( ) self.conv = nn.Sequential(*_layers) - def forward(self, x): + def forward(self, x: Tensor) -> Tensor: out = self.conv(x) if self.use_shortcut: out[:, : self.in_channels] += x @@ -179,7 +171,9 @@ def __init__( num_blocks = [1, 2, 2, 3, 3, 5] strides = [1, 2, 2, 2, 1, 2] num_blocks = [ceil(element * depth_mult) for element in num_blocks] - strides = sum([[element] + [1] * (num_blocks[idx] - 1) for idx, element in enumerate(strides)], []) + strides = functools.reduce( + operator.iadd, [[element] + [1] * (num_blocks[idx] - 1) for idx, element in enumerate(strides)], [] + ) depth = sum(num_blocks) stem_channel = 32 / width_mult if width_mult < 1.0 else 32 @@ -249,34 +243,7 @@ def _rexnet( return _configure_model(model, checkpoint, progress=progress) -def _checkpoint( - arch: str, - url: str, - acc1: float, -
acc5: float, - sha256: str, - size: int, - num_params: int, - commit: Union[str, None] = None, - train_args: Union[str, None] = None, - dataset: Dataset = Dataset.IMAGENETTE, -) -> Checkpoint: - preset = IMAGENETTE if dataset == Dataset.IMAGENETTE else IMAGENET - return Checkpoint( - evaluation=Evaluation( - dataset=dataset, - results={Metric.TOP1_ACC: acc1, Metric.TOP5_ACC: acc5}, - ), - meta=LoadingMeta( - url=url, sha256=sha256, size=size, num_params=num_params, arch=arch, categories=preset.classes - ), - pre_processing=PreProcessing(input_shape=(3, 224, 224), mean=preset.mean, std=preset.std), - recipe=TrainingRecipe(commit=commit, script="references/classification/train.py", args=train_args), - ) - - class ReXNet1_0x_Checkpoint(Enum): - # Porting of Ross Wightman's weights IMAGENET1K = _checkpoint( arch="rexnet1_0x", @@ -321,6 +288,7 @@ def rexnet1_0x( pretrained (bool): If True, returns a model pre-trained on ImageNette checkpoint: If specified, the model's parameters will be set to the checkpoint's values progress (bool): If True, displays a progress bar of the download to stderr + kwargs: keyword args of _rexnet Returns: torch.nn.Module: classification model @@ -337,7 +305,6 @@ def rexnet1_0x( class ReXNet1_3x_Checkpoint(Enum): - # Porting of Ross Wightman's weights IMAGENET1K = _checkpoint( arch="rexnet1_3x", @@ -382,6 +349,7 @@ def rexnet1_3x( pretrained (bool): If True, returns a model pre-trained on ImageNet checkpoint: If specified, the model's parameters will be set to the checkpoint's values progress (bool): If True, displays a progress bar of the download to stderr + kwargs: keyword args of _rexnet Returns: torch.nn.Module: classification model @@ -398,7 +366,6 @@ def rexnet1_3x( class ReXNet1_5x_Checkpoint(Enum): - # Porting of Ross Wightman's weights IMAGENET1K = _checkpoint( arch="rexnet1_5x", @@ -443,6 +410,7 @@ def rexnet1_5x( pretrained (bool): If True, returns a model pre-trained on ImageNet checkpoint: If specified, the model's parameters will be set to the checkpoint's values progress (bool): If True, displays a progress bar of the download to stderr + kwargs: keyword args of _rexnet Returns: torch.nn.Module: classification model @@ -459,7 +427,6 @@ def rexnet1_5x( class ReXNet2_0x_Checkpoint(Enum): - # Porting of Ross Wightman's weights IMAGENET1K = _checkpoint( arch="rexnet2_0x", @@ -504,6 +471,7 @@ def rexnet2_0x( pretrained (bool): If True, returns a model pre-trained on ImageNet checkpoint: If specified, the model's parameters will be set to the checkpoint's values progress (bool): If True, displays a progress bar of the download to stderr + kwargs: keyword args of _rexnet Returns: torch.nn.Module: classification model @@ -520,7 +488,6 @@ def rexnet2_0x( class ReXNet2_2x_Checkpoint(Enum): - IMAGENETTE = _checkpoint( arch="rexnet2_2x", url="https://github.com/frgfm/Holocron/releases/download/v0.2.1/rexnet2_2x_224-b23b2847.pth", @@ -553,6 +520,7 @@ def rexnet2_2x( pretrained (bool): If True, returns a model pre-trained on ImageNet checkpoint: If specified, the model's parameters will be set to the checkpoint's values progress (bool): If True, displays a progress bar of the download to stderr + kwargs: keyword args of _rexnet Returns: torch.nn.Module: classification model diff --git a/holocron/models/classification/sknet.py b/holocron/models/classification/sknet.py index 6bea4f5e1..35fcfeeda 100644 --- a/holocron/models/classification/sknet.py +++ b/holocron/models/classification/sknet.py @@ -3,18 +3,20 @@ # This program is licensed under the Apache License 2.0. 
# See LICENSE or go to for full license details. -from typing import Any, Callable, Dict, List, Optional +from enum import Enum +from typing import Any, Callable, Dict, List, Optional, Union import torch import torch.nn as nn from holocron.nn import GlobalAvgPool2d +from ..checkpoints import Checkpoint, _handle_legacy_pretrained from ..presets import IMAGENETTE -from ..utils import conv_sequence, load_pretrained_params +from ..utils import _checkpoint, _configure_model, conv_sequence from .resnet import ResNet, _ResBlock -__all__ = ["SoftAttentionLayer", "SKConv2d", "SKBottleneck", "sknet50", "sknet101", "sknet152"] +__all__ = ["SoftAttentionLayer", "SKConv2d", "SKBottleneck", "SKNet50_Checkpoint", "sknet50", "sknet101", "sknet152"] default_cfgs: Dict[str, Dict[str, Any]] = { @@ -105,7 +107,6 @@ def __init__( self.sa = SoftAttentionLayer(out_channels, sa_ratio, m, act_layer, norm_layer, drop_layer) def forward(self, x: torch.Tensor) -> torch.Tensor: - paths = torch.stack([path_conv(x) for path_conv in self.path_convs], dim=1) b, m, c = paths.shape[:3] @@ -117,7 +118,6 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: class SKBottleneck(_ResBlock): - expansion: int = 4 def __init__( @@ -135,7 +135,6 @@ def __init__( conv_layer: Optional[Callable[..., nn.Module]] = None, **kwargs: Any, ) -> None: - width = int(planes * (base_width / 64.0)) * groups super().__init__( [ @@ -171,64 +170,102 @@ def __init__( def _sknet( - arch: str, - pretrained: bool, + checkpoint: Union[Checkpoint, None], progress: bool, num_blocks: List[int], out_chans: List[int], **kwargs: Any, ) -> ResNet: - # Build the model model = ResNet(SKBottleneck, num_blocks, out_chans, **kwargs) # type: ignore[arg-type] - model.default_cfg = default_cfgs[arch] # type: ignore[assignment] - # Load pretrained parameters - if pretrained: - load_pretrained_params(model, default_cfgs[arch]["url"], progress) - - return model - - -def sknet50(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> ResNet: + return _configure_model(model, checkpoint, progress=progress) + + +class SKNet50_Checkpoint(Enum): + IMAGENETTE = _checkpoint( + arch="sknet50", + url="https://github.com/frgfm/Holocron/releases/download/v0.2.1/sknet50_224-e2349031.pth", + acc1=0.9437, + acc5=0.9954, + sha256="e2349031c838a4661cd729dbc7825605c9e0c966bd89bbcc9b39f0e324894d1f", + size=141253623, + num_params=35224394, + commit="6e32c5b578711a2ef3731a8f8c61760ed9f03e58", + train_args=( + "./imagenette2-320/ --arch sknet50 --batch-size 64 --mixup-alpha 0.2 --amp --device 0 --epochs 100" + " --lr 1e-3 --label-smoothing 0.1 --random-erase 0.1 --train-crop-size 176 --val-resize-size 232" + " --opt adamw --weight-decay 5e-2" + ), + ) + DEFAULT = IMAGENETTE + + +def sknet50( + pretrained: bool = False, + checkpoint: Union[Checkpoint, None] = None, + progress: bool = True, + **kwargs: Any, +) -> ResNet: """SKNet-50 from `"Selective Kernel Networks" `_ Args: pretrained (bool): If True, returns a model pre-trained on ImageNet + checkpoint: If specified, the model's parameters will be set to the checkpoint's values progress (bool): If True, displays a progress bar of the download to stderr + kwargs: keyword args of _sknet Returns: torch.nn.Module: classification model - """ - return _sknet("sknet50", pretrained, progress, [3, 4, 6, 3], [64, 128, 256, 512], **kwargs) - - -def sknet101(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> ResNet: + .. 
autoclass:: holocron.models.SKNet50_Checkpoint + :members: + """ + checkpoint = _handle_legacy_pretrained( + pretrained, + checkpoint, + SKNet50_Checkpoint.DEFAULT.value, + ) + return _sknet(checkpoint, progress, [3, 4, 6, 3], [64, 128, 256, 512], **kwargs) + + +def sknet101( + pretrained: bool = False, + checkpoint: Union[Checkpoint, None] = None, + progress: bool = True, + **kwargs: Any, +) -> ResNet: """SKNet-101 from `"Selective Kernel Networks" `_ Args: pretrained (bool): If True, returns a model pre-trained on ImageNet + checkpoint: If specified, the model's parameters will be set to the checkpoint's values progress (bool): If True, displays a progress bar of the download to stderr + kwargs: keyword args of _sknet Returns: torch.nn.Module: classification model """ + return _sknet(checkpoint, progress, [3, 4, 23, 3], [64, 128, 256, 512], **kwargs) - return _sknet("sknet101", pretrained, progress, [3, 4, 23, 3], [64, 128, 256, 512], **kwargs) - -def sknet152(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> ResNet: +def sknet152( + pretrained: bool = False, + checkpoint: Union[Checkpoint, None] = None, + progress: bool = True, + **kwargs: Any, +) -> ResNet: """SKNet-152 from `"Selective Kernel Networks" `_ Args: pretrained (bool): If True, returns a model pre-trained on ImageNet + checkpoint: If specified, the model's parameters will be set to the checkpoint's values progress (bool): If True, displays a progress bar of the download to stderr + kwargs: keyword args of _sknet Returns: torch.nn.Module: classification model """ - - return _sknet("sknet152", pretrained, progress, [3, 8, 86, 3], [64, 128, 256, 512], **kwargs) + return _sknet(checkpoint, progress, [3, 8, 36, 3], [64, 128, 256, 512], **kwargs) diff --git a/holocron/models/classification/tridentnet.py b/holocron/models/classification/tridentnet.py index c03b87be2..5dcf5e738 100644 --- a/holocron/models/classification/tridentnet.py +++ b/holocron/models/classification/tridentnet.py @@ -25,7 +25,6 @@ class TridentConv2d(nn.Conv2d): - num_branches: int = 3 def __init__(self, *args: Any, **kwargs: Any) -> None: @@ -65,7 +64,6 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: class Tridentneck(_ResBlock): - expansion: int = 4 def __init__( @@ -164,9 +162,9 @@ def tridentnet50(pretrained: bool = False, progress: bool = True, **kwargs: Any) Args: pretrained (bool): If True, returns a model pre-trained on ImageNet progress (bool): If True, displays a progress bar of the download to stderr + kwargs: keyword args of _tridentnet Returns: torch.nn.Module: classification model """ - return _tridentnet("tridentnet50", pretrained, progress, [3, 4, 6, 3], [64, 128, 256, 512], **kwargs) diff --git a/holocron/models/detection/yolo.py b/holocron/models/detection/yolo.py index 5d29a1f4b..259c6bed9 100644 --- a/holocron/models/detection/yolo.py +++ b/holocron/models/detection/yolo.py @@ -61,11 +61,11 @@ def _compute_losses( pred_o (torch.Tensor[N, H, W, num_anchors]): objectness scores pred_scores (torch.Tensor[N, H, W, num_anchors, num_classes]): classification probabilities target (list, optional): list of targets + ignore_high_iou (bool): ignore the intersections with high IoUs in the noobj penalty term Returns: dict: dictionary of losses """ - gt_boxes = [t["boxes"] for t in target] gt_labels = [t["labels"] for t in target] @@ -87,7 +86,6 @@ def _compute_losses( is_noobj = torch.ones_like(pred_o, dtype=torch.bool) for idx in range(b): - gt_xy = (gt_boxes[idx][:, :2] +
gt_boxes[idx][:, 2:]) / 2 gt_wh = gt_boxes[idx][:, 2:] - gt_boxes[idx][:, :2] gt_centers = torch.stack( @@ -125,12 +124,12 @@ def _compute_losses( # Non-objectness loss noobj_loss += pred_o[is_noobj].pow(2).sum() - return dict( - obj_loss=self.lambda_obj * obj_loss / pred_boxes.shape[0], - noobj_loss=self.lambda_noobj * noobj_loss / pred_boxes.shape[0], - bbox_loss=self.lambda_coords * bbox_loss / pred_boxes.shape[0], - clf_loss=self.lambda_class * clf_loss / pred_boxes.shape[0], - ) + return { + "obj_loss": self.lambda_obj * obj_loss / pred_boxes.shape[0], + "noobj_loss": self.lambda_noobj * noobj_loss / pred_boxes.shape[0], + "bbox_loss": self.lambda_coords * bbox_loss / pred_boxes.shape[0], + "clf_loss": self.lambda_class * clf_loss / pred_boxes.shape[0], + } @staticmethod def to_isoboxes(b_coords: Tensor, grid_shape: Tuple[int, int], clamp: bool = False) -> Tensor: @@ -154,8 +153,8 @@ def post_process( b_o: Tensor, b_scores: Tensor, grid_shape: Tuple[int, int], - rpn_nms_thresh=0.7, - box_score_thresh=0.05, + rpn_nms_thresh: float = 0.7, + box_score_thresh: float = 0.05, ) -> List[Dict[str, Tensor]]: """Perform final filtering to produce detections @@ -163,13 +162,13 @@ def post_process( b_coords (torch.Tensor[N, H * W * num_anchors, 4]): relative coordinates in format (x, y, w, h) b_o (torch.Tensor[N, H * W * num_anchors]): objectness scores b_scores (torch.Tensor[N, H * W * num_anchors, num_classes]): classification scores + grid_shape (Tuple[int, int]): the size of the grid rpn_nms_thresh (float, optional): IoU threshold for NMS box_score_thresh (float, optional): minimum classification confidence threshold Returns: list: detections dictionary """ - # Convert box coords pred_xyxy = self.to_isoboxes( b_coords.reshape(-1, *grid_shape, self.num_anchors, 4), # type: ignore[call-overload] @@ -179,7 +178,6 @@ def post_process( detections = [] for idx in range(b_coords.shape[0]): - coords = torch.zeros((0, 4), dtype=b_o.dtype, device=b_o.device) scores = torch.zeros(0, dtype=b_o.dtype, device=b_o.device) labels = torch.zeros(0, dtype=torch.long, device=b_o.device) @@ -203,7 +201,7 @@ def post_process( scores = scores[kept_idxs] labels = labels[kept_idxs] - detections.append(dict(boxes=coords, scores=scores, labels=labels)) + detections.append({"boxes": coords, "scores": scores, "labels": labels}) return detections @@ -229,7 +227,6 @@ def __init__( conv_layer: Optional[Callable[..., nn.Module]] = None, backbone_norm_layer: Optional[Callable[[int], nn.Module]] = None, ) -> None: - super().__init__( num_classes, rpn_nms_thresh, box_score_thresh, lambda_obj, lambda_noobj, lambda_class, lambda_coords ) @@ -313,7 +310,6 @@ def _format_outputs(self, x: Tensor) -> Tuple[Tensor, Tensor, Tensor]: torch.Tensor[N, H * W, num_anchors]: objectness scores torch.Tensor[N, H * W, num_anchors, num_classes]: classification scores """ - b, _ = x.shape h, w = 7, 7 # (B, H * W * (num_anchors * 5 + num_classes)) --> (B, H, W, num_anchors * 5 + num_classes) @@ -332,7 +328,6 @@ def _format_outputs(self, x: Tensor) -> Tuple[Tensor, Tensor, Tensor]: return b_coords, b_o, b_scores def _forward(self, x: Tensor) -> Tensor: - out = self.backbone(x) out = self.block4(out) out = self.classifier(out) @@ -350,7 +345,6 @@ def forward( target (list, optional): each dict must have two keys `boxes` of type torch.Tensor[-1, 4] and `labels` of type torch.Tensor[-1] """ - if self.training and target is None: raise ValueError("`target` needs to be specified in training mode") @@ -380,7 +374,6 @@ def forward( def _yolo( arch: str, 
pretrained: bool, progress: bool, pretrained_backbone: bool, layout: List[List[int]], **kwargs: Any ) -> YOLOv1: - if pretrained: pretrained_backbone = False @@ -463,11 +456,11 @@ def yolov1(pretrained: bool = False, progress: bool = True, pretrained_backbone: pretrained (bool, optional): If True, returns a model pre-trained on ImageNet progress (bool, optional): If True, displays a progress bar of the download to stderr pretrained_backbone (bool, optional): If True, backbone parameters will have been pretrained on Imagenette + kwargs: keyword args of _yolo Returns: torch.nn.Module: detection module """ - return _yolo( "yolov1", pretrained, diff --git a/holocron/models/detection/yolov2.py b/holocron/models/detection/yolov2.py index 0c14a2fff..4f8b93f16 100644 --- a/holocron/models/detection/yolov2.py +++ b/holocron/models/detection/yolov2.py @@ -48,7 +48,6 @@ def __init__( conv_layer: Optional[Callable[..., nn.Module]] = None, backbone_norm_layer: Optional[Callable[[int], nn.Module]] = None, ) -> None: - super().__init__( num_classes, rpn_nms_thresh, box_score_thresh, lambda_obj, lambda_noobj, lambda_class, lambda_coords ) @@ -165,7 +164,6 @@ def _format_outputs(self, x: Tensor) -> Tuple[Tensor, Tensor, Tensor]: torch.Tensor[N, H, W, num_anchors]: objectness scores torch.Tensor[N, H, W, num_anchors, num_classes]: classification scores """ - b, _, h, w = x.shape # (B, C, H, W) --> (B, H, W, num_anchors, 5 + num_classes) x = x.reshape(b, self.num_anchors, 5 + self.num_classes, h, w).permute(0, 3, 4, 1, 2) @@ -188,7 +186,6 @@ def _format_outputs(self, x: Tensor) -> Tuple[Tensor, Tensor, Tensor]: return b_coords, b_o, b_scores def _forward(self, x: Tensor) -> Tensor: - out, passthrough = self.backbone(x) # Downsample the feature map by stacking adjacent features on the channel dimension passthrough = self.passthrough_layer(passthrough) @@ -213,7 +210,6 @@ def forward( target (list, optional): each dict must have two keys `boxes` of type torch.Tensor[-1, 4] and `labels` of type torch.Tensor[-1] """ - if self.training and target is None: raise ValueError("`target` needs to be specified in training mode") @@ -248,7 +244,6 @@ def forward( def _yolo( arch: str, pretrained: bool, progress: bool, pretrained_backbone: bool, layout: List[Tuple[int, int]], **kwargs: Any ) -> YOLOv2: - if pretrained: pretrained_backbone = False @@ -297,11 +292,11 @@ def yolov2(pretrained: bool = False, progress: bool = True, pretrained_backbone: pretrained (bool, optional): If True, returns a model pre-trained on ImageNet progress (bool, optional): If True, displays a progress bar of the download to stderr pretrained_backbone (bool, optional): If True, backbone parameters will have been pretrained on Imagenette + kwargs: keyword args of _yolo Returns: torch.nn.Module: detection module """ - if pretrained_backbone: kwargs["backbone_norm_layer"] = FrozenBatchNorm2d diff --git a/holocron/models/detection/yolov4.py b/holocron/models/detection/yolov4.py index 5fc8c2f79..a7e0237e2 100644 --- a/holocron/models/detection/yolov4.py +++ b/holocron/models/detection/yolov4.py @@ -221,7 +221,6 @@ def __init__( init_module(self, "leaky_relu") def forward(self, feats: List[Tensor]) -> Tuple[Tensor, Tensor, Tensor]: - out = self.fpn(feats[2]) aux1 = self.pan1(out, feats[1]) @@ -304,14 +303,12 @@ def _format_outputs(self, output: Tensor) -> Tuple[Tensor, Tensor, Tensor]: def post_process( boxes: Tensor, b_o: Tensor, b_scores: Tensor, rpn_nms_thresh: float = 0.7, box_score_thresh: float = 0.05 ) -> List[Dict[str, Tensor]]: - b_o = 
torch.sigmoid(b_o) b_scores = torch.sigmoid(b_scores) boxes = boxes.clamp_(0, 1) detections = [] for idx in range(b_o.shape[0]): - coords = torch.zeros((0, 4), dtype=torch.float32, device=b_o.device) scores = torch.zeros(0, dtype=torch.float32, device=b_o.device) labels = torch.zeros(0, dtype=torch.long, device=b_o.device) @@ -334,14 +331,13 @@ def post_process( scores = scores[kept_idxs] labels = labels[kept_idxs] - detections.append(dict(boxes=coords, scores=scores, labels=labels)) + detections.append({"boxes": coords, "scores": scores, "labels": labels}) return detections def _build_targets( self, pred_boxes: Tensor, b_o: Tensor, b_scores: Tensor, target: List[Dict[str, Tensor]] ) -> Tuple[Tensor, Tensor, Tensor, Tensor]: - b, h, w, num_anchors = b_o.shape # Target formatting @@ -366,7 +362,6 @@ def _build_targets( device=b_o.device, ) if target_selection.shape[0] > 0: - # Anchors IoU gt_wh = _boxes[:, 2:] - _boxes[:, :2] anchor_idxs = box_iou( @@ -399,7 +394,6 @@ def _compute_losses( b_scores: Tensor, target: List[Dict[str, Tensor]], ) -> Dict[str, Tensor]: - target_o, target_scores, obj_mask, noobj_mask = self._build_targets(pred_boxes, b_o, b_scores, target) # Bbox regression @@ -410,11 +404,11 @@ def _compute_losses( b_o = torch.sigmoid(b_o) - return dict( - obj_loss=self.lambda_obj * F.mse_loss(b_o[obj_mask], target_o[obj_mask], reduction="sum") / b_o.shape[0], - noobj_loss=self.lambda_noobj * b_o[noobj_mask].pow(2).sum() / b_o.shape[0], - bbox_loss=self.lambda_coords * bbox_loss / b_o.shape[0], - clf_loss=self.lambda_class + return { + "obj_loss": self.lambda_obj * F.mse_loss(b_o[obj_mask], target_o[obj_mask], reduction="sum") / b_o.shape[0], + "noobj_loss": self.lambda_noobj * b_o[noobj_mask].pow(2).sum() / b_o.shape[0], + "bbox_loss": self.lambda_coords * bbox_loss / b_o.shape[0], + "clf_loss": self.lambda_class * F.binary_cross_entropy_with_logits( b_scores[obj_mask], target_scores[obj_mask], @@ -423,7 +417,7 @@ def _compute_losses( .mean(1) .sum(0) / b_o.shape[0], - ) + } def forward( self, x: Tensor, target: Optional[List[Dict[str, Tensor]]] = None @@ -436,7 +430,6 @@ def forward( target (list, optional): each dict must have two keys `boxes` of type torch.Tensor[*, 4] and `labels` of type torch.Tensor[*] """ - if self.training and target is None: raise ValueError("`target` needs to be specified in training mode") @@ -459,7 +452,6 @@ def __init__( drop_layer: Optional[Callable[..., nn.Module]] = None, conv_layer: Optional[Callable[..., nn.Module]] = None, ) -> None: - # cf. 
https://github.com/AlexeyAB/darknet/blob/master/cfg/yolov4.cfg#L1143 if anchors is None: anchors = ( @@ -636,13 +628,12 @@ def forward( y3 = self.yolo3(o3, target) if not self.training: - detections = [ - dict( - boxes=torch.cat((det1["boxes"], det2["boxes"], det3["boxes"]), dim=0), - scores=torch.cat((det1["scores"], det2["scores"], det3["scores"]), dim=0), - labels=torch.cat((det1["labels"], det2["labels"], det3["labels"]), dim=0), - ) + { + "boxes": torch.cat((det1["boxes"], det2["boxes"], det3["boxes"]), dim=0), + "scores": torch.cat((det1["scores"], det2["scores"], det3["scores"]), dim=0), + "labels": torch.cat((det1["labels"], det2["labels"], det3["labels"]), dim=0), + } for det1, det2, det3 in zip(y1, y2, y3) ] return detections @@ -690,7 +681,6 @@ def __init__( def forward( self, x: Tensor, target: Optional[List[Dict[str, Tensor]]] = None ) -> Union[List[Dict[str, Tensor]], Dict[str, Tensor]]: - if not isinstance(x, torch.Tensor): x = torch.stack(x, dim=0) @@ -709,7 +699,6 @@ def _yolo( layout: List[Tuple[int, int]], **kwargs: Any, ) -> YOLOv4: - if pretrained: pretrained_backbone = False @@ -758,11 +747,11 @@ def yolov4(pretrained: bool = False, progress: bool = True, pretrained_backbone: pretrained (bool, optional): If True, returns a model pre-trained on ImageNet progress (bool, optional): If True, displays a progress bar of the download to stderr pretrained_backbone (bool, optional): If True, backbone parameters will have been pretrained on Imagenette + kwargs: keyword args of _yolo Returns: torch.nn.Module: detection module """ - if pretrained_backbone: kwargs["backbone_norm_layer"] = FrozenBatchNorm2d diff --git a/holocron/models/segmentation/unet.py b/holocron/models/segmentation/unet.py index 1a7f8a254..d892bd573 100644 --- a/holocron/models/segmentation/unet.py +++ b/holocron/models/segmentation/unet.py @@ -43,7 +43,6 @@ def down_path( drop_layer: Optional[Callable[..., nn.Module]] = None, conv_layer: Optional[Callable[..., nn.Module]] = None, ) -> nn.Sequential: - layers: List[nn.Module] = [nn.MaxPool2d(2)] if downsample else [] layers.extend( [ @@ -88,7 +87,6 @@ def __init__( ) def forward(self, downfeats: Union[Tensor, List[Tensor]], upfeat: Tensor) -> Tensor: - if not isinstance(downfeats, list): downfeats = [downfeats] # Upsample expansive features @@ -117,13 +115,12 @@ def __init__( conv_layer: Optional[Callable[..., nn.Module]] = None, same_padding: bool = True, ) -> None: - if act_layer is None: act_layer = nn.ReLU(inplace=True) # Contracting path _layers: List[nn.Module] = [] - _layout = [in_channels] + layout + _layout = [in_channels, *layout] _pool = False for in_chan, out_chan in zip(_layout[:-1], _layout[1:]): _layers.append( @@ -178,7 +175,7 @@ def __init__( # Contracting path self.encoder = nn.ModuleList([]) - _layout = [in_channels] + layout + _layout = [in_channels, *layout] _pool = False for in_chan, out_chan in zip(_layout[:-1], _layout[1:]): self.encoder.append( @@ -219,7 +216,6 @@ def __init__( init_module(self, "relu") def forward(self, x: Tensor) -> Tensor: - xs: List[Tensor] = [] # Contracting path for encoder in self.encoder: @@ -277,7 +273,6 @@ def __init__( ) def forward(self, downfeat: Tensor, upfeat: Tensor) -> Tensor: - # Upsample expansive features _upfeat = self.upsample(upfeat) @@ -367,7 +362,6 @@ def __init__( init_module(self, "relu") def forward(self, x: Tensor) -> Tensor: - # Contracting path xs: List[Tensor] = list(self.encoder(x).values()) x = self.bridge(xs[-1]) @@ -404,11 +398,11 @@ def unet(pretrained: bool = False, progress: bool 
= True, **kwargs: Any) -> UNet Args: pretrained: If True, returns a model pre-trained on PASCAL VOC2012 progress: If True, displays a progress bar of the download to stderr + kwargs: keyword args of _unet Returns: semantic segmentation model """ - return _unet("unet", pretrained, progress, **kwargs) @@ -441,11 +435,11 @@ def unet2(pretrained: bool = False, progress: bool = True, in_channels: int = 3, pretrained: If True, returns a model pre-trained on PASCAL VOC2012 progress: If True, displays a progress bar of the download to stderr in_channels: number of input channels + kwargs: keyword args of _dynamic_unet Returns: semantic segmentation model """ - backbone = UNetBackbone(default_cfgs["unet2"]["encoder_layout"], in_channels=in_channels).features return _dynamic_unet("unet2", backbone, pretrained, progress, **kwargs) # type: ignore[arg-type] @@ -463,11 +457,11 @@ def unet_tvvgg11( pretrained: If True, returns a model pre-trained on PASCAL VOC2012 pretrained_backbone: If True, the encoder will load pretrained parameters from ImageNet progress: If True, displays a progress bar of the download to stderr + kwargs: keyword args of _dynamic_unet Returns: semantic segmentation model """ - backbone = vgg11(pretrained=pretrained_backbone and not pretrained).features return _dynamic_unet("unet_vgg11", backbone, pretrained, progress, **kwargs) @@ -485,11 +479,11 @@ def unet_tvresnet34( pretrained: If True, returns a model pre-trained on PASCAL VOC2012 pretrained_backbone: If True, the encoder will load pretrained parameters from ImageNet progress: If True, displays a progress bar of the download to stderr + kwargs: keyword args of _dynamic_unet Returns: semantic segmentation model """ - backbone = resnet34(pretrained=pretrained_backbone and not pretrained) kwargs["final_upsampling"] = kwargs.get("final_upsampling", True) @@ -512,11 +506,12 @@ def unet_rexnet13( pretrained: If True, returns a model pre-trained on PASCAL VOC2012 pretrained_backbone: If True, the encoder will load pretrained parameters from ImageNet progress: If True, displays a progress bar of the download to stderr + in_channels: the number of input channels + kwargs: keyword args of _dynamic_unet Returns: semantic segmentation model """ - backbone = rexnet1_3x(pretrained=pretrained_backbone and not pretrained, in_channels=in_channels).features kwargs["final_upsampling"] = kwargs.get("final_upsampling", True) kwargs["act_layer"] = kwargs.get("act_layer", nn.SiLU(inplace=True)) diff --git a/holocron/models/segmentation/unet3p.py b/holocron/models/segmentation/unet3p.py index 25e023cdb..6e494291f 100644 --- a/holocron/models/segmentation/unet3p.py +++ b/holocron/models/segmentation/unet3p.py @@ -32,7 +32,6 @@ def __init__( drop_layer: Optional[Callable[..., nn.Module]] = None, conv_layer: Optional[Callable[..., nn.Module]] = None, ) -> None: - super().__init__() # Check stem conv channels @@ -71,8 +70,7 @@ def __init__( ) ) - def forward(self, downfeats: List[Tensor], feat: Tensor, upfeats: List[Tensor]): - + def forward(self, downfeats: List[Tensor], feat: Tensor, upfeats: List[Tensor]) -> Tensor: if len(downfeats) != len(self.downsamples) or len(upfeats) != len(self.upsamples): raise ValueError( f"Expected {len(self.downsamples)} encoding & {len(self.upsamples)} decoding features, " @@ -124,7 +122,7 @@ def __init__( # Contracting path self.encoder = nn.ModuleList([]) - _layout = [in_channels] + layout + _layout = [in_channels, *layout] _pool = False for in_chan, out_chan in zip(_layout[:-1], _layout[1:]): 
self.encoder.append(down_path(in_chan, out_chan, _pool, 1, act_layer, norm_layer, drop_layer, conv_layer)) @@ -151,7 +149,6 @@ def __init__( init_module(self, "relu") def forward(self, x: Tensor) -> Tensor: - xs: List[Tensor] = [] # Contracting path for encoder in self.encoder: @@ -186,9 +183,9 @@ def unet3p(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> UN Args: pretrained: If True, returns a model pre-trained on PASCAL VOC2012 progress: If True, displays a progress bar of the download to stderr + kwargs: keyword args of _unet Returns: semantic segmentation model """ - return _unet("unet3p", pretrained, progress, **kwargs) # type: ignore[return-value] diff --git a/holocron/models/segmentation/unetpp.py b/holocron/models/segmentation/unetpp.py index 9b192fbe2..0dec02c31 100644 --- a/holocron/models/segmentation/unetpp.py +++ b/holocron/models/segmentation/unetpp.py @@ -52,7 +52,7 @@ def __init__( # Contracting path self.encoder = nn.ModuleList([]) - _layout = [in_channels] + layout + _layout = [in_channels, *layout] _pool = False for in_chan, out_chan in zip(_layout[:-1], _layout[1:]): self.encoder.append(down_path(in_chan, out_chan, _pool, 1, act_layer, norm_layer, drop_layer, conv_layer)) @@ -87,7 +87,6 @@ def __init__( init_module(self, "relu") def forward(self, x: Tensor) -> Tensor: - xs: List[Tensor] = [] # Contracting path for encoder in self.encoder: @@ -134,7 +133,7 @@ def __init__( # Contracting path self.encoder = nn.ModuleList([]) - _layout = [in_channels] + layout + _layout = [in_channels, *layout] _pool = False for in_chan, out_chan in zip(_layout[:-1], _layout[1:]): self.encoder.append(down_path(in_chan, out_chan, _pool, 1, act_layer, norm_layer, drop_layer, conv_layer)) @@ -178,7 +177,6 @@ def __init__( init_module(self, "relu") def forward(self, x: Tensor) -> Tensor: - xs: List[List[Tensor]] = [] # Contracting path for encoder in self.encoder: @@ -218,11 +216,11 @@ def unetp(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> UNe Args: pretrained: If True, returns a model pre-trained on PASCAL VOC2012 progress: If True, displays a progress bar of the download to stderr + kwargs: keyword args of _unet Returns: semantic segmentation model """ - return _unet("unetp", pretrained, progress, **kwargs) # type: ignore[return-value] @@ -236,9 +234,9 @@ def unetpp(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> UN Args: pretrained: If True, returns a model pre-trained on PASCAL VOC2012 progress: If True, displays a progress bar of the download to stderr + kwargs: keyword args of _unet Returns: semantic segmentation model """ - return _unet("unetpp", pretrained, progress, **kwargs) # type: ignore[return-value] diff --git a/holocron/models/utils.py b/holocron/models/utils.py index a919a79f9..2ef9e9e5a 100644 --- a/holocron/models/utils.py +++ b/holocron/models/utils.py @@ -5,6 +5,7 @@ import json import logging +from pathlib import Path from typing import Any, Callable, Dict, List, Optional, Tuple, TypeVar, Union import torch @@ -15,7 +16,8 @@ from holocron import models from holocron.nn import BlurPool2d -from .checkpoints import Checkpoint, Dataset, Evaluation, LoadingMeta, PreProcessing, TrainingRecipe +from .checkpoints import Checkpoint, Dataset, Evaluation, LoadingMeta, Metric, PreProcessing, TrainingRecipe +from .presets import IMAGENET, IMAGENETTE __all__ = ["conv_sequence", "load_pretrained_params", "fuse_conv_bn", "model_from_hf_hub"] @@ -55,7 +57,6 @@ def conv_sequence( Returns: a list of layers """ - if conv_layer is 
None: conv_layer = nn.Conv2d if bn_channels is None: @@ -91,7 +92,15 @@ def load_pretrained_params( key_replacement: Optional[Tuple[str, str]] = None, key_filter: Optional[str] = None, ) -> None: + """Loads a checkpoint on a model given its URL. + Args: + model: PyTorch model + url: the URL of the checkpoint to download + progress: whether a progress bar should be displayed when downloading the checkpoint + key_replacement: a mapping to replace keys in the checkpoint before loading them + key_filter: prefix of the checkpoint keys to be loaded + """ if url is None: logging.warning("Invalid model URL, using default initialization.") else: @@ -116,7 +125,6 @@ def fuse_conv_bn(conv: nn.Conv2d, bn: nn.BatchNorm2d) -> Tuple[torch.Tensor, tor Returns: the fused kernel and bias of the new convolution """ - # Check compatibility of both layers if bn.bias.data.shape[0] != conv.weight.data.shape[0]: raise AssertionError("expected same number of output channels for both `conv` and `bn`") @@ -146,9 +154,8 @@ def model_from_hf_hub(repo_id: str, **kwargs: Any) -> nn.Module: Returns: Model loaded with the checkpoint """ - # Get the config - with open(hf_hub_download(repo_id, filename="config.json", **kwargs), "rb") as f: + with Path(hf_hub_download(repo_id, filename="config.json", **kwargs)).open("rb") as f: cfg = json.load(f) model = models.__dict__[cfg["arch"]](num_classes=len(cfg["classes"]), pretrained=False) @@ -172,7 +179,6 @@ def _configure_model( checkpoint: Union[Checkpoint, None], **kwargs: Any, ) -> M: - model.default_cfg = checkpoint # type: ignore[assignment] # Load pretrained parameters if isinstance(checkpoint, Checkpoint): @@ -195,3 +201,29 @@ def _checkpoint_from_hub_config(hub_config: Dict[str, Any]) -> Checkpoint: ), recipe=TrainingRecipe(commit=None, script="references/classification/train.py", args=None), ) + + +def _checkpoint( + arch: str, + url: str, + acc1: float, + acc5: float, + sha256: str, + size: int, + num_params: int, + commit: Union[str, None] = None, + train_args: Union[str, None] = None, + dataset: Dataset = Dataset.IMAGENETTE, +) -> Checkpoint: + preset = IMAGENETTE if dataset == Dataset.IMAGENETTE else IMAGENET + return Checkpoint( + evaluation=Evaluation( + dataset=dataset, + results={Metric.TOP1_ACC: acc1, Metric.TOP5_ACC: acc5}, + ), + meta=LoadingMeta( + url=url, sha256=sha256, size=size, num_params=num_params, arch=arch, categories=preset.classes + ), + pre_processing=PreProcessing(input_shape=(3, 224, 224), mean=preset.mean, std=preset.std), + recipe=TrainingRecipe(commit=commit, script="references/classification/train.py", args=train_args), + ) diff --git a/holocron/nn/functional.py b/holocron/nn/functional.py index bc906cb0f..807fbbc91 100644 --- a/holocron/nn/functional.py +++ b/holocron/nn/functional.py @@ -36,7 +36,6 @@ def hard_mish(x: Tensor, inplace: bool = False) -> Tensor: Returns: output tensor """ - if inplace: return x.mul_(0.5 * (x + 2).clamp(min=0, max=2)) return 0.5 * x * (x + 2).clamp(min=0, max=2) @@ -52,7 +51,6 @@ def nl_relu(x: Tensor, beta: float = 1.0, inplace: bool = False) -> Tensor: Returns: output tensor """ - if inplace: return torch.log(F.relu_(x).mul_(beta).add_(1), out=x) return torch.log(1 + beta * F.relu(x)) @@ -80,7 +78,6 @@ def focal_loss( Returns: torch.Tensor: loss reduced with `reduction` method """ - # log(P[class]) = log_softmax(score)[class] logpt = F.log_softmax(x, dim=1) @@ -128,7 +125,6 @@ def concat_downsample2d(x: Tensor, scale_factor: int) -> Tensor: Returns: torch.Tensor[N, scale_factor ** 2 * C, H / scale_factor, W /
scale_factor]: downsampled tensor """ - b, c, h, w = x.shape if (h % scale_factor != 0) or (w % scale_factor != 0): @@ -150,7 +146,6 @@ def z_pool(x: Tensor, dim: int) -> Tensor: x: input tensor dim: dimension to pool """ - return torch.cat([x.max(dim, keepdim=True).values, x.mean(dim, keepdim=True)], dim=dim) @@ -169,7 +164,6 @@ def multilabel_cross_entropy( Returns: torch.Tensor: loss reduced with `reduction` method """ - # log(P[class]) = log_softmax(score)[class] logpt = F.log_softmax(x, dim=1) @@ -221,7 +215,6 @@ def complement_cross_entropy( Returns: torch.Tensor: loss reduced with `reduction` method """ - ce_loss = F.cross_entropy(x, target, weight, ignore_index=ignore_index, reduction=reduction) if gamma == 0: @@ -289,7 +282,6 @@ def mutual_channel_loss( Returns: torch.Tensor: loss reduced with `reduction` method """ - # Flatten spatial dimension b, c = x.shape[:2] spatial_dims = x.shape[2:] @@ -342,7 +334,6 @@ def _xcorr2d( eps: float = 1e-14, ) -> Tensor: """Implements cross-correlation operation""" - # Reshape input Tensor into properly sized slices h, w = x.shape[-2:] if isinstance(dilation, int): @@ -385,7 +376,6 @@ def _convNd(x: Tensor, weight: Tensor) -> Tensor: x (torch.Tensor[N, num_slices, Cin * K1 * ...]): input Tensor weight (torch.Tensor[Cout, Cin, K1, ...]): filters """ - return x @ weight.view(weight.size(0), -1).t() @@ -424,7 +414,6 @@ def norm_conv2d( >>> inputs = torch.randn(1,4,5,5) >>> F.norm_conv2d(inputs, filters, padding=1) """ - return _xcorr2d(_convNd, x, weight, bias, stride, padding, dilation, groups, True, eps) @@ -435,7 +424,6 @@ def _addNd(x: Tensor, weight: Tensor) -> Tensor: x (torch.Tensor[N, num_slices, Cin * K1 * ...]): input Tensor weight (torch.Tensor[Cout, Cin, K1, ...]): filters """ - return -(x.unsqueeze(2) - weight.view(weight.size(0), -1)).abs().sum(-1) @@ -475,7 +463,6 @@ def add2d( >>> inputs = torch.randn(1,4,5,5) >>> F.norm_conv2d(inputs, filters, padding=1) """ - return _xcorr2d(_addNd, x, weight, bias, stride, padding, dilation, groups, normalize_slices, eps) @@ -490,7 +477,6 @@ def dropblock2d(x: Tensor, drop_prob: float, block_size: int, inplace: bool = Fa inplace (bool, optional): whether the operation should be done inplace training (bool, optional): whether the input should be processed in training mode """ - if not training or drop_prob == 0: return x @@ -538,7 +524,6 @@ def dice_loss( Returns: torch.Tensor: loss reduced with `reduction` method """ - inter = gamma * (x * target).flatten(2).sum((0, 2)) cardinality = (x + gamma * target).flatten(2).sum((0, 2)) @@ -578,7 +563,6 @@ def poly_loss( Returns: torch.Tensor: loss reduced with `reduction` method """ - # log(P[class]) = log_softmax(score)[class] logpt = F.log_softmax(x, dim=1) diff --git a/holocron/nn/init.py b/holocron/nn/init.py index 22b4bba68..d7acc6c16 100644 --- a/holocron/nn/init.py +++ b/holocron/nn/init.py @@ -14,7 +14,6 @@ def init_module(module: nn.Module, nonlinearity: str = "relu") -> None: module: module to initialize nonlinearity: linearity to initialize convolutions for """ - for m in module.modules(): if isinstance(m, _ConvNd): nn.init.kaiming_normal_(m.weight.data, mode="fan_out", nonlinearity=nonlinearity) diff --git a/holocron/nn/modules/activation.py b/holocron/nn/modules/activation.py index 043b77be9..dbe1e8fd9 100644 --- a/holocron/nn/modules/activation.py +++ b/holocron/nn/modules/activation.py @@ -3,7 +3,7 @@ # This program is licensed under the Apache License 2.0. # See LICENSE or go to for full license details. 
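As a quick check of the `concat_downsample2d` shape contract documented in holocron/nn/functional.py above, the op trades spatial resolution for channels (a minimal sketch; the `F` alias matches the doctests elsewhere in this file):

    # Shape contract: (N, C, H, W) -> (N, scale_factor**2 * C, H / s, W / s)
    import torch
    from holocron.nn import functional as F

    x = torch.rand(1, 4, 6, 6)
    out = F.concat_downsample2d(x, scale_factor=2)
    assert out.shape == (1, 16, 3, 3)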
-from typing import List +from typing import ClassVar, List import torch import torch.nn as nn @@ -15,8 +15,7 @@ class _Activation(nn.Module): - - __constants__: List[str] = ["inplace"] + __constants__: ClassVar[List[str]] = ["inplace"] def __init__(self, inplace: bool = False) -> None: super().__init__() diff --git a/holocron/nn/modules/conv.py b/holocron/nn/modules/conv.py index fcfcafe3f..04eafc6ed 100644 --- a/holocron/nn/modules/conv.py +++ b/holocron/nn/modules/conv.py @@ -32,8 +32,8 @@ def __init__( groups: int, bias: bool, padding_mode: str, - normalize_slices=False, - eps=1e-14, + normalize_slices: bool = False, + eps: float = 1e-14, ) -> None: super().__init__( in_channels, @@ -335,7 +335,7 @@ def __init__( bias: bool = True, padding_mode: str = "zeros", r: int = 32, - L: int = 2, + L: int = 2, # noqa: N803 ) -> None: super().__init__() self.fc1 = nn.Conv2d(in_channels, max(in_channels // r, L), 1) @@ -399,7 +399,6 @@ def __init__( groups: Optional[List[int]] = None, **kwargs: Any, ) -> None: - if num_levels == 1: super().__init__( [ @@ -437,8 +436,7 @@ def __init__( ) self.num_levels = num_levels - def forward(self, x): - + def forward(self, x: Tensor) -> Tensor: if self.num_levels == 1: return self[0].forward(x) return torch.cat([conv(x) for conv in self], dim=1) @@ -474,7 +472,6 @@ def __init__( dilation: int = 1, reduction_ratio: float = 1, ) -> None: - super().__init__() self.groups = groups @@ -486,7 +483,6 @@ def __init__( self.unfold = nn.Unfold(kernel_size, dilation, padding, stride) def forward(self, x): - # Kernel generation # (N, C, H, W) --> (N, C, H // s, W // s) kernel = self.pool(x) if isinstance(self.pool, nn.Module) else x diff --git a/holocron/nn/modules/downsample.py b/holocron/nn/modules/downsample.py index e2c21281c..d7e3ce1f2 100644 --- a/holocron/nn/modules/downsample.py +++ b/holocron/nn/modules/downsample.py @@ -36,7 +36,6 @@ def __init__(self, scale_factor: int) -> None: self.scale_factor = scale_factor def forward(self, x: Tensor) -> Tensor: - return F.concat_downsample2d(x, self.scale_factor) @@ -53,7 +52,6 @@ def __init__(self, scale_factor: int) -> None: self.scale_factor = scale_factor def __call__(self, x: Tensor) -> Tensor: - return F.concat_downsample2d(x, self.scale_factor) @@ -167,7 +165,7 @@ class SPP(nn.ModuleList): def __init__(self, kernel_sizes: List[int]) -> None: super().__init__([nn.MaxPool2d(k_size, stride=1, padding=k_size // 2) for k_size in kernel_sizes]) - def forward(self, x): + def forward(self, x: Tensor) -> Tensor: feats = [x] + [pool_layer(x) for pool_layer in self] return torch.cat(feats, dim=1) diff --git a/holocron/ops/boxes.py b/holocron/ops/boxes.py index cf991e1b5..11350fc8d 100644 --- a/holocron/ops/boxes.py +++ b/holocron/ops/boxes.py @@ -76,7 +76,6 @@ def iou_penalty(boxes1: Tensor, boxes2: Tensor) -> Tensor: Returns: torch.Tensor[M, N]: penalty term """ - # Diagonal length of the smallest enclosing box c2 = torch.zeros((boxes1.shape[0], boxes2.shape[0], 2), device=boxes1.device) # Assign bottom right coords @@ -128,7 +127,6 @@ def diou_loss(boxes1: Tensor, boxes2: Tensor) -> Tensor: Returns: torch.Tensor[M, N]: Distance-IoU loss """ - return 1 - box_iou(boxes1, boxes2) + iou_penalty(boxes1, boxes2) @@ -141,7 +139,6 @@ def aspect_ratio(boxes: Tensor) -> Tensor: Returns: torch.Tensor[N]: aspect ratio """ - return torch.atan((boxes[:, 2] - boxes[:, 0]) / (boxes[:, 3] - boxes[:, 1])) @@ -155,7 +152,6 @@ def aspect_ratio_consistency(boxes1: Tensor, boxes2: Tensor) -> Tensor: Returns: torch.Tensor[M, N]: aspect ratio 
consistency """ - v = aspect_ratio(boxes1).unsqueeze(-1) - aspect_ratio(boxes2).unsqueeze(-2) v.pow_(2) v.mul_(4 / math.pi**2) @@ -203,7 +199,6 @@ def ciou_loss(boxes1: Tensor, boxes2: Tensor) -> Tensor: >>> boxes2 = torch.tensor([[50, 50, 150, 150]], dtype=torch.float32) >>> box_ciou(boxes1, boxes2) """ - iou = box_iou(boxes1, boxes2) v = aspect_ratio_consistency(boxes1, boxes2) diff --git a/holocron/optim/adabelief.py b/holocron/optim/adabelief.py index 09a5a2802..2b3c15835 100644 --- a/holocron/optim/adabelief.py +++ b/holocron/optim/adabelief.py @@ -135,9 +135,7 @@ def adabelief( r"""Functional API that performs AdaBelief algorithm computation. See :class:`~holocron.optim.AdaBelief` for details. """ - for i, param in enumerate(params): - grad = grads[i] exp_avg = exp_avgs[i] exp_avg_sq = exp_avg_sqs[i] diff --git a/holocron/optim/adamp.py b/holocron/optim/adamp.py index 74a84fa0f..37165e360 100644 --- a/holocron/optim/adamp.py +++ b/holocron/optim/adamp.py @@ -159,9 +159,7 @@ def adamp( r"""Functional API that performs AdamP algorithm computation. See :class:`~holocron.optim.AdamP` for details. """ - for i, param in enumerate(params): - grad = grads[i] exp_avg = exp_avgs[i] exp_avg_sq = exp_avg_sqs[i] diff --git a/holocron/optim/adan.py b/holocron/optim/adan.py index c87687ef2..fd448ec87 100644 --- a/holocron/optim/adan.py +++ b/holocron/optim/adan.py @@ -162,9 +162,7 @@ def adan( r"""Functional API that performs Adan algorithm computation. See :class:`~holocron.optim.Adan` for details. """ - for i, param in enumerate(params): - grad = grads[i] exp_avg = exp_avgs[i] exp_avg_sq = exp_avg_sqs[i] diff --git a/holocron/optim/lamb.py b/holocron/optim/lamb.py index 5ed3f6847..b8723011b 100644 --- a/holocron/optim/lamb.py +++ b/holocron/optim/lamb.py @@ -68,7 +68,7 @@ def __init__( raise ValueError(f"Invalid beta parameter at index 0: {betas[0]}") if not 0.0 <= betas[1] < 1.0: raise ValueError(f"Invalid beta parameter at index 1: {betas[1]}") - defaults = dict(lr=lr, betas=betas, eps=eps, weight_decay=weight_decay) + defaults = {"lr": lr, "betas": betas, "eps": eps, "weight_decay": weight_decay} super().__init__(params, defaults) # LARS arguments self.scale_clip = scale_clip diff --git a/holocron/optim/lars.py b/holocron/optim/lars.py index 1586a5ffb..fcbbc03d3 100644 --- a/holocron/optim/lars.py +++ b/holocron/optim/lars.py @@ -67,7 +67,13 @@ def __init__( if weight_decay < 0.0: raise ValueError(f"Invalid weight_decay value: {weight_decay}") - defaults = dict(lr=lr, momentum=momentum, dampening=dampening, weight_decay=weight_decay, nesterov=nesterov) + defaults = { + "lr": lr, + "momentum": momentum, + "dampening": dampening, + "weight_decay": weight_decay, + "nesterov": nesterov, + } if nesterov and (momentum <= 0 or dampening != 0): raise ValueError("Nesterov momentum requires a momentum and zero dampening") super().__init__(params, defaults) @@ -76,7 +82,7 @@ def __init__( if self.scale_clip is None: self.scale_clip = (0.0, 10.0) - def __setstate__(self, state: Dict[str, torch.Tensor]): + def __setstate__(self, state: Dict[str, torch.Tensor]) -> None: super().__setstate__(state) for group in self.param_groups: group.setdefault("nesterov", False) diff --git a/holocron/optim/ralars.py b/holocron/optim/ralars.py index 1459c0919..c1c1a8465 100644 --- a/holocron/optim/ralars.py +++ b/holocron/optim/ralars.py @@ -43,7 +43,7 @@ def __init__( raise ValueError(f"Invalid beta parameter at index 0: {betas[0]}") if not 0.0 <= betas[1] < 1.0: raise ValueError(f"Invalid beta parameter at index 1: 
{betas[1]}") - defaults = dict(lr=lr, betas=betas, eps=eps, weight_decay=weight_decay) + defaults = {"lr": lr, "betas": betas, "eps": eps, "weight_decay": weight_decay} super(RaLars, self).__init__(params, defaults) # RAdam tweaks self.force_adaptive_momentum = force_adaptive_momentum @@ -65,7 +65,6 @@ def step(self, closure: Optional[Callable[[], float]] = None) -> Optional[float] loss = closure() for group in self.param_groups: - # Get group-shared variables beta1, beta2 = group["betas"] # Compute max length of SMA on first step diff --git a/holocron/optim/tadam.py b/holocron/optim/tadam.py index cec7f9f0b..eaa31befc 100644 --- a/holocron/optim/tadam.py +++ b/holocron/optim/tadam.py @@ -73,7 +73,7 @@ def __init__( raise ValueError(f"Invalid beta parameter at index 1: {betas[1]}") if not 0.0 <= weight_decay: raise ValueError("Invalid weight_decay value: {}".format(weight_decay)) - defaults = dict(lr=lr, betas=betas, eps=eps, weight_decay=weight_decay, amsgrad=amsgrad, dof=dof) + defaults = {"lr": lr, "betas": betas, "eps": eps, "weight_decay": weight_decay, "amsgrad": amsgrad, "dof": dof} super().__init__(params, defaults) def __setstate__(self, state: Dict[str, torch.Tensor]) -> None: @@ -98,7 +98,7 @@ def step(self, closure: Optional[Callable[[], float]] = None) -> Optional[float] grads = [] exp_avgs = [] exp_avg_sqs = [] - W_ts = [] + W_ts = [] # noqa: N806 max_exp_avg_sqs = [] state_steps = [] @@ -163,7 +163,7 @@ def tadam( exp_avgs: List[Tensor], exp_avg_sqs: List[Tensor], max_exp_avg_sqs: List[Tensor], - W_ts: List[Tensor], + W_ts: List[Tensor], # noqa: N803 state_steps: List[int], amsgrad: bool, beta1: float, @@ -176,13 +176,11 @@ def tadam( r"""Functional API that performs TAdam algorithm computation. See :class:`~holocron.optim.TAdam` for details. """ - for i, param in enumerate(params): - grad = grads[i] exp_avg = exp_avgs[i] exp_avg_sq = exp_avg_sqs[i] - W_t = W_ts[i] + W_t = W_ts[i] # noqa: N806 _dof = param.data.numel() if dof is None else dof step = state_steps[i] if amsgrad: diff --git a/holocron/optim/wrapper.py b/holocron/optim/wrapper.py index bc137ed21..fe13943b5 100644 --- a/holocron/optim/wrapper.py +++ b/holocron/optim/wrapper.py @@ -31,15 +31,15 @@ class Lookahead(Optimizer): def __init__( self, base_optimizer: torch.optim.Optimizer, - sync_rate=0.5, - sync_period=6, + sync_rate: float = 0.5, + sync_period: int = 6, ) -> None: if sync_rate < 0 or sync_rate > 1: raise ValueError(f"expected positive float lower than 1 as sync_rate, received: {sync_rate}") if not isinstance(sync_period, int) or sync_period < 1: raise ValueError(f"expected positive integer as sync_period, received: {sync_period}") # Optimizer attributes - self.defaults = dict(sync_rate=sync_rate, sync_period=sync_period) + self.defaults = {"sync_rate": sync_rate, "sync_period": sync_period} self.state = defaultdict(dict) # Base optimizer attributes self.base_optimizer = base_optimizer @@ -76,7 +76,6 @@ def step(self, closure: Optional[Callable[[], float]] = None) -> Optional[float] Arguments: closure (callable, optional): A closure that reevaluates the model and returns the loss. 
""" - # Update fast params loss = self.base_optimizer.step(closure) self.fast_steps += 1 @@ -101,9 +100,8 @@ def _add_param_group(self, param_group: Dict[str, Any]) -> None: Args: param_group (dict): parameter group of base_optimizer """ - # Clone & detach params from base optimizer - group = dict(params=[p.clone().detach() for p in param_group["params"]], lr=param_group["lr"]) + group = {"params": [p.clone().detach() for p in param_group["params"]], "lr": param_group["lr"]} # Uneeded grads for p in group["params"]: p.reguires_grad = False @@ -115,7 +113,6 @@ def add_param_group(self, param_group: Dict[str, Any]) -> None: Args: param_group (dict): parameter group """ - # Add param group to base optimizer self.base_optimizer.add_param_group(param_group) @@ -129,7 +126,6 @@ def sync_params(self, sync_rate: float = 0.0) -> None: Args: sync_rate (float): synchronization rate of parameters """ - for fast_group, slow_group in zip(self.base_optimizer.param_groups, self.param_groups): for fast_p, slow_p in zip(fast_group["params"], slow_group["params"]): # Outer update @@ -152,22 +148,22 @@ class Scout(Optimizer): Args: base_optimizer (torch.optim.optimizer.Optimizer): base parameter optimizer - sync_rate (int, optional): rate of weight synchronization + sync_rate (float, optional): rate of weight synchronization sync_period (int, optional): number of step performed on fast weights before weight synchronization """ def __init__( self, base_optimizer: torch.optim.Optimizer, - sync_rate=0.5, - sync_period=6, + sync_rate: float = 0.5, + sync_period: int = 6, ) -> None: if sync_rate < 0 or sync_rate > 1: raise ValueError(f"expected positive float lower than 1 as sync_rate, received: {sync_rate}") if not isinstance(sync_period, int) or sync_period < 1: raise ValueError(f"expected positive integer as sync_period, received: {sync_period}") # Optimizer attributes - self.defaults = dict(sync_rate=sync_rate, sync_period=sync_period) + self.defaults = {"sync_rate": sync_rate, "sync_period": sync_period} self.state = defaultdict(dict) # Base optimizer attributes self.base_optimizer = base_optimizer @@ -177,10 +173,7 @@ def __init__( for group in self.base_optimizer.param_groups: self._add_param_group(group) # Buffer for scouting - self.buffer = [] - for group in self.param_groups: - for p in group["params"]: - self.buffer.append(p.data.unsqueeze(0)) + self.buffer = [p.data.unsqueeze(0) for group in self.param_groups for p in group["params"]] def __getstate__(self) -> Dict[str, Any]: return { @@ -209,7 +202,6 @@ def step(self, closure: Optional[Callable[[], float]] = None) -> Optional[float] Arguments: closure (callable, optional): A closure that reevaluates the model and returns the loss. 
""" - # Update fast params loss = self.base_optimizer.step(closure) self.fast_steps += 1 @@ -256,9 +248,8 @@ def _add_param_group(self, param_group: Dict[str, Any]) -> None: Args: param_group (dict): parameter group of base_optimizer """ - # Clone & detach params from base optimizer - group = dict(params=[p.clone().detach() for p in param_group["params"]], lr=param_group["lr"]) + group = {"params": [p.clone().detach() for p in param_group["params"]], "lr": param_group["lr"]} # Uneeded grads for p in group["params"]: p.reguires_grad = False @@ -270,7 +261,6 @@ def add_param_group(self, param_group: Dict[str, Any]) -> None: Args: param_group (dict): parameter group """ - # Add param group to base optimizer self.base_optimizer.add_param_group(param_group) @@ -284,7 +274,6 @@ def sync_params(self, sync_rate: float = 0.0) -> None: Args: sync_rate (float): synchronization rate of parameters """ - for fast_group, slow_group in zip(self.base_optimizer.param_groups, self.param_groups): for fast_p, slow_p in zip(fast_group["params"], slow_group["params"]): # Outer update diff --git a/holocron/trainer/classification.py b/holocron/trainer/classification.py index 06f39df2e..bf75b4b92 100644 --- a/holocron/trainer/classification.py +++ b/holocron/trainer/classification.py @@ -46,7 +46,6 @@ def evaluate(self) -> Dict[str, float]: Returns: dict: evaluation metrics """ - self.model.eval() val_loss, top1, top5, num_samples, num_valid_batches = 0.0, 0, 0, 0, 0 @@ -70,7 +69,7 @@ def evaluate(self) -> Dict[str, float]: val_loss /= num_valid_batches - return dict(val_loss=val_loss, acc1=top1 / num_samples, acc5=top5 / num_samples) + return {"val_loss": val_loss, "acc1": top1 / num_samples, "acc5": top5 / num_samples} @staticmethod def _eval_metrics_str(eval_metrics: Dict[str, float]) -> str: @@ -88,7 +87,6 @@ def plot_top_losses( num_samples: int = 12, **kwargs: Any, ) -> None: - # Record loss, prob, target, image losses = np.zeros(num_samples, dtype=np.float32) preds = np.zeros(num_samples, dtype=int) @@ -210,7 +208,6 @@ def evaluate(self) -> Dict[str, float]: Returns: dict: evaluation metrics """ - self.model.eval() val_loss, top1, num_samples, num_valid_batches = 0.0, 0.0, 0, 0 @@ -230,8 +227,8 @@ def evaluate(self) -> Dict[str, float]: val_loss /= num_valid_batches - return dict(val_loss=val_loss, acc=top1 / num_samples) + return {"val_loss": val_loss, "acc": top1 / num_samples} @staticmethod def _eval_metrics_str(eval_metrics: Dict[str, float]) -> str: - return f"Validation loss: {eval_metrics['val_loss']:.4} " f"(Acc: {eval_metrics['acc']:.2%})" + return f"Validation loss: {eval_metrics['val_loss']:.4} (Acc: {eval_metrics['acc']:.2%})" diff --git a/holocron/trainer/core.py b/holocron/trainer/core.py index dba330d1d..37126e4f6 100644 --- a/holocron/trainer/core.py +++ b/holocron/trainer/core.py @@ -110,12 +110,12 @@ def save(self, output_file: str) -> None: output_file: destination file path """ torch.save( - dict( - epoch=self.epoch, - step=self.step, - min_loss=self.min_loss, - model=self.model.state_dict(), - ), + { + "epoch": self.epoch, + "step": self.step, + "min_loss": self.min_loss, + "model": self.model.state_dict(), + }, output_file, _use_new_zipfile_serialization=False, ) @@ -243,12 +243,12 @@ def _reset_opt(self, lr: float, norm_weight_decay: Optional[float] = None) -> No self._set_params(norm_weight_decay) # Split it if norm layers needs custom WD if norm_weight_decay is None: - self.optimizer.add_param_group(dict(params=self._params[0])) + self.optimizer.add_param_group({"params": 
diff --git a/holocron/trainer/detection.py b/holocron/trainer/detection.py
index e62639490..7afe7527a 100644
--- a/holocron/trainer/detection.py
+++ b/holocron/trainer/detection.py
@@ -123,4 +123,4 @@ def evaluate(self, iou_threshold: float = 0.5) -> Dict[str, Optional[float]]:
         clf_err = 1 - correct / loc_assigns if loc_assigns > 0 else None
         # End-to-end
         det_err = 1 - 2 * correct / (nb_preds + num_samples) if nb_preds + num_samples > 0 else None
-        return dict(loc_err=loc_err, clf_err=clf_err, det_err=det_err, val_loss=loc_err)
+        return {"loc_err": loc_err, "clf_err": clf_err, "det_err": det_err, "val_loss": loc_err}
diff --git a/holocron/trainer/segmentation.py b/holocron/trainer/segmentation.py
index c46d7f9e3..f39ec242e 100644
--- a/holocron/trainer/segmentation.py
+++ b/holocron/trainer/segmentation.py
@@ -45,7 +45,6 @@ def evaluate(self, ignore_index: int = 255) -> Dict[str, float]:
         Returns:
             dict: evaluation metrics
         """
-
         self.model.eval()

         val_loss, mean_iou, num_valid_batches = 0.0, 0.0, 0
@@ -74,7 +73,7 @@ def evaluate(self, ignore_index: int = 255) -> Dict[str, float]:
         acc_global = (torch.diag(conf_mat).sum() / conf_mat.sum()).item()
         mean_iou = (torch.diag(conf_mat) / (conf_mat.sum(1) + conf_mat.sum(0) - torch.diag(conf_mat))).mean().item()

-        return dict(val_loss=val_loss, acc_global=acc_global, mean_iou=mean_iou)
+        return {"val_loss": val_loss, "acc_global": acc_global, "mean_iou": mean_iou}

     @staticmethod
     def _eval_metrics_str(eval_metrics: Dict[str, float]) -> str:
diff --git a/holocron/trainer/utils.py b/holocron/trainer/utils.py
index 7ae1da7f1..fee8cc04f 100644
--- a/holocron/trainer/utils.py
+++ b/holocron/trainer/utils.py
@@ -22,7 +22,6 @@ def freeze_bn(mod: nn.Module) -> None:
     Args:
         mod (torch.nn.Module): model to train
     """
-
     # Loop on modules
     for m in mod.modules():
         if isinstance(m, _BatchNorm) and m.affine and all(not p.requires_grad for p in m.parameters()):
@@ -48,7 +47,6 @@ def freeze_model(
         last_frozen_layer (str, optional): last layer to freeze. Assumes layers have been registered in forward order
         frozen_bn_stat_update (bool, optional): force stats update in BN layers that are frozen
     """
-
     # Unfreeze everything
     for p in model.parameters():
         p.requires_grad_(True)
@@ -76,6 +74,7 @@ def split_normalization_params(
     model: nn.Module,
     norm_classes: Optional[List[type]] = None,
 ) -> Tuple[List[nn.Parameter], List[nn.Parameter]]:
+    """Split the param groups by normalization schemes"""
     # Borrowed from https://github.com/pytorch/vision/blob/main/torchvision/ops/_utils.py
     # Adapted from https://github.com/facebookresearch/ClassyVision/blob/659d7f78/classy_vision/generic/util.py#L501
     if not norm_classes:
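# Example: the newly documented `split_normalization_params` helper splits a
# model's parameters into (norm params, other params), which pairs with the
# custom weight-decay groups built in `_reset_opt` above. A minimal sketch,
# assuming the helper is importable from holocron.trainer.utils:
import torch.nn as nn

from holocron.trainer.utils import split_normalization_params

model = nn.Sequential(nn.Conv2d(3, 8, 3), nn.BatchNorm2d(8), nn.ReLU())
norm_params, other_params = split_normalization_params(model)
# Norm layers are commonly exempted from weight decay
param_groups = [
    {"params": norm_params, "weight_decay": 0.0},
    {"params": other_params, "weight_decay": 1e-4},
]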
diff --git a/holocron/transforms/interpolation.py b/holocron/transforms/interpolation.py
index b62075702..3833eb0d5 100644
--- a/holocron/transforms/interpolation.py
+++ b/holocron/transforms/interpolation.py
@@ -27,7 +27,8 @@ class ResizeMethod(str, Enum):

 def _get_image_shape(image: Union[Image.Image, torch.Tensor]) -> Tuple[int, int]:
     if isinstance(image, torch.Tensor):
-        assert image.ndim == 3
+        if image.ndim != 3:
+            raise ValueError("the input tensor is expected to be 3-dimensional")
         h, w = image.shape[1:]
     elif isinstance(image, Image.Image):
         w, h = image.size
@@ -65,8 +66,10 @@ def __init__(
         pad_mode: str = "constant",
         **kwargs: Any,
     ) -> None:
-        assert isinstance(mode, ResizeMethod)
-        assert isinstance(size, (tuple, list)) and len(size) == 2 and all(s > 0 for s in size)
+        if not isinstance(mode, ResizeMethod):
+            raise ValueError("mode is expected to be a ResizeMethod")
+        if not isinstance(size, (tuple, list)) or len(size) != 2 or any(s <= 0 for s in size):
+            raise ValueError("size is expected to be a sequence of 2 positive integers")
         super().__init__(size, **kwargs)
         self.mode = mode
         self.pad_mode = pad_mode
@@ -115,8 +118,10 @@ class RandomZoomOut(nn.Module):
     """

     def __init__(self, size: Tuple[int, int], scale: Tuple[float, float] = (0.5, 1.0), **kwargs: Any):
-        assert isinstance(size, (tuple, list)) and len(size) == 2 and all(s > 0 for s in size)
-        assert len(scale) == 2 and scale[0] <= scale[1]
+        if not isinstance(size, (tuple, list)) or len(size) != 2 or any(s <= 0 for s in size):
+            raise ValueError("size is expected to be a sequence of 2 positive integers")
+        if len(scale) != 2 or scale[0] > scale[1]:
+            raise ValueError("scale is expected to be a couple of floats, the first one being smaller than the second")
         super().__init__()
         self.size = size
         self.scale = scale
diff --git a/holocron/utils/data/collate.py b/holocron/utils/data/collate.py
index dba5b2745..04bad70b3 100644
--- a/holocron/utils/data/collate.py
+++ b/holocron/utils/data/collate.py
@@ -5,9 +5,9 @@

 from typing import Tuple

-import numpy as np
 import torch
 from torch import Tensor
+from torch.distributions.beta import Beta
 from torch.nn.functional import one_hot

 __all__ = ["Mixup"]
@@ -36,7 +36,6 @@ def __init__(self, num_classes: int, alpha: float = 0.2) -> None:
         self.alpha = alpha

     def forward(self, inputs: Tensor, targets: Tensor) -> Tuple[Tensor, Tensor]:
-
         # Convert target to one-hot
         if targets.ndim == 1:
             # (N,) --> (N, C)
@@ -49,7 +48,7 @@ def forward(self, inputs: Tensor, targets: Tensor) -> Tuple[Tensor, Tensor]:
         # Sample lambda
         if self.alpha == 0:
             return inputs, targets
-        lam = np.random.beta(self.alpha, self.alpha)
+        lam = Beta(self.alpha, self.alpha).sample()

         # Mix batch indices
         batch_size = inputs.size()[0]
diff --git a/holocron/utils/misc.py b/holocron/utils/misc.py
index 4227cac3e..b3025f6ff 100644
--- a/holocron/utils/misc.py
+++ b/holocron/utils/misc.py
@@ -33,15 +33,15 @@ def parallel(
     >>> parallel(lambda x: x ** 2, list(range(10)))

     Args:
-        func (callable): function to be executed on multiple workers
-        arr (iterable): function argument's values
-        num_threads (int, optional): number of workers to be used for multiprocessing
+        func: function to be executed on multiple workers
+        arr: function argument's values
+        num_threads: number of workers to be used for multiprocessing
+        progress: whether the progress bar should be displayed
         kwargs: keyword arguments of tqdm

     Returns:
         list: list of function's results
     """
-
     num_threads = num_threads if isinstance(num_threads, int) else min(16, mp.cpu_count())
     if num_threads < 2:
         if progress:
@@ -63,11 +63,11 @@ def find_image_size(dataset: Sequence[Tuple[Image.Image, Any]], **kwargs: Any) -> None:

     Args:
         dataset: an iterator yielding a PIL Image and a target object
+        kwargs: keyword args of matplotlib.pyplot.show

     Returns:
         the suggested height and width to be used
     """
-
     # Record height & width
     _shapes = parallel(lambda x: x[0].size, dataset, progress=True)
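# Example: sampling the mixup coefficient with torch's Beta distribution, as the
# collate change above does — this drops the legacy numpy call flagged by NPY002.
# A minimal sketch of the underlying mixing step:
import torch
from torch.distributions.beta import Beta

alpha = 0.2
lam = Beta(alpha, alpha).sample()  # scalar tensor in [0, 1]

inputs = torch.rand(4, 3, 32, 32)
perm = torch.randperm(inputs.size(0))
mixed = lam * inputs + (1 - lam) * inputs[perm]  # convex combination of sample pairs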
diff --git a/pyproject.toml b/pyproject.toml
index 9c2468672..3919cd19a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -23,6 +23,7 @@ classifiers = [
     "Programming Language :: Python :: 3.8",
     "Programming Language :: Python :: 3.9",
     "Programming Language :: Python :: 3.10",
+    "Programming Language :: Python :: 3.11",
     "Topic :: Scientific/Engineering",
     "Topic :: Scientific/Engineering :: Mathematics",
     "Topic :: Scientific/Engineering :: Artificial Intelligence",
@@ -55,11 +56,10 @@ training = [
     "codecarbon>=2.0.0,<3.0.0",
 ]
 quality = [
-    "ruff>=0.0.260,<1.0.0",
-    "isort>=5.7.0",
-    "mypy>=1.2.0",
-    "pydocstyle[toml]>=6.0.0",
-    "black==22.3.0",
+    "ruff>=0.0.290,<1.0.0",
+    "mypy==1.4.1",
+    "types-tqdm",
+    "black==23.3.0",
     "bandit[toml]>=1.7.0,<1.8.0",
     "pre-commit>=2.17.0,<3.0.0",
 ]
@@ -83,11 +83,10 @@ dev = [
     "pytest>=7.2.0",
     "coverage[toml]>=4.5.4",
     # style
-    "ruff>=0.0.260,<1.0.0",
-    "isort>=5.7.0",
-    "mypy>=1.2.0",
-    "pydocstyle[toml]>=6.0.0",
-    "black==22.3.0",
+    "ruff>=0.0.290,<1.0.0",
+    "mypy==1.4.1",
+    "types-tqdm",
+    "black==23.3.0",
     "bandit[toml]>=1.7.0,<1.8.0",
     "pre-commit>=2.17.0,<3.0.0",
     # docs
@@ -116,53 +115,95 @@ exclude = ["demo*", "docs*", "notebooks*", "scripts*", "tests*"]
 source = ["holocron"]

 [tool.ruff]
-ignore = ["E402", "F403", "E731"]
-exclude = [".git", "venv*", "build", "docs"]
+select = [
+    "E",  # pycodestyle errors
+    "W",  # pycodestyle warnings
+    "D101", "D103",  # pydocstyle missing docstring in public function/class
+    "D201", "D202", "D207", "D208", "D214", "D215", "D300", "D301", "D417", "D419",  # pydocstyle
+    "F",  # pyflakes
+    "I",  # isort
+    "C4",  # flake8-comprehensions
+    "B",  # flake8-bugbear
+    "CPY001",  # flake8-copyright
+    "ISC",  # flake8-implicit-str-concat
+    "PYI",  # flake8-pyi
+    "NPY",  # numpy
+    "PERF",  # perflint
+    "RUF",  # ruff specific
+    "PTH",  # flake8-use-pathlib
+    "S",  # flake8-bandit
+    "N",  # pep8-naming
+    "T10",  # flake8-debugger
+    "T20",  # flake8-print
+    "PT",  # flake8-pytest-style
+    "LOG",  # flake8-logging
+]
+ignore = [
+    "E501",  # line too long, handled by black
+    "E203",  # space before slice
+    "B008",  # do not perform function calls in argument defaults
+    "B904",  # raise from
+    "C901",  # too complex
+    "F403",  # star imports
+    "E731",  # lambda assignment
+    "C416",  # list comprehension to list()
+    "PT011",  # pytest.raises must have a match pattern
+    "N812",  # Lowercase imported as non-lowercase
+]
+exclude = [".git"]
 line-length = 120
-target-version = "py38"
+target-version = "py39"
+preview = true

 [tool.ruff.per-file-ignores]
-"**/__init__.py" = ["F401"]
+"**/__init__.py" = ["I001", "F401", "CPY001"]
+"scripts/**.py" = ["D", "T201"]
+".github/**.py" = ["D", "T201", "S602"]
+"docs/**.py" = ["E402", "D103"]
+"tests/**.py" = ["D103", "CPY001", "S101"]
+"demo/**.py" = ["D103"]
+"api/**.py" = ["D103"]
+"api/tests/**.py" = ["CPY001", "S101"]
+"references/**.py" = ["D101", "D103", "T201"]
+"setup.py" = ["T201"]
+"holocron/nn/modules/**.py" = ["N806"]
+"holocron/models/classification/**.py" = ["N801"]
+"holocron/models/*/**.py" = ["D101"]
+"holocron/nn/functional.py" = ["N802"]

 [tool.ruff.flake8-quotes]
 docstring-quotes = "double"

+[tool.ruff.isort]
+known-first-party = ["holocron", "app"]
+known-third-party = ["torch", "torchvision", "fastprogress", "fastapi", "wandb"]
+
 [tool.mypy]
+python_version = "3.9"
 files = "holocron/"
 show_error_codes = true
 pretty = true
 warn_unused_ignores = true
 warn_redundant_casts = true
+warn_return_any = true
 no_implicit_optional = true
 check_untyped_defs = true
 implicit_reexport = false
+disallow_untyped_defs = true

 [[tool.mypy.overrides]]
 module = [
-    "matplotlib.*",
+    "PIL",
     "torchvision.*",
-    "huggingface_hub",
+    "matplotlib.*",
     "fastprogress.*",
-    "tqdm.*",
-    "PIL.*",
+    "huggingface_hub.*",
 ]
 ignore_missing_imports = true

-[tool.isort]
-profile = "black"
-line_length = 120
-src_paths = ["holocron", "tests", "references", "scripts", "docs", "demo", "api", ".github"]
-skip_glob = "**/__init__.py"
-known_third_party = ["torch", "torchvision", "fastprogress"]
-known_first_party = ["app"]
-
-[tool.pydocstyle]
-select = "D300,D301,D417"
-match = ".*\\.py"
-
 [tool.black]
 line-length = 120
-target-version = ['py38']
+target-version = ['py39']

 [tool.bandit]
 exclude_dirs = [".github/collect_env.py"]
diff --git a/references/classification/train.py b/references/classification/train.py
index 1f686e63b..4fb51af5f 100644
--- a/references/classification/train.py
+++ b/references/classification/train.py
@@ -12,6 +12,7 @@
 import math
 import os
 import time
+from pathlib import Path

 import matplotlib.pyplot as plt
 import numpy as np
@@ -44,7 +45,7 @@


 def worker_init_fn(worker_id: int) -> None:
-    np.random.seed((worker_id + torch.initial_seed()) % np.iinfo(np.int32).max)
+    np.random.default_rng((worker_id + torch.initial_seed()) % np.iinfo(np.int32).max)


 def plot_samples(images, targets, num_samples=8):
@@ -76,7 +77,6 @@ def plot_samples(images, targets, num_samples=8):

 @track_emissions()
 def main(args):
-
     print(args)

     torch.backends.cudnn.benchmark = True
@@ -95,9 +95,9 @@ def main(args):
     if not args.test_only:
         st = time.time()
         if args.dataset.lower() == "imagenette":
             train_set = ImageFolder(
-                os.path.join(args.data_path, "train"),
+                Path(args.data_path).joinpath("train"),
                 T.Compose(
                     [
                         T.RandomResizedCrop(args.train_crop_size, scale=(0.3, 1.0), interpolation=interpolation),
@@ -138,7 +137,7 @@ def main(args):
         collate_fn = default_collate
         if args.mixup_alpha > 0:
             mix = Mixup(len(train_set.classes), alpha=args.mixup_alpha)
-            collate_fn = lambda batch: mix(*default_collate(batch))  # noqa: E731
+            collate_fn = lambda batch: mix(*default_collate(batch))
         train_loader = torch.utils.data.DataLoader(
             train_set,
             batch_size=args.batch_size,
@@ -164,7 +163,7 @@ def main(args):
         st = time.time()
         if args.dataset.lower() == "imagenette":
             val_set = ImageFolder(
-                os.path.join(args.data_path, "val"),
+                Path(args.data_path).joinpath("val"),
                 T.Compose(
                     [
                         T.Resize(args.val_resize_size, interpolation=interpolation),
@@ -264,7 +263,6 @@ def main(args):

     # W&B
     if args.wb:
-
         run = wandb.init(
             name=exp_name,
             project="holocron-image-classification",
diff --git a/references/clean_checkpoint.py b/references/clean_checkpoint.py
index bddb171a2..62d4bc493 100644
--- a/references/clean_checkpoint.py
+++ b/references/clean_checkpoint.py
@@ -4,16 +4,16 @@
 # See LICENSE or go to for full license details.

 import hashlib
+from pathlib import Path

 import torch


 def main(args):
-
     checkpoint = torch.load(args.checkpoint, map_location="cpu")["model"]
     torch.save(checkpoint, args.outfile, _use_new_zipfile_serialization=False)

-    with open(args.outfile, "rb") as f:
+    with Path(args.outfile).open("rb") as f:
         sha_hash = hashlib.sha256(f.read()).hexdigest()
     print(f"Checkpoint saved to {args.outfile} with hash: {sha_hash[:8]}")
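# Note on the worker seeding above: `np.random.default_rng(seed)` returns a
# Generator instead of seeding numpy's global state, so code that still draws via
# `np.random.*` needs to keep and use the returned object. A minimal sketch of a
# worker init that retains the generator (the module-level `rng` name is
# hypothetical):
import numpy as np
import torch

rng = np.random.default_rng()


def worker_init_fn(worker_id: int) -> None:
    global rng
    rng = np.random.default_rng((worker_id + torch.initial_seed()) % np.iinfo(np.int32).max)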
diff --git a/references/detection/train.py b/references/detection/train.py
index fab44811a..f48813124 100644
--- a/references/detection/train.py
+++ b/references/detection/train.py
@@ -24,12 +24,12 @@
 from torchvision.datasets import VOCDetection
 from torchvision.models import detection as tv_detection
 from torchvision.transforms.functional import InterpolationMode, to_pil_image
+from transforms import Compose, ImageTransform, RandomHorizontalFlip, Resize, VOCTargetTransform, convert_to_relative

 import holocron
 from holocron.models import detection
 from holocron.trainer import DetectionTrainer
 from holocron.utils.misc import find_image_size
-from transforms import Compose, ImageTransform, RandomHorizontalFlip, Resize, VOCTargetTransform, convert_to_relative

 VOC_CLASSES = [
     "aeroplane",
@@ -56,7 +56,7 @@


 def worker_init_fn(worker_id: int) -> None:
-    np.random.seed((worker_id + torch.initial_seed()) % np.iinfo(np.int32).max)
+    np.random.default_rng((worker_id + torch.initial_seed()) % np.iinfo(np.int32).max)


 def collate_fn(batch):
@@ -96,7 +96,6 @@ def plot_samples(images, targets, num_samples=8):

 @track_emissions()
 def main(args):
-
     print(args)

     torch.backends.cudnn.benchmark = True
@@ -253,7 +252,6 @@ def main(args):

     # W&B
     if args.wb:
-
         run = wandb.init(
             name=exp_name,
             project="holocron-object-detection",
diff --git a/references/detection/transforms.py b/references/detection/transforms.py
index 8fc118cd8..aebaa9c23 100644
--- a/references/detection/transforms.py
+++ b/references/detection/transforms.py
@@ -33,7 +33,7 @@ def __call__(self, image, target):
         # Encode class labels
         labels = torch.tensor([self.class_map[obj["name"]] for obj in target["annotation"]["object"]], dtype=torch.long)

-        return image, dict(boxes=boxes, labels=labels)
+        return image, {"boxes": boxes, "labels": labels}


 class Compose(transforms.Compose):
@@ -106,7 +106,6 @@ def __call__(self, image, target):


 def convert_to_relative(image, target):
-
     target["boxes"][:, [0, 2]] /= image.size[0]
     target["boxes"][:, [1, 3]] /= image.size[1]
diff --git a/references/segmentation/train.py b/references/segmentation/train.py
index ac4c5c355..8a80c2c98 100644
--- a/references/segmentation/train.py
+++ b/references/segmentation/train.py
@@ -23,12 +23,12 @@
 from torchvision.datasets import VOCSegmentation
 from torchvision.models import segmentation as tv_segmentation
 from torchvision.transforms.functional import InterpolationMode, to_pil_image
+from transforms import Compose, ImageTransform, RandomCrop, RandomHorizontalFlip, RandomResize, Resize, ToTensor

 import holocron
 from holocron.models import segmentation
 from holocron.trainer import SegmentationTrainer
 from holocron.utils.misc import find_image_size
-from transforms import Compose, ImageTransform, RandomCrop, RandomHorizontalFlip, RandomResize, Resize, ToTensor

 VOC_CLASSES = [
     "background",
@@ -56,7 +56,7 @@


 def worker_init_fn(worker_id: int) -> None:
-    np.random.seed((worker_id + torch.initial_seed()) % np.iinfo(np.int32).max)
+    np.random.default_rng((worker_id + torch.initial_seed()) % np.iinfo(np.int32).max)


 def plot_samples(images, targets, ignore_index=None):
@@ -111,7 +111,6 @@ def plot_predictions(images, preds, targets, ignore_index=None):

 @track_emissions()
 def main(args):
-
     print(args)

     torch.backends.cudnn.benchmark = True
@@ -280,7 +279,6 @@ def main(args):

     # W&B
     if args.wb:
-
         run = wandb.init(
             name=exp_name,
             project="holocron-semantic-segmentation",
diff --git a/references/segmentation/transforms.py b/references/segmentation/transforms.py
index 5b26d0499..3be1f445b 100644
--- a/references/segmentation/transforms.py
+++ b/references/segmentation/transforms.py
@@ -9,9 +9,8 @@

 import numpy as np
 import torch
-from torchvision.transforms import InterpolationMode
+from torchvision.transforms import InterpolationMode, transforms
 from torchvision.transforms import functional as F
-from torchvision.transforms import transforms


 def pad_if_smaller(img, size, fill=0):
@@ -103,7 +102,6 @@ def __repr__(self):

 class ToTensor(transforms.ToTensor):
     def __call__(self, img, target):
-
         img = super(ToTensor, self).__call__(img)
         target = torch.as_tensor(np.array(target), dtype=torch.int64)
diff --git a/scripts/eval_latency.py b/scripts/eval_latency.py
index 17f7f8928..caedcec9d 100644
--- a/scripts/eval_latency.py
+++ b/scripts/eval_latency.py
@@ -18,7 +18,6 @@

 @torch.inference_mode()
 def main(args):
-
     if args.device is None:
         args.device = "cuda:0" if torch.cuda.is_available() else "cpu"

@@ -31,6 +30,10 @@ def main(args):
     if args.arch.startswith("repvgg") or args.arch.startswith("mobileone"):
         model.reparametrize()

+    # Compile (using tensor cores)
+    torch.set_float32_matmul_precision("high")
+    model = torch.compile(model)
+
     # Input
     img_tensor = torch.rand((1, 3, args.size, args.size)).to(device=device)
diff --git a/scripts/export_to_onnx.py b/scripts/export_to_onnx.py
index b84955707..d3402719b 100644
--- a/scripts/export_to_onnx.py
+++ b/scripts/export_to_onnx.py
@@ -16,7 +16,6 @@

 @torch.inference_mode()
 def main(args):
-
     is_pretrained = args.pretrained and not isinstance(args.checkpoint, str)
     # Pretrained imagenet model
     model = models.__dict__[args.arch](pretrained=is_pretrained).eval()
diff --git a/setup.py b/setup.py
index 2bf088286..5a94ce84f 100644
--- a/setup.py
+++ b/setup.py
@@ -14,12 +14,11 @@


 if __name__ == "__main__":
-
     print(f"Building wheel {PKG_NAME}-{VERSION}")

     # Dynamically set the __version__ attribute
     cwd = Path(__file__).parent.absolute()
-    with open(cwd.joinpath("holocron", "version.py"), "w", encoding="utf-8") as f:
+    with cwd.joinpath("holocron", "version.py").open("w", encoding="utf-8") as f:
         f.write(f"__version__ = '{VERSION}'\n")

     setup(name=PKG_NAME, version=VERSION)
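# Example: the latency script above now compiles the model; a minimal standalone
# sketch (requires PyTorch 2.x, with a hypothetical toy module standing in for a
# Holocron model):
import torch
import torch.nn as nn

torch.set_float32_matmul_precision("high")  # allow TF32/tensor-core matmuls
model = nn.Sequential(nn.Conv2d(3, 16, 3), nn.ReLU()).eval()
model = torch.compile(model)  # compilation happens lazily on the first call

with torch.inference_mode():
    out = model(torch.rand(1, 3, 224, 224))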
["convnext_large", False], - ["convnext_xl", False], - ["mobileone_s0", True], - ["mobileone_s1", False], - ["mobileone_s2", False], - ["mobileone_s3", False], + ("darknet24", True), + ("darknet19", True), + ("darknet53", True), + ("cspdarknet53", True), + ("cspdarknet53_mish", True), + ("resnet18", True), + ("resnet34", True), + ("resnet50", True), + ("resnet101", True), + ("resnet152", True), + ("resnext50_32x4d", True), + ("resnext101_32x8d", True), + ("resnet50d", True), + ("res2net50_26w_4s", True), + ("tridentnet50", True), + ("pyconv_resnet50", True), + ("pyconvhg_resnet50", True), + ("rexnet1_0x", True), + ("rexnet1_3x", False), + ("rexnet1_5x", False), + ("rexnet2_0x", False), + ("rexnet2_2x", False), + ("sknet50", True), + ("sknet101", True), + ("sknet152", True), + ("repvgg_a0", True), + ("repvgg_b0", False), + ("convnext_atto", True), + ("convnext_femto", False), + ("convnext_pico", False), + ("convnext_nano", False), + ("convnext_tiny", False), + ("convnext_small", False), + ("convnext_base", False), + ("convnext_large", False), + ("convnext_xl", False), + ("mobileone_s0", True), + ("mobileone_s1", False), + ("mobileone_s2", False), + ("mobileone_s3", False), ], ) def test_classification_model(arch, pretrained): @@ -134,7 +133,7 @@ def test_classification_model(arch, pretrained): ) def test_classification_onnx_export(arch, tmpdir_factory): model = classification.__dict__[arch](pretrained=False, num_classes=10).eval() - tmp_path = os.path.join(str(tmpdir_factory.mktemp("onnx")), f"{arch}.onnx") + tmp_path = Path(str(tmpdir_factory.mktemp("onnx"))).joinpath(f"{arch}.onnx") img_tensor = torch.rand((1, 3, 224, 224)) with torch.no_grad(): torch.onnx.export(model, img_tensor, tmp_path, export_params=True, opset_version=14) diff --git a/tests/test_models_detection.py b/tests/test_models_detection.py index e5ef75f53..919d74e5c 100644 --- a/tests/test_models_detection.py +++ b/tests/test_models_detection.py @@ -1,4 +1,4 @@ -import os +from pathlib import Path import pytest import torch @@ -7,7 +7,6 @@ def _test_detection_model(name, input_size): - num_classes = 10 batch_size = 2 x = torch.rand((batch_size, 3, *input_size)) @@ -51,7 +50,7 @@ def _test_detection_model(name, input_size): gt_labels = [(num_classes * torch.rand(num)).to(dtype=torch.long) for num in num_boxes] # Loss computation - loss = model(x, [dict(boxes=boxes, labels=labels) for boxes, labels in zip(gt_boxes, gt_labels)]) + loss = model(x, [{"boxes": boxes, "labels": labels} for boxes, labels in zip(gt_boxes, gt_labels)]) assert isinstance(loss, dict) for subloss in loss.values(): assert isinstance(subloss, torch.Tensor) @@ -61,16 +60,16 @@ def _test_detection_model(name, input_size): # Loss computation with no GT gt_boxes = [torch.zeros((0, 4)) for _ in num_boxes] gt_labels = [torch.zeros(0, dtype=torch.long) for _ in num_boxes] - loss = model(x, [dict(boxes=boxes, labels=labels) for boxes, labels in zip(gt_boxes, gt_labels)]) + loss = model(x, [{"boxes": boxes, "labels": labels} for boxes, labels in zip(gt_boxes, gt_labels)]) sum(v for v in loss.values()).backward() @pytest.mark.parametrize( - "arch, input_shape", + ("arch", "input_shape"), [ - ["yolov1", (448, 448)], - ["yolov2", (416, 416)], - ["yolov4", (608, 608)], + ("yolov1", (448, 448)), + ("yolov2", (416, 416)), + ("yolov4", (608, 608)), ], ) def test_detection_model(arch, input_shape): @@ -78,16 +77,16 @@ def test_detection_model(arch, input_shape): @pytest.mark.parametrize( - "arch, input_shape", + ("arch", "input_shape"), [ - ["yolov1", (448, 448)], - 
["yolov2", (416, 416)], - ["yolov4", (608, 608)], + ("yolov1", (448, 448)), + ("yolov2", (416, 416)), + ("yolov4", (608, 608)), ], ) def test_detection_onnx_export(arch, input_shape, tmpdir_factory): model = detection.__dict__[arch](pretrained=False, num_classes=10).eval() - tmp_path = os.path.join(str(tmpdir_factory.mktemp("onnx")), f"{arch}.onnx") + tmp_path = Path(str(tmpdir_factory.mktemp("onnx"))).joinpath(f"{arch}.onnx") img_tensor = torch.rand((1, 3, *input_shape)) with torch.no_grad(): torch.onnx.export(model, img_tensor, tmp_path, export_params=True, opset_version=14) @@ -112,15 +111,18 @@ def test_yolov1(): assert b_coords.shape == (n, h, w, num_anchors, 4) assert b_o.shape == (n, h, w, num_anchors) assert b_scores.shape == (n, h, w, 1, num_classes) - assert torch.all(b_coords <= 1) and torch.all(b_coords >= 0) - assert torch.all(b_o <= 1) and torch.all(b_o >= 0) + assert torch.all(b_coords <= 1) + assert torch.all(b_coords >= 0) + assert torch.all(b_o <= 1) + assert torch.all(b_o >= 0) assert torch.allclose(b_scores.sum(-1), torch.ones(1)) # Compute loss target = [ - dict( - boxes=torch.tensor([[0, 0, 1 / 7, 1 / 7]], dtype=torch.float32), labels=torch.zeros((1,), dtype=torch.long) - ) + { + "boxes": torch.tensor([[0, 0, 1 / 7, 1 / 7]], dtype=torch.float32), + "labels": torch.zeros((1,), dtype=torch.long), + } ] pred_boxes = torch.zeros((1, h, w, num_anchors, 4), dtype=torch.float32) pred_boxes[..., :2] = 0.5 @@ -180,12 +182,16 @@ def test_yolov2(): assert b_coords.shape == (n, h, w, num_anchors, 4) assert b_o.shape == (n, h, w, num_anchors) assert b_scores.shape == (n, h, w, num_anchors, num_classes) - assert torch.all(b_coords[..., :2] <= 1) and torch.all(b_coords >= 0) - assert torch.all(b_o <= 1) and torch.all(b_o >= 0) + assert torch.all(b_coords[..., :2] <= 1) + assert torch.all(b_coords >= 0) + assert torch.all(b_o <= 1) + assert torch.all(b_o >= 0) assert torch.allclose(b_scores.sum(-1), torch.ones(1)) # Compute loss - target = [dict(boxes=torch.tensor([[0, 0, 1, 1]], dtype=torch.float32), labels=torch.zeros((1,), dtype=torch.long))] + target = [ + {"boxes": torch.tensor([[0, 0, 1, 1]], dtype=torch.float32), "labels": torch.zeros((1,), dtype=torch.long)} + ] pred_boxes = torch.zeros((1, h, w, num_anchors, 4), dtype=torch.float32) pred_boxes[..., :2] = 0.5 pred_boxes[..., 2:] = 1 diff --git a/tests/test_models_segmentation.py b/tests/test_models_segmentation.py index 0b586c287..7c3805c17 100644 --- a/tests/test_models_segmentation.py +++ b/tests/test_models_segmentation.py @@ -1,4 +1,4 @@ -import os +from pathlib import Path import pytest import torch @@ -7,7 +7,6 @@ def _test_segmentation_model(name, input_shape): - num_classes = 10 batch_size = 2 num_channels = 3 @@ -24,16 +23,16 @@ def _test_segmentation_model(name, input_shape): @pytest.mark.parametrize( - "arch, input_shape", + ("arch", "input_shape"), [ - ["unet", (256, 256)], - ["unet2", (256, 256)], - ["unet_rexnet13", (256, 256)], - ["unet_tvvgg11", (256, 256)], - ["unet_tvresnet34", (256, 256)], - ["unetp", (256, 256)], - ["unetpp", (256, 256)], - ["unet3p", (320, 320)], + ("unet", (256, 256)), + ("unet2", (256, 256)), + ("unet_rexnet13", (256, 256)), + ("unet_tvvgg11", (256, 256)), + ("unet_tvresnet34", (256, 256)), + ("unetp", (256, 256)), + ("unetpp", (256, 256)), + ("unet3p", (320, 320)), ], ) def test_segmentation_model(arch, input_shape): @@ -41,18 +40,18 @@ def test_segmentation_model(arch, input_shape): @pytest.mark.parametrize( - "arch, input_shape", + ("arch", "input_shape"), [ - ["unet", (256, 256)], - 
["unet2", (256, 256)], - ["unetp", (256, 256)], - ["unetpp", (256, 256)], - ["unet3p", (320, 320)], + ("unet", (256, 256)), + ("unet2", (256, 256)), + ("unetp", (256, 256)), + ("unetpp", (256, 256)), + ("unet3p", (320, 320)), ], ) def test_segmentation_onnx_export(arch, input_shape, tmpdir_factory): model = segmentation.__dict__[arch](pretrained=False, num_classes=10).eval() - tmp_path = os.path.join(str(tmpdir_factory.mktemp("onnx")), f"{arch}.onnx") + tmp_path = Path(str(tmpdir_factory.mktemp("onnx"))).joinpath(f"{arch}.onnx") img_tensor = torch.rand((1, 3, *input_shape)) with torch.no_grad(): torch.onnx.export(model, img_tensor, tmp_path, export_params=True, opset_version=14) diff --git a/tests/test_nn.py b/tests/test_nn.py index c82aae855..6fe4bef81 100644 --- a/tests/test_nn.py +++ b/tests/test_nn.py @@ -4,7 +4,6 @@ def test_dropblock2d(): - x = torch.rand(2, 4, 16, 16) # Drop probability of 1 diff --git a/tests/test_nn_activation.py b/tests/test_nn_activation.py index 4698e3ca0..dcfaf89c3 100644 --- a/tests/test_nn_activation.py +++ b/tests/test_nn_activation.py @@ -7,7 +7,6 @@ def _test_activation_function(fn, input_shape): - # Optional testing fn_args = inspect.signature(fn).parameters.keys() cfg = {} diff --git a/tests/test_nn_attention.py b/tests/test_nn_attention.py index dcaa9163a..454577b89 100644 --- a/tests/test_nn_attention.py +++ b/tests/test_nn_attention.py @@ -4,7 +4,6 @@ def _test_attention_mod(mod): - x = torch.rand(2, 4, 8, 8) # Check that attention preserves shape mod = mod.eval() diff --git a/tests/test_nn_conv.py b/tests/test_nn_conv.py index 828877ddf..7157e4c5e 100644 --- a/tests/test_nn_conv.py +++ b/tests/test_nn_conv.py @@ -5,7 +5,6 @@ def _test_conv2d(mod, input_shape, output_shape): - x = torch.rand(*input_shape) out = mod(x) @@ -34,7 +33,6 @@ def test_pyconv2d(): def test_lambdalayer(): - with pytest.raises(AssertionError): nn.LambdaLayer(3, 31, 16) with pytest.raises(AssertionError): diff --git a/tests/test_nn_downsample.py b/tests/test_nn_downsample.py index 5370e3c02..71ac85fba 100644 --- a/tests/test_nn_downsample.py +++ b/tests/test_nn_downsample.py @@ -7,7 +7,6 @@ def test_concatdownsample2d(): - num_batches = 2 num_chan = 4 scale_factor = 2 @@ -36,7 +35,6 @@ def test_concatdownsample2d(): def test_globalavgpool2d(): - x = torch.rand(2, 4, 16, 16) # Check that ops are doing the same thing @@ -53,7 +51,6 @@ def test_globalavgpool2d(): def test_globalmaxpool2d(): - x = torch.rand(2, 4, 16, 16) # Check that ops are doing the same thing @@ -70,7 +67,6 @@ def test_globalmaxpool2d(): def test_blurpool2d(): - with pytest.raises(AssertionError): downsample.BlurPool2d(1, 0) @@ -90,7 +86,6 @@ def test_blurpool2d(): def test_zpool(): - num_batches = 2 num_chan = 4 x = torch.rand((num_batches, num_chan, 32, 32)) diff --git a/tests/test_nn_init.py b/tests/test_nn_init.py index 05b516bc7..5d2f7fa2a 100644 --- a/tests/test_nn_init.py +++ b/tests/test_nn_init.py @@ -5,7 +5,6 @@ def test_init(): - module = nn.Sequential(nn.Conv2d(3, 32, 3), nn.BatchNorm2d(32), nn.LeakyReLU(inplace=True)) # Check that each layer was initialized correctly diff --git a/tests/test_nn_loss.py b/tests/test_nn_loss.py index 8edcf3070..04d084e87 100644 --- a/tests/test_nn_loss.py +++ b/tests/test_nn_loss.py @@ -7,7 +7,6 @@ def _test_loss_function(loss_fn, same_loss=0.0, multi_label=False): - num_batches = 2 num_classes = 4 # 4 classes @@ -55,7 +54,6 @@ def _test_loss_function(loss_fn, same_loss=0.0, multi_label=False): def test_focal_loss(): - # Common verification 
_test_loss_function(F.focal_loss) @@ -76,7 +74,6 @@ def test_focal_loss(): def test_multilabel_cross_entropy(): - num_batches = 2 num_classes = 4 @@ -94,7 +91,6 @@ def test_multilabel_cross_entropy(): def test_complement_cross_entropy(): - num_batches = 2 num_classes = 4 @@ -109,7 +105,6 @@ def test_complement_cross_entropy(): def test_mc_loss(): - num_batches = 2 num_classes = 4 xi = 2 @@ -145,7 +140,6 @@ def test_mc_loss(): def test_cb_loss(): - num_batches = 2 num_classes = 4 x = torch.rand(num_batches, num_classes, 20, 20) @@ -176,7 +170,6 @@ def test_cb_loss(): def test_dice_loss(): - num_batches = 2 num_classes = 4 @@ -197,14 +190,13 @@ def test_dice_loss(): def test_poly_loss(): - _test_loss_function(F.poly_loss) + # _test_loss_function(F.poly_loss, multi_label=True) num_batches = 2 num_classes = 4 x = torch.rand((num_batches, num_classes, 20, 20), requires_grad=True) - target = torch.rand(num_batches, num_classes, 20, 20) target = (num_classes * torch.rand(num_batches, 20, 20)).to(torch.long) # Backprop diff --git a/tests/test_ops.py b/tests/test_ops.py index 1fbb64972..859a1b2e6 100644 --- a/tests/test_ops.py +++ b/tests/test_ops.py @@ -6,16 +6,14 @@ from holocron import ops -@pytest.fixture(scope="function") +@pytest.fixture() def boxes(): - return torch.tensor( [[0, 0, 100, 100], [50, 50, 100, 100], [50, 50, 150, 150], [100, 100, 200, 200]], dtype=torch.float32 ) def test_iou_penalty(boxes): - penalty = ops.boxes.iou_penalty(boxes, boxes) # Check shape @@ -30,7 +28,6 @@ def test_iou_penalty(boxes): def test_diou_loss(boxes): - diou = ops.boxes.diou_loss(boxes, boxes) # Check shape @@ -45,7 +42,6 @@ def test_diou_loss(boxes): def test_box_giou(boxes): - giou = ops.boxes.box_giou(boxes, boxes) # Check shape @@ -60,19 +56,16 @@ def test_box_giou(boxes): def test_aspect_ratio(boxes): - # All boxes are squares so arctan should yield Pi / 4 assert torch.equal(ops.boxes.aspect_ratio(boxes), math.pi / 4 * torch.ones(boxes.shape[0])) def test_aspect_ratio_consistency(boxes): - # All boxes have the same aspect ratio assert torch.equal(ops.boxes.aspect_ratio_consistency(boxes, boxes), torch.zeros(boxes.shape[0], boxes.shape[0])) def test_ciou_loss(boxes): - ciou = ops.boxes.ciou_loss(boxes, boxes) # Check shape diff --git a/tests/test_optim.py b/tests/test_optim.py index af539f2e1..4ce450542 100644 --- a/tests/test_optim.py +++ b/tests/test_optim.py @@ -8,7 +8,6 @@ def _test_optimizer(name: str, **kwargs: Any) -> None: - lr = 1e-4 input_shape = (3, 224, 224) num_batches = 4 diff --git a/tests/test_optim_wrapper.py b/tests/test_optim_wrapper.py index 84d7c0900..d669577ee 100644 --- a/tests/test_optim_wrapper.py +++ b/tests/test_optim_wrapper.py @@ -7,7 +7,6 @@ def _test_wrapper(name: str) -> None: - lr = 1e-4 input_shape = (3, 224, 224) num_batches = 4 @@ -45,7 +44,8 @@ def _test_wrapper(name: str) -> None: loss.backward() opt_wrapper.step() # Check update rule - assert not torch.equal(_p.data, p_val) and not torch.equal(_p.data, p_val - lr * _p.grad) + assert not torch.equal(_p.data, p_val) + assert not torch.equal(_p.data, p_val - lr * _p.grad) # Repr assert len(repr(opt_wrapper).split("\n")) == len(repr(optimizer).split("\n")) + 4 diff --git a/tests/test_trainer.py b/tests/test_trainer.py index 93eb82a62..97470f135 100644 --- a/tests/test_trainer.py +++ b/tests/test_trainer.py @@ -68,7 +68,7 @@ def __init__(self, n): def __getitem__(self, idx): boxes = torch.tensor([[0, 0, 1, 1], [0.25, 0.25, 0.75, 0.75]], dtype=torch.float32) - return torch.rand((3, 320, 320)), dict(boxes=boxes, 
labels=torch.ones(2, dtype=torch.long)) + return torch.rand((3, 320, 320)), {"boxes": boxes, "labels": torch.ones(2, dtype=torch.long)} def __len__(self): return self.n @@ -82,7 +82,6 @@ def collate_fn(batch): def _test_trainer( learner: trainer.Trainer, num_it: int, ref_param: str, freeze_until: Optional[str] = None, lr: float = 1e-3 ) -> None: - trainer.utils.freeze_model(learner.model.train(), freeze_until) learner._reset_opt(lr) # Update param groups & LR @@ -149,7 +148,6 @@ def _test_trainer( def test_classification_trainer(tmpdir_factory): - folder = tmpdir_factory.mktemp("checkpoints") file_path = str(folder.join("tmp.pt")) @@ -192,7 +190,6 @@ def test_classification_trainer(tmpdir_factory): def test_classification_trainer_few_classes(): - num_it = 10 batch_size = 8 # Generate all dependencies @@ -206,7 +203,6 @@ def test_classification_trainer_few_classes(): def test_binary_classification_trainer(): - num_it = 10 batch_size = 8 # Generate all dependencies @@ -235,7 +231,6 @@ def test_binary_classification_trainer(): def test_segmentation_trainer(tmpdir_factory): - folder = tmpdir_factory.mktemp("checkpoints") file_path = str(folder.join("tmp.pt")) @@ -275,7 +270,6 @@ def test_segmentation_trainer(tmpdir_factory): def test_detection_trainer(tmpdir_factory): - folder = tmpdir_factory.mktemp("checkpoints") file_path = str(folder.join("tmp.pt")) diff --git a/tests/test_trainer_utils.py b/tests/test_trainer_utils.py index 66be88e09..884f95534 100644 --- a/tests/test_trainer_utils.py +++ b/tests/test_trainer_utils.py @@ -6,7 +6,6 @@ def test_freeze_bn(): - # Simple module with BN mod = nn.Sequential(nn.Conv2d(3, 32, 3), nn.BatchNorm2d(32), nn.ReLU(inplace=True)) nb = mod[1].num_batches_tracked.clone() @@ -25,7 +24,6 @@ def test_freeze_bn(): def test_freeze_model(): - # Simple model mod = nn.Sequential(nn.Conv2d(3, 32, 3), nn.ReLU(inplace=True), nn.Conv2d(32, 64, 3), nn.ReLU(inplace=True)) trainer.freeze_model(mod, "0") diff --git a/tests/test_transforms.py b/tests/test_transforms.py index 22df1b2d5..703dbc18f 100644 --- a/tests/test_transforms.py +++ b/tests/test_transforms.py @@ -9,15 +9,14 @@ def test_resize(): - # Arg check - with pytest.raises(AssertionError): + with pytest.raises(ValueError): T.Resize(16) - with pytest.raises(AssertionError): + with pytest.raises(ValueError): T.Resize((16, 16), mode="stretch") - with pytest.raises(AssertionError): + with pytest.raises(ValueError): T.Resize((16, 16), mode="pad") img1 = np.full((16, 32, 3), 255, dtype=np.uint8) @@ -30,12 +29,16 @@ def test_resize(): assert isinstance(out, Image.Image) assert out.size == (32, 32) np_out = np.asarray(out) - assert np.all(np_out[8:-8] == 255) and np.all(np_out[:8] == 0) and np.all(np_out[-8:]) == 0 + assert np.all(np_out[8:-8] == 255) + assert np.all(np_out[:8] == 0) + assert np.all(np_out[-8:]) == 0 out = tf(Image.fromarray(img2)) assert isinstance(out, Image.Image) assert out.size == (32, 32) np_out = np.asarray(out) - assert np.all(np_out[:, 8:-8] == 255) and np.all(np_out[:, :8] == 0) and np.all(np_out[:, -8:]) == 0 + assert np.all(np_out[:, 8:-8] == 255) + assert np.all(np_out[:, :8] == 0) + assert np.all(np_out[:, -8:]) == 0 # Squish out = T.Resize((32, 32), mode=ResizeMethod.SQUISH)(Image.fromarray(img1)) assert np.all(np.asarray(out) == 255) @@ -45,21 +48,24 @@ def test_resize(): assert isinstance(out, torch.Tensor) assert out.shape == (3, 32, 32) np_out = out.numpy() - assert np.all(np_out[:, 8:-8] == 1) and np.all(np_out[:, :8] == 0) and np.all(np_out[:, -8:]) == 0 + assert np.all(np_out[:, 
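# Example: the tests below expect ValueError rather than AssertionError, matching
# the assert -> raise migration in holocron/transforms above (asserts vanish
# under `python -O`, and flake8-bandit's S101 flags them). A minimal sketch with
# a hypothetical class:
import pytest


class _Sized:
    def __init__(self, size):
        if not isinstance(size, (tuple, list)) or len(size) != 2 or any(s <= 0 for s in size):
            raise ValueError("size is expected to be a sequence of 2 positive integers")
        self.size = tuple(size)


def test_sized_arg_check():
    with pytest.raises(ValueError):
        _Sized(16)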
diff --git a/tests/test_transforms.py b/tests/test_transforms.py
index 22df1b2d5..703dbc18f 100644
--- a/tests/test_transforms.py
+++ b/tests/test_transforms.py
@@ -9,15 +9,14 @@


 def test_resize():
-
     # Arg check
-    with pytest.raises(AssertionError):
+    with pytest.raises(ValueError):
         T.Resize(16)

-    with pytest.raises(AssertionError):
+    with pytest.raises(ValueError):
         T.Resize((16, 16), mode="stretch")

-    with pytest.raises(AssertionError):
+    with pytest.raises(ValueError):
         T.Resize((16, 16), mode="pad")

     img1 = np.full((16, 32, 3), 255, dtype=np.uint8)
@@ -30,12 +29,16 @@ def test_resize():
     assert isinstance(out, Image.Image)
     assert out.size == (32, 32)
     np_out = np.asarray(out)
-    assert np.all(np_out[8:-8] == 255) and np.all(np_out[:8] == 0) and np.all(np_out[-8:]) == 0
+    assert np.all(np_out[8:-8] == 255)
+    assert np.all(np_out[:8] == 0)
+    assert np.all(np_out[-8:]) == 0
     out = tf(Image.fromarray(img2))
     assert isinstance(out, Image.Image)
     assert out.size == (32, 32)
     np_out = np.asarray(out)
-    assert np.all(np_out[:, 8:-8] == 255) and np.all(np_out[:, :8] == 0) and np.all(np_out[:, -8:]) == 0
+    assert np.all(np_out[:, 8:-8] == 255)
+    assert np.all(np_out[:, :8] == 0)
+    assert np.all(np_out[:, -8:]) == 0
     # Squish
     out = T.Resize((32, 32), mode=ResizeMethod.SQUISH)(Image.fromarray(img1))
     assert np.all(np.asarray(out) == 255)
@@ -45,21 +48,24 @@ def test_resize():
     assert isinstance(out, torch.Tensor)
     assert out.shape == (3, 32, 32)
     np_out = out.numpy()
-    assert np.all(np_out[:, 8:-8] == 1) and np.all(np_out[:, :8] == 0) and np.all(np_out[:, -8:]) == 0
+    assert np.all(np_out[:, 8:-8] == 1)
+    assert np.all(np_out[:, :8] == 0)
+    assert np.all(np_out[:, -8:]) == 0
     out = tf(torch.from_numpy(img2).to(dtype=torch.float32).permute(2, 0, 1) / 255)
     assert isinstance(out, torch.Tensor)
     assert out.shape == (3, 32, 32)
     np_out = out.numpy()
-    assert np.all(np_out[:, :, 8:-8] == 1) and np.all(np_out[:, :, :8] == 0) and np.all(np_out[:, :, -8:]) == 0
+    assert np.all(np_out[:, :, 8:-8] == 1)
+    assert np.all(np_out[:, :, :8] == 0)
+    assert np.all(np_out[:, :, -8:]) == 0


 def test_randomzoomout():
-
     # Arg check
-    with pytest.raises(AssertionError):
+    with pytest.raises(ValueError):
         T.RandomZoomOut(224)

-    with pytest.raises(AssertionError):
+    with pytest.raises(ValueError):
         T.Resize((16, 16), (1, 0.5))

     pil_img = Image.fromarray(np.full((64, 64, 3), 255, dtype=np.uint8))
@@ -72,11 +78,13 @@ def test_randomzoomout():
     assert isinstance(out, Image.Image)
     assert out.size == (32, 32)
     np_out = np.asarray(out)
-    assert np.all(np_out[16, 16] == 255) and np_out.mean() < 255
+    assert np.all(np_out[16, 16] == 255)
+    assert np_out.mean() < 255

     # Tensor
     out = tf(torch_img)
     assert isinstance(out, torch.Tensor)
     assert out.shape == (3, 32, 32)
     np_out = np.asarray(out)
-    assert np.all(np_out[:, 16, 16] == 1) and np_out.mean() < 1
+    assert np.all(np_out[:, 16, 16] == 1)
+    assert np_out.mean() < 1
diff --git a/tests/test_utils.py b/tests/test_utils.py
index 0dc7c831e..6af0e1cac 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -7,7 +7,6 @@


 def test_mixup():
-
     batch_size = 8
     num_classes = 10
     shape = (3, 32, 32)
@@ -19,7 +18,8 @@ def test_mixup():
     mix_img, mix_target = mix(img.clone(), target.clone())
     assert img.shape == (batch_size, *shape)
     assert not torch.equal(img, mix_img)
-    assert mix_target.dtype == torch.float32 and mix_target.shape == (batch_size, num_classes)
+    assert mix_target.dtype == torch.float32
+    assert mix_target.shape == (batch_size, num_classes)
     assert torch.all(mix_target.sum(dim=1) == 1.0)
     count = (mix_target > 0).sum(dim=1)
     assert torch.all((count == 2.0) | (count == 1.0))
@@ -28,7 +28,8 @@ def test_mixup():
     mix = utils.data.Mixup(num_classes, alpha=0.0)
     mix_img, mix_target = mix(img.clone(), target.clone())
     assert torch.equal(img, mix_img)
-    assert mix_target.dtype == torch.float32 and mix_target.shape == (batch_size, num_classes)
+    assert mix_target.dtype == torch.float32
+    assert mix_target.shape == (batch_size, num_classes)
     assert torch.all(mix_target.sum(dim=1) == 1.0)
     assert torch.all((mix_target > 0).sum(dim=1) == 1.0)

@@ -39,7 +40,8 @@ def test_mixup():
     mix_img, mix_target = mix(img.clone(), target.clone())
     assert img.shape == (batch_size, *shape)
     assert not torch.equal(img, mix_img)
-    assert mix_target.dtype == torch.float32 and mix_target.shape == (batch_size, 1)
+    assert mix_target.dtype == torch.float32
+    assert mix_target.shape == (batch_size, 1)

     # Already in one-hot
     mix = utils.data.Mixup(num_classes, alpha=0.2)
@@ -47,16 +49,17 @@ def test_mixup():
     mix_img, mix_target = mix(img.clone(), target.clone())
     assert img.shape == (batch_size, *shape)
     assert not torch.equal(img, mix_img)
-    assert mix_target.dtype == torch.float32 and mix_target.shape == (batch_size, num_classes)
+    assert mix_target.dtype == torch.float32
+    assert mix_target.shape == (batch_size, num_classes)


 @pytest.mark.parametrize(
-    "arr, fn, expected, progress, num_threads",
+    ("arr", "fn", "expected", "progress", "num_threads"),
     [
-        [[1, 2, 3], lambda x: x**2, [1, 4, 9], False, 3],
-        [[1, 2, 3], lambda x: x**2, [1, 4, 9], True, 1],
-        ["hello", lambda x: x.upper(), list("HELLO"), True, None],
-        ["hello", lambda x: x.upper(), list("HELLO"), False, None],
+        ([1, 2, 3], lambda x: x**2, [1, 4, 9], False, 3),
+        ([1, 2, 3], lambda x: x**2, [1, 4, 9], True, 1),
+        ("hello", lambda x: x.upper(), list("HELLO"), True, None),
+        ("hello", lambda x: x.upper(), list("HELLO"), False, None),
     ],
 )
 def test_parallel(arr, fn, expected, progress, num_threads):